# Objective: To predict successful ICOs (who are the winners in the cryptocurrency ICOs?)
!pip install statsmodels
import statsmodels.api as sm
import numpy as np # linear algebra
import pandas as pd # data processing
import seaborn as sns
import matplotlib.pyplot as plt
import numpy.ma as ma # deal with NaN
import scipy.stats
from scipy import stats
import datetime as dt
import plotly.express as px
Preparatory work and DataFrame Cleaning
# Import dataset
# The file is semicolon-delimited. low_memory=False makes pandas parse the
# file in one pass instead of in chunks, so column dtypes are not inferred
# piecewise.
df = pd.read_csv("tord_v3.csv", delimiter=';', skiprows=0, low_memory=False) #This line solves the ParseError prompt
# Keep only the columns needed for the analysis below. The explicit .copy()
# makes coin_ico an independent frame, so the column assignments that follow
# write to it directly instead of to a selection of df (which triggers
# SettingWithCopyWarning).
coin_ico = df[[
    'name', 'country', 'is_ico', 'is_ieo', 'is_sto', 'ico_start', 'ico_end',
    'raised_usd', 'whitelist', 'kyc', 'bonus', 'rating', 'teamsize',
    'platform', 'distributed_in_ico',
]].copy()
# Notebook-style inspection of the raw data (only rendered when this is the
# last expression of a notebook cell).
df.head()
df.describe()
# Convert the ICO dates to datetime so that a duration can be computed.
coin_ico['ico_start'] = pd.to_datetime(coin_ico['ico_start'])
coin_ico['ico_end'] = pd.to_datetime(coin_ico['ico_end'])
coin_ico['ico_duration'] = coin_ico['ico_end'] - coin_ico['ico_start']
# Keep only the rows flagged as ICOs (is_ico == 1). The .copy() gives an
# independent frame so the assignments below do not write to a filtered
# selection of the previous frame.
coin_ico = coin_ico[coin_ico['is_ico'] == 1].copy()
# Express the duration as a whole number of days instead of a timedelta.
coin_ico['ico_duration'] = coin_ico['ico_duration'].dt.days
# Store the dates as 'YYYY-MM-DD' strings; missing dates (NaT) become ''.
coin_ico['ico_start'] = coin_ico['ico_start'].apply(
    lambda x: x.strftime('%Y-%m-%d') if not pd.isnull(x) else ''
)
coin_ico['ico_end'] = coin_ico['ico_end'].apply(
    lambda x: x.strftime('%Y-%m-%d') if not pd.isnull(x) else ''
)
# Impute missing values with fixed constants (per the author's notes these
# were read off the column medians; they are not recomputed here).
# Assigning the result back replaces the chained
# `coin_ico[col].fillna(..., inplace=True)` form, which operates on a
# temporary object and does not reliably update the frame in recent pandas.
coin_ico['ico_duration'] = coin_ico['ico_duration'].fillna(31)
coin_ico['raised_usd'] = coin_ico['raised_usd'].fillna(0)
coin_ico['distributed_in_ico'] = coin_ico['distributed_in_ico'].fillna(0.5)
#Winsorize variables
def clip_series(s, lower, upper):
    """Winsorise a Series by clamping it to two of its own quantiles.

    Args:
        s: pandas Series to clamp.
        lower: quantile level whose value becomes the floor.
        upper: quantile level whose value becomes the ceiling.

    Returns:
        A Series in which values below the ``lower`` quantile or above the
        ``upper`` quantile are replaced by that quantile's value.
    """
    floor = s.quantile(lower)
    ceiling = s.quantile(upper)
    return s.clip(lower=floor, upper=ceiling)
# Columns to winsorise; each gets a companion '<name>_winsorised' column
# clamped to its 0.5th and 99.5th percentiles.
winsorize_list = ['ico_duration', 'raised_usd', 'distributed_in_ico']
for column in winsorize_list:
    raw_values = coin_ico[column]
    coin_ico[f'{column}_winsorised'] = clip_series(raw_values, 0.005, 0.995)
# Print the median of each raw column (same order as the list above); these
# figures are what informed the constants used in the earlier fillna step.
for column in winsorize_list:
    print(coin_ico[column].median())
# Create a dummy variable indicating whether each Initial Coin Offering was
# successful: 1 when the (winsorised) amount raised is at least 500k USD, the
# prescribed yardstick of a successful ICO, and 0 otherwise.
# The comparison is vectorised over the whole column, replacing the former
# row-by-row loop and its temporary list.
coin_ico['is_successful'] = (coin_ico['raised_usd_winsorised'] >= 500000).astype(int)
print(coin_ico['is_successful'])
# The 'is_successful' column now indicates whether the respective tokens had
# successful ICOs.
# Remove the raw (pre-winsorised) columns so they cannot be collinear with
# their winsorised counterparts in the analysis that follows.
coin_ico = coin_ico.drop(['ico_duration', 'raised_usd', 'distributed_in_ico'], axis=1)
coin_ico.head()
# Share of ICOs that reached the $500000 soft cap used in the reference paper.
reached_soft_cap = coin_ico['raised_usd_winsorised'] >= 500000
print(sum(reached_soft_cap) / len(coin_ico))
# One-hot encode 'whitelist': the column is replaced by one dummy column per
# distinct whitelist value.
coin_ico = pd.get_dummies(coin_ico, columns=['whitelist'])
coin_ico.head()
# General visualisation of the correlations among the variables created and
# cleaned above.
f, ax = plt.subplots(figsize=(10, 8))
# Correlate numeric/boolean columns only: the frame still carries text columns
# (e.g. name, country, the formatted dates), and DataFrame.corr() raises on
# those in pandas >= 2.0 instead of silently skipping them.
corr = coin_ico.select_dtypes(include=['number', 'bool']).corr()
# Generate a custom diverging colormap
cmap = sns.diverging_palette(230, 20, as_cmap=True)
# Plot heatmap
sns.heatmap(corr, cmap=cmap, square=True, ax=ax)
#g = sns.pairplot(coin_ico[['is_successful','ico_duration', 'is_ico', 'raised_usd', 'kyc','rating','bonus','teamsize','whitelist_No','whitelist_Yes','country']], hue='is_successful',palette='gnuplot2')
#g.fig.suptitle("Pairplot, colour-coded with country of origin",y=1.1)
def successful_visual(dependent, independent, dataframe, colour):
    """Show a Plotly scatter plot of ``dependent`` against ``independent``.

    The figure carries a violin marginal on the y axis and a box marginal on
    the x axis, and is displayed immediately via ``fig.show()``.

    Args:
        dependent: name of the column plotted on the y axis (also appended
            to the figure title).
        independent: name of the column plotted on the x axis; rows where
            this column is missing are left out.
        dataframe: DataFrame holding the columns.
        colour: name of the column used to colour the points.

    Returns:
        None.
    """
    # `column != None` is evaluated element-wise and is True for NaN as well,
    # so it never removed anything; notna() actually drops the missing rows.
    plotted_rows = dataframe[dataframe[independent].notna()]
    fig = px.scatter(
        plotted_rows,
        x=independent,
        y=dependent,
        color=colour,
        title="Successfulness:" + dependent,
        marginal_y="violin",
        marginal_x="box",
    )
    fig.show()
# Exploratory scatter plots, one per (dependent, independent, colour) triple.
scatter_specs = [
    ('raised_usd_winsorised', 'rating', 'rating'),
    ('raised_usd_winsorised', 'ico_duration_winsorised', 'distributed_in_ico_winsorised'),
    ('country', 'raised_usd_winsorised', 'raised_usd_winsorised'),
]
for y_column, x_column, colour_column in scatter_specs:
    successful_visual(y_column, x_column, coin_ico, colour_column)
Experimenting on Regressions
# Regression trial (OLS): amount raised explained by team size, rating, ICO
# duration and the share of tokens distributed in the ICO.
# This trial previously appeared twice, byte for byte; it is now run (and its
# summary printed) once.
X = coin_ico[['teamsize', 'rating', 'ico_duration_winsorised', 'distributed_in_ico_winsorised']]
y = coin_ico['raised_usd_winsorised']
# Add an explicit intercept column to the regressors.
X = sm.add_constant(X)
# Note the argument order: sm.OLS takes the dependent variable first, then
# the regressors. missing='drop' leaves out rows containing NaN.
model = sm.OLS(y.astype(float), X.astype(float), missing='drop').fit()
predictions = model.predict(X.astype(float))  # make the predictions by the model
# Print out the statistics
print(model.summary())
# Regression trial (logit model): probability that an ICO is successful.
# The logarithm is only defined for positive ratings. Non-positive ratings are
# masked to NaN first, so those rows are dropped by missing='drop' below
# instead of entering the model as -inf (which is not treated as missing).
positive_rating = coin_ico['rating'].where(coin_ico['rating'] > 0)
coin_ico['log_rating'] = np.log(positive_rating)
X = coin_ico[['teamsize', 'log_rating', 'kyc', 'whitelist_No', 'ico_duration_winsorised']]
y = coin_ico['is_successful']
# Add an explicit intercept column to the regressors.
X = sm.add_constant(X)
# Note the argument order: sm.Logit takes the dependent variable first.
model = sm.Logit(y, X.astype(float), missing='drop').fit()
# Print out the statistics
print(model.summary2())
# NOTE: at="mean" evaluates the marginal effects at the sample mean of each
# regressor; effects averaged over all observations would be at="overall".
# The variable name is kept as-is for compatibility.
average_marginal_effect = model.get_margeff(at="mean", method="dydx")
print(average_marginal_effect.summary())
# One logistic-fit plot per regressor of the logit model: (column, x-axis label).
logit_plot_specs = [
    ('teamsize', "Team Size"),
    ('log_rating', "Logarithmic form of Rating"),
    ('kyc', "Know Your Customer Dummy Variable"),
    ('whitelist_No', "Not being on White List Dummy Variable "),
    ('ico_duration_winsorised', "ICO Duration"),
]
for regressor, x_label in logit_plot_specs:
    # Give every plot its own figure; drawing them all on the current axes
    # overlays the five curves and leaves only the last axis labels visible.
    _, regplot_ax = plt.subplots()
    sns.regplot(x=regressor, y="is_successful", data=coin_ico, logistic=True, ax=regplot_ax)
    regplot_ax.set_ylabel("Probability of Successful ICO")
    regplot_ax.set_xlabel(x_label)