import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('/work/tord_v3_edited.csv')
df.info()
#df.replace(('yes', 'no'), (1, 0), inplace=True)
# recode the 'whitelist' Yes/No flag to 1/0; assigning back avoids pandas' chained-assignment pitfall
df['whitelist'] = df['whitelist'].replace({'Yes': 1, 'No': 0})
df.head(20)
df.describe()
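A quick scan of missing values motivates the cleaning below (a small supplementary check, not in the original workflow):
# count missing values per column, most-missing first
df.isna().sum().sort_values(ascending=False).head(10)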
Q1. What defines the success of an ICO (i.e., what is the Y)?
An ICO is defined as a success when the amount raised in USD ('raised_usd') reaches at least 500,000; the 'success' dummy takes the value 1 in that case and 0 otherwise.
# sanitize 'raised_usd': treat missing amounts as zero raised
df['raised_usd'] = df['raised_usd'].fillna(0)
# create outcome variable success
df['success'] = np.where(df['raised_usd'] >= 500000, 1, 0)
df['success'].describe()
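As a supplementary check of class balance before modelling (an addition to the original steps):
# share of successful vs. unsuccessful ICOs
df['success'].value_counts(normalize=True)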
sns.pairplot(df, vars = ['raised_usd','teamsize','success'])
# Potential multicollinearity issue: inspect pairwise correlations
corr = df.corr(numeric_only=True)  # numeric_only is required for mixed-type frames in pandas >= 2.0
sns.heatmap(corr)
plt.show()
!pip install statsmodels
Q2. What are the factors that determine the success of an ICO (i.e., what are the Xs)?
import statsmodels.api as sm
from statsmodels.formula.api import ols, logit
# dependent/target/outcome variable
y = df['success']
# creating interaction variables
# a larger team may benefit more from the potential for related app development that the ERC20 standard captures
df['teamsizeXERC20'] = df['teamsize'] * df['ERC20']
# being on a whitelist may attract more attention and amplify the effects of bonus and bounty
df['whitelistXbonus'] = df['whitelist'] * df['bonus']
df['whitelistXbounty'] = df['whitelist'] * df['bounty']
# squared term to allow a non-linear effect of team size
df['teamsize_sq'] = df['teamsize'] * df['teamsize']
# independent/predictor/explanatory variables
X = df[['is_ieo', 'is_sto', 'teamsize', 'rating', 'kyc', 'bonus', 'token_for_sale',
        'bounty', 'ERC20', 'whitelist', 'teamsizeXERC20', 'whitelistXbonus',
        'whitelistXbounty', 'teamsize_sq']]
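The heatmap above flags potential multicollinearity, and the interaction and squared terms are built directly from their components, so a variance inflation factor (VIF) check is a useful supplement (a sketch; constructed terms will mechanically show high VIFs):
from statsmodels.stats.outliers_influence import variance_inflation_factor
# VIF per regressor, computed on complete rows only
Xf = X.astype(float).dropna()
vif = pd.DataFrame({'variable': Xf.columns,
                    'VIF': [variance_inflation_factor(Xf.values, i) for i in range(Xf.shape[1])]})
print(vif.sort_values('VIF', ascending=False))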
# A. Logit regression
# add an intercept; sm.Logit does not include one by default
X = sm.add_constant(X)
# cast regressors to float (best practice); missing='drop' drops rows with missing values from the regression
logit_model = sm.Logit(y, X.astype(float), missing='drop')
# fit logit model into the data
result=logit_model.fit()
# summarize the logit model
print(result.summary2())
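Logit coefficients are on the log-odds scale; exponentiating them gives odds ratios, which are often easier to interpret (a small supplementary step):
# odds ratios: exp(beta) is the multiplicative change in the odds per one-unit change in X
print(np.exp(result.params))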
sns.regplot(x = "rating", y = "success", data = df,
logistic = True, y_jitter = .05)
plt.ylabel("success probability")
'''
options for "at"
1. 'overall' the average of the marginal effects at each observation
2. 'mean'    the marginal effects at the mean of each regressor
3. 'median'  the marginal effects at the median of each regressor
4. 'zero'    the marginal effects at zero for each regressor
5. 'all'     the marginal effects at each observation
options for "method"
1. 'dydx' no transformation is made and marginal effects are returned
2. 'eyex' estimate elasticities of variables in exog
3. 'dyex' estimate semi-elasticity -- dy / d(ln x)
4. 'eydx' estimate semi-elasticity -- d(ln y) / dx
'''
# at="mean" evaluates marginal effects at the mean of each regressor (MEM), not the average marginal effect
marginal_effect_at_mean = result.get_margeff(at="mean", method="dydx")
print(marginal_effect_at_mean.summary())
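For comparison, at="overall" (the default) averages the marginal effects across observations, i.e. the average marginal effect (AME), which is usually the preferred summary:
average_marginal_effect = result.get_margeff(at="overall", method="dydx")
print(average_marginal_effect.summary())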
# B. Linear Probability Model
# OLS regression on the binary outcome
X = sm.add_constant(X)  # no-op if the constant is already present
ols_model = sm.OLS(y, X.astype(float), missing='drop')
result=ols_model.fit()
print(result.summary2())
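Because the error term of a linear probability model is heteroskedastic by construction, heteroskedasticity-robust standard errors are advisable (a sketch using statsmodels' HC3 covariance option):
# refit the LPM with heteroskedasticity-robust (HC3) standard errors
result_robust = ols_model.fit(cov_type='HC3')
print(result_robust.summary2())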
# Use wrapper lazypredict
!pip install lazypredict
from lazypredict.Supervised import LazyClassifier
from sklearn.model_selection import train_test_split
# load data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
# fit all models
clf = LazyClassifier(predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
models
predictions
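To go beyond the leaderboard, a single candidate can be fit and inspected directly (a sketch; the choice of a random forest here is illustrative, not part of the original analysis):
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report
from sklearn.pipeline import make_pipeline
# impute any remaining missing values, then fit a forest on the training split
rf = make_pipeline(SimpleImputer(strategy='median'),
                   RandomForestClassifier(n_estimators=200, random_state=42))
rf.fit(X_train, y_train)
print(classification_report(y_test, rf.predict(X_test)))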