import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv('/work/tord_v3_edited.csv')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6415 entries, 0 to 6414
Data columns (total 34 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 6415 non-null int64
1 name 6415 non-null object
2 token 6282 non-null object
3 country 6415 non-null object
4 is_ico 6415 non-null int64
5 is_ieo 6415 non-null int64
6 is_sto 6415 non-null int64
7 ico_start 5636 non-null object
8 ico_end 5497 non-null object
9 price_usd 5928 non-null object
10 raised_usd 2139 non-null float64
11 distributed_in_ico 4870 non-null float64
12 sold_tokens 192 non-null float64
13 token_for_sale 5122 non-null float64
14 whitelist 3882 non-null object
15 kyc 6415 non-null int64
16 bonus 6415 non-null int64
17 restricted_areas 2343 non-null object
18 min_investment 2079 non-null object
19 bounty 6415 non-null int64
20 mvp 1296 non-null object
21 pre_ico_start 2717 non-null object
22 pre_ico_end 2705 non-null object
23 pre_ico_price_usd 1733 non-null object
24 platform 6415 non-null int64
25 accepting 5545 non-null object
26 link_white_paper 5828 non-null object
27 linkedin_link 4355 non-null object
28 github_link 5649 non-null object
29 website 5649 non-null object
30 rating 5709 non-null float64
31 teamsize 4622 non-null float64
32 Coinmarketcap_identifier 1281 non-null float64
33 ERC20 5679 non-null float64
dtypes: float64(8), int64(8), object(18)
memory usage: 1.7+ MB
#df.replace(('yes', 'no'), (1, 0), inplace=True)
df['whitelist'].replace(('Yes', 'No'), (1, 0), inplace=True)
df.head(20)
df.describe()
Q1. What defines the success of an ICO (i.e., what is the Y)?
The ICO is defined as success when the raised used exeeds 500,000, where 'success' dummy takes the value of 1.
# sanitize "raised_usd"
df['raised_usd'] = df['raised_usd'].fillna(0)
# create outcome variable success
df['success'] = np.where(df['raised_usd'] >= 500000, 1, 0)
df['success'].describe()
sns.pairplot(df, vars = ['raised_usd','teamsize','success'])
# Potential multicollinearity issue
corr = df.corr()
sns.heatmap(corr)
plt.show()
!pip install statsmodels
Requirement already satisfied: statsmodels in /root/venv/lib/python3.7/site-packages (0.13.0)
Requirement already satisfied: numpy>=1.17 in /root/venv/lib/python3.7/site-packages (from statsmodels) (1.19.1)
Requirement already satisfied: scipy>=1.3 in /root/venv/lib/python3.7/site-packages (from statsmodels) (1.5.4)
Requirement already satisfied: patsy>=0.5.2 in /root/venv/lib/python3.7/site-packages (from statsmodels) (0.5.2)
Requirement already satisfied: pandas>=0.25 in /root/venv/lib/python3.7/site-packages (from statsmodels) (1.0.5)
Requirement already satisfied: pytz>=2017.2 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas>=0.25->statsmodels) (2021.3)
Requirement already satisfied: python-dateutil>=2.6.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas>=0.25->statsmodels) (2.8.2)
Requirement already satisfied: six in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from patsy>=0.5.2->statsmodels) (1.16.0)
WARNING: You are using pip version 21.2.4; however, version 21.3 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
Q2. What are the factors that determine the success of an ICO (i.e., what are the Xs)?
import statsmodels.api as sm
from statsmodels.formula.api import ols, logit
# dependent/target/outcome variable
y = df['success']
#creating interaction variables
df['teamsizeXERC20']=df['teamsize']*df['ERC20']
# A larger team size can further benefit from higher potential for further related APP development captured by ERC20
df['whitelistXbonus']=df['whitelist']*df['bonus']
df['whitelistXbounty']=df['whitelist']*df['bounty']
df['eos_teamsize']=df['teamsize']*df['teamsize']
#being on whitelist may attract more attention and amplify the effects of bonus and bounty
# independent/predictor/explanatory variable
X = df[['is_ieo','is_sto','teamsize', 'rating','kyc','bonus','token_for_sale','bounty','ERC20','whitelist','teamsizeXERC20','whitelistXbonus','whitelistXbounty','eos_teamsize']]
# A. Logit regression
# turn independent variables into floating type (best practice)
# 'missing='drop'' drops rows with missing values from the regression
logit_model=sm.Logit(y,X.astype(float), missing='drop' )
# fit logit model into the data
result=logit_model.fit()
# summarize the logit model
print(result.summary2())
Optimization terminated successfully.
Current function value: 0.564458
Iterations 8
Results: Logit
=================================================================
Model: Logit Pseudo R-squared: 0.070
Dependent Variable: success AIC: 3053.4930
Date: 2021-10-22 12:07 BIC: 3136.0030
No. Observations: 2680 Log-Likelihood: -1512.7
Df Model: 13 LL-Null: -1626.8
Df Residuals: 2666 LLR p-value: 2.1621e-41
Converged: 1.0000 Scale: 1.0000
No. Iterations: 8.0000
-----------------------------------------------------------------
Coef. Std.Err. z P>|z| [0.025 0.975]
-----------------------------------------------------------------
is_ieo 0.2004 0.1764 1.1359 0.2560 -0.1454 0.5462
is_sto -1.5405 1.0773 -1.4299 0.1527 -3.6521 0.5711
teamsize -0.0239 0.0209 -1.1456 0.2520 -0.0649 0.0170
rating -0.0030 0.0576 -0.0519 0.9586 -0.1158 0.1098
kyc 0.0470 0.1052 0.4465 0.6552 -0.1592 0.2531
bonus -2.2156 0.4300 -5.1524 0.0000 -3.0584 -1.3728
token_for_sale -0.0000 0.0000 -1.2500 0.2113 -0.0000 0.0000
bounty -0.1834 0.1221 -1.5025 0.1330 -0.4226 0.0558
ERC20 -1.6384 0.1880 -8.7130 0.0000 -2.0070 -1.2699
whitelist -0.0896 0.1225 -0.7320 0.4642 -0.3296 0.1504
teamsizeXERC20 0.1133 0.0168 6.7588 0.0000 0.0805 0.1462
whitelistXbonus -0.8344 0.6301 -1.3243 0.1854 -2.0694 0.4005
whitelistXbounty 0.2214 0.1774 1.2475 0.2122 -0.1264 0.5692
eos_teamsize -0.0006 0.0005 -1.1691 0.2423 -0.0015 0.0004
=================================================================
sns.regplot(x = "rating", y = "success", data = df,
logistic = True, y_jitter = .05)
plt.ylabel("success probability")
'''
options for "at"
1. 'overall' The average of the marginal effects at each observation
2. 'mean' The marginal effects at the mean of each regressor
3. 'median' The marginal effects at the median of each regressor
4. 'zero' The marginal effects at zero for each regressor
5. 'all' The marginal effects at each observation.
options for "method"
1. 'dydx' No transformation is made and amrginal effects are returned
2. 'eyex' estimate elasticities of variables in exog
3. 'dyex' estimate semi-elasticity
4. 'eydx' estimate semi-elasticity
'''
average_marginal_effect = result.get_margeff(at = "mean", method = "dydx")
print(average_marginal_effect.summary())
Logit Marginal Effects
=====================================
Dep. Variable: success
Method: dydx
At: mean
====================================================================================
dy/dx std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------------
is_ieo 0.0398 0.035 1.136 0.256 -0.029 0.108
is_sto -0.3059 0.214 -1.431 0.152 -0.725 0.113
teamsize -0.0048 0.004 -1.146 0.252 -0.013 0.003
rating -0.0006 0.011 -0.052 0.959 -0.023 0.022
kyc 0.0093 0.021 0.447 0.655 -0.032 0.050
bonus -0.4399 0.083 -5.278 0.000 -0.603 -0.277
token_for_sale -1.423e-14 1.14e-14 -1.252 0.211 -3.65e-14 8.05e-15
bounty -0.0364 0.024 -1.503 0.133 -0.084 0.011
ERC20 -0.3253 0.037 -8.867 0.000 -0.397 -0.253
whitelist -0.0178 0.024 -0.732 0.464 -0.065 0.030
teamsizeXERC20 0.0225 0.003 6.827 0.000 0.016 0.029
whitelistXbonus -0.1657 0.124 -1.331 0.183 -0.410 0.078
whitelistXbounty 0.0440 0.035 1.248 0.212 -0.025 0.113
eos_teamsize -0.0001 9.63e-05 -1.171 0.242 -0.000 7.6e-05
====================================================================================
# B. Linear Probability Model
# logit regression
X = sm.add_constant(X)
ols_model=sm.OLS(y,X.astype(float), missing='drop')
result=ols_model.fit()
print(result.summary2())
Results: Ordinary least squares
==================================================================
Model: OLS Adj. R-squared: 0.116
Dependent Variable: success AIC: 3084.3281
Date: 2021-10-22 12:07 BIC: 3172.7317
No. Observations: 2680 Log-Likelihood: -1527.2
Df Model: 14 F-statistic: 26.19
Df Residuals: 2665 Prob (F-statistic): 6.56e-65
R-squared: 0.121 Scale: 0.18404
------------------------------------------------------------------
Coef. Std.Err. t P>|t| [0.025 0.975]
------------------------------------------------------------------
const -0.2866 0.0621 -4.6176 0.0000 -0.4083 -0.1649
is_ieo 0.0173 0.0356 0.4864 0.6268 -0.0525 0.0871
is_sto -0.1987 0.1364 -1.4570 0.1452 -0.4661 0.0687
teamsize 0.0162 0.0042 3.8037 0.0001 0.0078 0.0245
rating 0.1249 0.0150 8.3280 0.0000 0.0955 0.1543
kyc 0.0153 0.0197 0.7752 0.4383 -0.0234 0.0540
bonus -0.2746 0.0429 -6.4073 0.0000 -0.3586 -0.1905
token_for_sale -0.0000 0.0000 -1.3144 0.1888 -0.0000 0.0000
bounty -0.0258 0.0231 -1.1175 0.2639 -0.0710 0.0194
ERC20 0.0610 0.0470 1.2973 0.1946 -0.0312 0.1531
whitelist 0.0126 0.0237 0.5324 0.5945 -0.0339 0.0592
teamsizeXERC20 -0.0028 0.0037 -0.7521 0.4520 -0.0100 0.0045
whitelistXbonus -0.0639 0.0557 -1.1472 0.2514 -0.1731 0.0453
whitelistXbounty 0.0032 0.0336 0.0958 0.9237 -0.0627 0.0691
eos_teamsize -0.0001 0.0001 -1.3708 0.1705 -0.0003 0.0000
------------------------------------------------------------------
Omnibus: 872.133 Durbin-Watson: 1.786
Prob(Omnibus): 0.000 Jarque-Bera (JB): 335.705
Skew: 0.699 Prob(JB): 0.000
Kurtosis: 1.973 Condition No.: 27434281602011
==================================================================
* The condition number is large (3e+13). This might indicate
strong multicollinearity or other numerical problems.
# Use wrapper lazypredict
!pip install lazypredict
Requirement already satisfied: lazypredict in /root/venv/lib/python3.7/site-packages (0.2.9)
Collecting six==1.15.0
Using cached six-1.15.0-py2.py3-none-any.whl (10 kB)
Requirement already satisfied: PyYAML==5.3.1 in /root/venv/lib/python3.7/site-packages (from lazypredict) (5.3.1)
Requirement already satisfied: numpy==1.19.1 in /root/venv/lib/python3.7/site-packages (from lazypredict) (1.19.1)
Requirement already satisfied: pandas==1.0.5 in /root/venv/lib/python3.7/site-packages (from lazypredict) (1.0.5)
Requirement already satisfied: xgboost==1.1.1 in /root/venv/lib/python3.7/site-packages (from lazypredict) (1.1.1)
Requirement already satisfied: joblib==1.0.0 in /root/venv/lib/python3.7/site-packages (from lazypredict) (1.0.0)
Requirement already satisfied: pytest==5.4.3 in /root/venv/lib/python3.7/site-packages (from lazypredict) (5.4.3)
Requirement already satisfied: tqdm==4.56.0 in /root/venv/lib/python3.7/site-packages (from lazypredict) (4.56.0)
Requirement already satisfied: scipy==1.5.4 in /root/venv/lib/python3.7/site-packages (from lazypredict) (1.5.4)
Requirement already satisfied: lightgbm==2.3.1 in /root/venv/lib/python3.7/site-packages (from lazypredict) (2.3.1)
Requirement already satisfied: scikit-learn==0.23.1 in /root/venv/lib/python3.7/site-packages (from lazypredict) (0.23.1)
Requirement already satisfied: click==7.1.2 in /root/venv/lib/python3.7/site-packages (from lazypredict) (7.1.2)
Requirement already satisfied: pytz>=2017.2 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pandas==1.0.5->lazypredict) (2021.3)
Requirement already satisfied: python-dateutil>=2.6.1 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pandas==1.0.5->lazypredict) (2.8.2)
Requirement already satisfied: packaging in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (21.0)
Requirement already satisfied: py>=1.5.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (1.10.0)
Collecting pluggy<1.0,>=0.12
Using cached pluggy-0.13.1-py2.py3-none-any.whl (18 kB)
Requirement already satisfied: attrs>=17.4.0 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (21.2.0)
Requirement already satisfied: wcwidth in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (0.2.5)
Requirement already satisfied: more-itertools>=4.0.0 in /root/venv/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (8.10.0)
Requirement already satisfied: importlib-metadata>=0.12 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from pytest==5.4.3->lazypredict) (4.8.1)
Requirement already satisfied: threadpoolctl>=2.0.0 in /shared-libs/python3.7/py/lib/python3.7/site-packages (from scikit-learn==0.23.1->lazypredict) (3.0.0)
Requirement already satisfied: typing-extensions>=3.6.4 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata>=0.12->pytest==5.4.3->lazypredict) (3.10.0.2)
Requirement already satisfied: zipp>=0.5 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from importlib-metadata>=0.12->pytest==5.4.3->lazypredict) (3.6.0)
Requirement already satisfied: pyparsing>=2.0.2 in /shared-libs/python3.7/py-core/lib/python3.7/site-packages (from packaging->pytest==5.4.3->lazypredict) (2.4.7)
Installing collected packages: six, pluggy
Attempting uninstall: six
Found existing installation: six 1.16.0
Not uninstalling six at /shared-libs/python3.7/py-core/lib/python3.7/site-packages, outside environment /root/venv
Can't uninstall 'six'. No files were found to uninstall.
Attempting uninstall: pluggy
Found existing installation: pluggy 1.0.0
Not uninstalling pluggy at /shared-libs/python3.7/py-core/lib/python3.7/site-packages, outside environment /root/venv
Can't uninstall 'pluggy'. No files were found to uninstall.
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.4.1 requires numpy~=1.19.2, but you have numpy 1.19.1 which is incompatible.
tensorflow 2.4.1 requires typing-extensions~=3.7.4, but you have typing-extensions 3.10.0.2 which is incompatible.
Successfully installed pluggy-0.13.1 six-1.15.0
WARNING: You are using pip version 21.2.4; however, version 21.3 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split
/root/venv/lib/python3.7/site-packages/sklearn/utils/deprecation.py:143: FutureWarning: The sklearn.utils.testing module is deprecated in version 0.22 and will be removed in version 0.24. The corresponding classes / functions should instead be imported from sklearn.utils. Anything that cannot be imported from sklearn.utils is now part of the private API.
warnings.warn(message, FutureWarning)
# load data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=42)
# fit all models
clf = LazyClassifier(predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
100%|██████████| 29/29 [00:11<00:00, 2.44it/s]
models
predictions