# Start writing code here...
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Read the CSV into a DataFrame and take a first look at its layout.
df = pd.read_csv('/work/tord_v3_edited.csv')
df.info()
df.head()

# Rows with no recorded raise are counted as having raised nothing.
df['raised_usd'] = df['raised_usd'].fillna(0)

# Binary target: 1 when at least 500,000 USD was raised, otherwise 0.
df['success'] = (df['raised_usd'] >= 500000).astype(int)
# Potential multicollinearity issue: inspect the pairwise correlations of the
# candidate regressors before building any model.
# Only numeric/boolean columns are correlated. Since pandas 2.0,
# DataFrame.corr() no longer silently skips non-numeric columns and raises
# if the frame contains any (e.g. text columns), so select them explicitly.
corr = df.select_dtypes(include=['number', 'bool']).corr()
plt.figure(figsize=(21, 16))
sns.heatmap(corr)
plt.show()
# Compare the full column list with the columns that entered the matrix.
df.columns
corr.columns
!pip install statsmodels
import statsmodels.api as sm
from statsmodels.formula.api import ols, logit
# dependent/target/outcome variable
y = df['success']
# independent/predictor/explanatory variables
# 'raised_usd' must NOT be a regressor: 'success' is derived directly from it
# (raised_usd >= 500000), so including it leaks the target and separates the
# two classes perfectly, which leaves the logit coefficients unidentified.
X = df[['is_ico', 'is_sto',
        'token_for_sale', 'bonus', 'bounty',
        'rating', 'teamsize', 'ERC20']]  # not used here: 'distributed_in_ico', 'platform', 'kyc'
# A. Logit regression
# turn independent variables into floating type (best practice)
# missing='drop' drops rows with missing values from the regression
# NOTE(review): no constant column is added, so this model has no intercept;
# confirm that this is intended.
logit_model = sm.Logit(y, X.astype(float), missing='drop')
# fit logit model into the data
result = logit_model.fit()
# summarize the logit model
print(result.summary2())
import statsmodels.api as sm
from statsmodels.formula.api import ols, logit
# Second logit specification: same outcome, wider set of predictors
# ('raised_usd' is left out).
y = df['success']
X = df[[
    'is_ico',
    'is_sto',
    'kyc',
    'bonus',
    'bounty',
    'rating',
    'teamsize',
    'ERC20',
    'platform',
    'token_for_sale',
    'distributed_in_ico',
]]

# Regressors are cast to float before estimation; rows containing missing
# values are excluded through missing='drop'.
# NOTE(review): no constant column is added before fitting, so the model is
# estimated without an intercept; confirm that this is intended.
logit_model = sm.Logit(y, X.astype(float), missing='drop')

# Estimate the coefficients and print the results table.
result = logit_model.fit()
print(result.summary2())
# Marginal effects (dy/dx) of the most recently fitted model.
# NOTE: at="mean" evaluates the effects at the mean of each regressor; that
# is not the same as averaging the per-observation effects, despite what the
# variable name suggests.
average_marginal_effect = result.get_margeff(method="dydx", at="mean")
print(average_marginal_effect.summary())

# Plot success against rating with a fitted logistic curve; the 0/1 outcome
# gets a small vertical jitter.
sns.regplot(
    data=df,
    x="rating",
    y="success",
    logistic=True,
    y_jitter=0.05,
)
plt.ylabel("success probability")
# B. Linear Probability Model
# Ordinary least squares on the binary outcome (this is OLS, not a logit).
# An intercept column is added first; X keeps that column from here on.
X = sm.add_constant(X)
ols_model = sm.OLS(
    y,
    X.astype(float),
    missing='drop',
)
result = ols_model.fit()
print(result.summary2())
# C. Logit regression with a log-transformed 'token_for_sale'
# (the model fitted below is sm.Logit, not a linear probability model)
# dependent/target/outcome variable
y3 = df['success']
# The logarithm is only defined for strictly positive values. Non-positive
# entries are masked to NaN first, so their rows are removed by
# missing='drop' below; log(0) would otherwise put -inf into the design
# matrix, which is not treated as a missing value.
df['token_for_sale_log'] = np.log(
    df['token_for_sale'].where(df['token_for_sale'] > 0)
)
# independent/predictor/explanatory variables ('raised_usd' is left out)
X3 = df[['is_ico', 'is_sto', 'kyc', 'bonus', 'bounty',
         'rating', 'teamsize', 'ERC20', 'platform',
         'token_for_sale_log', 'distributed_in_ico']]
# turn independent variables into floating type (best practice)
# missing='drop' drops rows with missing values from the regression
logit_model = sm.Logit(y3, X3.astype(float), missing='drop')
# fit logit model into the data
result = logit_model.fit()
# summarize the logit model
print(result.summary2())
# Use wrapper lazypredict
!pip install lazypredict
from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# repeatable. X and y are whatever was last assigned to those names above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit all lazypredict classifiers, keeping each model's predictions as well
# as the score table.
clf = LazyClassifier(predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)
predictions
models