Untitled Python Project

# Data handling, numerics and plotting libraries used by the cells below.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset from its CSV file and print a summary of the frame.
df = pd.read_csv('/work/tord_v3_edited.csv')
df.info()

# Preview the first rows of the loaded frame.
df.head()

# Treat a missing 'raised_usd' value as 0.
df['raised_usd'] = df['raised_usd'].fillna(0)

# Binary target: 1 when 'raised_usd' is at least 500000, otherwise 0.
df['success'] = np.where(df['raised_usd'] >= 500000, 1, 0)

# Potential multicollinearity issue: inspect the pairwise correlations.
# numeric_only=True restricts the computation to numeric columns; without it
# pandas >= 2.0 raises if the frame contains any non-numeric column.
corr = df.corr(numeric_only=True)

plt.figure(figsize=(21, 16))
sns.heatmap(corr)
plt.show()

# List every column of the data frame.
df.columns

# List the columns present in the correlation matrix `corr`.
corr.columns

# Install statsmodels via an IPython shell escape (needed by the regression cells).
!pip install statsmodels

import statsmodels.api as sm
from statsmodels.formula.api import ols, logit

# dependent/target/outcome variable
y = df['success']

# independent/predictor/explanatory variables
# (not included in this specification: 'distributed_in_ico', 'platform', 'kyc')
# NOTE(review): 'success' is computed directly from 'raised_usd', so using
# 'raised_usd' as a predictor leaks the target and will likely produce
# (quasi-)perfect separation -- confirm this specification is intentional.
X = df[['is_ico', 'is_sto', 'raised_usd', 'token_for_sale', 'bonus',
        'bounty', 'rating', 'teamsize', 'ERC20']]

# A. Logit regression
# Cast the predictors to float (best practice); missing='drop' drops rows
# with missing values from the regression.
# NOTE(review): no constant column is added to X, so the model is fitted
# without an intercept -- confirm that is intended.
logit_model = sm.Logit(y, X.astype(float), missing='drop')

# fit the logit model to the data
result = logit_model.fit()

# summarize the logit model
print(result.summary2())

import statsmodels.api as sm
from statsmodels.formula.api import ols, logit

# dependent/target/outcome variable
y = df['success']

# independent/predictor/explanatory variables ('raised_usd' is left out)
X = df[['is_ico', 'is_sto', 'kyc', 'bonus', 'bounty', 'rating', 'teamsize',
        'ERC20', 'platform', 'token_for_sale', 'distributed_in_ico']]

# A. Logit regression
# Cast the predictors to float (best practice); missing='drop' drops rows
# with missing values from the regression.
# NOTE(review): no constant column is added to X, so the model is fitted
# without an intercept -- confirm that is intended.
logit_model = sm.Logit(y, X.astype(float), missing='drop')

# fit the logit model to the data
result = logit_model.fit()

# summarize the logit model
print(result.summary2())

# Marginal effects (dy/dx) of the most recently fitted model in `result`.
# NOTE(review): at="mean" evaluates the effects at the regressors' means;
# average marginal effects, as the variable name suggests, would be
# at="overall" -- confirm which one is wanted.
average_marginal_effect = result.get_margeff(at="mean", method="dydx")
print(average_marginal_effect.summary())

# Scatter 'success' against 'rating' with a fitted logistic curve; the small
# vertical jitter spreads the 0/1 outcomes so overlapping points stay visible.
sns.regplot(x="rating", y="success", data=df, logistic=True, y_jitter=.05)
plt.ylabel("success probability")
# Render the figure explicitly, as the correlation heatmap cell does.
plt.show()

# B. Linear Probability Model
# OLS regression of the binary outcome on the predictors, with an intercept.
# Note that X is rebound here and keeps the added constant column afterwards.
X = sm.add_constant(X)
ols_model = sm.OLS(y, X.astype(float), missing='drop')
result = ols_model.fit()
print(result.summary2())

# C. Logit model with log-transformed 'token_for_sale'
# NOTE(review): this cell was originally headed "Linear Probability Model
# with transformation", but the code fits sm.Logit -- if an LPM was intended,
# use sm.OLS with sm.add_constant instead; confirm.

# dependent/target/outcome variable
y3 = df['success']

# The log is undefined for values <= 0: np.log would yield -inf for 0 (which
# is not a missing value, so missing='drop' would keep it) and NaN for
# negatives. Mask non-positive values to NaN first so those rows are dropped.
token_for_sale = df['token_for_sale']
df['token_for_sale_log'] = np.log(token_for_sale.where(token_for_sale > 0))

# independent/predictor/explanatory variables ('raised_usd' is left out)
X3 = df[['is_ico', 'is_sto', 'kyc', 'bonus', 'bounty', 'rating', 'teamsize',
         'ERC20', 'platform', 'token_for_sale_log', 'distributed_in_ico']]

# Cast the predictors to float (best practice); missing='drop' drops rows
# with missing values from the regression.
logit_model = sm.Logit(y3, X3.astype(float), missing='drop')

# fit the logit model to the data
result = logit_model.fit()

# summarize the logit model
print(result.summary2())

# Use wrapper lazypredict !pip install lazypredict

from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split

# Split the predictors and target into train and test sets: 20% of the rows
# are held out, with a fixed random_state so the split is reproducible.
# NOTE(review): X is whatever the earlier cells last bound it to (the Linear
# Probability Model cell adds a constant column) -- confirm that is intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=.2, random_state=42
)

# fit all models
clf = LazyClassifier(predictions=True)
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Display the second object returned by clf.fit (LazyClassifier was built with predictions=True).
predictions

# Display the first object returned by clf.fit.
models