import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,confusion_matrix,matthews_corrcoef,cohen_kappa_score,precision_score,recall_score,accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from imblearn.over_sampling import RandomOverSampler,SMOTE
# Synthetic binary classification problem with a roughly 99:1 class imbalance.
X, y = make_classification(
    n_samples=50000,
    n_features=30,
    n_informative=15,
    n_redundant=15,
    class_sep=.66,
    weights=(.99, 1-.99),
    random_state=1,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=2)

# Standardize the features. The scaler is fitted on the training split only
# and then applied as-is to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Reduce dimensionality with PCA, again fitted on the training split only.
# A fractional n_components asks PCA to keep as many components as needed to
# explain that share of the variance (here 95%).
pca = PCA(n_components=.95).fit(X_train_scaled)
X_train_scaled_pca = pca.transform(X_train_scaled)
X_test_scaled_pca = pca.transform(X_test_scaled)

# Build two rebalanced versions of the training data; the test split is never
# resampled.
# 1) Random oversampling: duplicate minority samples up to a 1:1 ratio.
oversample = RandomOverSampler(sampling_strategy=1, random_state=2)
X_over, y_over = oversample.fit_resample(X_train_scaled_pca, y_train)

# 2) SMOTE with its default sampling strategy: synthesize new minority samples.
smote = SMOTE(random_state=2)
X_SMOTE, y_SMOTE = smote.fit_resample(X_train_scaled_pca, y_train)
def _fit_and_report(X_fit, y_fit, X_eval, y_eval):
    """Train a random forest on one training set and print its test metrics.

    The forest is always built with the same hyper-parameters and seed, so
    differences between runs come only from the training data passed in.

    Prints, one per line and in this order: the flattened confusion matrix,
    Matthews correlation coefficient, Cohen's kappa, precision, recall and
    accuracy.

    Args:
        X_fit: Feature matrix used to fit the model.
        y_fit: Labels matching ``X_fit``.
        X_eval: Feature matrix the fitted model predicts on.
        y_eval: True labels matching ``X_eval``.

    Returns:
        A ``(model, predictions)`` tuple: the fitted classifier and its
        predicted labels for ``X_eval``.
    """
    model = RandomForestClassifier(min_samples_leaf=25, max_depth=8, random_state=2)
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)

    # For a binary problem ravel() yields the counts as: tn fp fn tp
    print(confusion_matrix(y_eval, predictions).ravel())
    for metric in (
        matthews_corrcoef,
        cohen_kappa_score,
        precision_score,
        recall_score,
        accuracy_score,
    ):
        print(metric(y_eval, predictions))

    return model, predictions


# Every run is scored on the same untouched test split; only the training data
# changes. RndFor / y_pred are rebound on each run, so after the last call they
# hold the SMOTE-trained model and its predictions.

# Run 1 - baseline: original, imbalanced training data.
RndFor, y_pred = _fit_and_report(X_train_scaled_pca, y_train, X_test_scaled_pca, y_test)

# Run 2 - training data balanced by random oversampling.
RndFor, y_pred = _fit_and_report(X_over, y_over, X_test_scaled_pca, y_test)

# Run 3 - training data balanced by SMOTE.
RndFor, y_pred = _fit_and_report(X_SMOTE, y_SMOTE, X_test_scaled_pca, y_test)