import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
import seaborn as sns
from sklearn.decomposition import PCA, KernelPCA
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
# Load the placement dataset from the fixed sample-data path.
df = pd.read_csv("/content/sample_data/Placement_Data_Full_Class.csv")
df

# Replace the "status" column with integer class labels.
label = LabelEncoder()
label.fit(df["status"])
df["status"] = label.transform(df["status"])

# Fill missing salaries with the mean of the observed salaries.
mean_salary = df["salary"].mean()
df["salary"] = df["salary"].fillna(value=mean_salary)

# One-hot encode a copy of the frame and draw its annotated correlation
# heatmap on a 15x9 figure.
df_g = pd.get_dummies(df)
sns.set(rc={"figure.figsize": (15, 9)})
correlations = df_g.corr()
sns.heatmap(correlations, annot=True)
# Mean of every numeric column for each rounded `ssc_p` value.
# Only numeric columns are aggregated: asking for the mean of the string-valued
# columns fails on pandas versions that no longer drop such columns silently.
df_s = pd.pivot_table(
    df.drop(columns="ssc_p").select_dtypes(include="number"),
    index=df["ssc_p"].round(),
    aggfunc="mean",
)
df_s = df_s.reset_index()
# Plot against the rounded ssc_p value itself; after reset_index() the frame's
# index is only the row position, not the score.
sns.lineplot(x=df_s["ssc_p"], y=df_s["status"])
# Encode "workex" as integers *before* aggregating. While it is still a string
# column it cannot be averaged, so the pivot below would not contain the
# "workex" column that the bar plot reads.
label = LabelEncoder()
df["workex"] = label.fit_transform(df["workex"])

# Mean of every numeric column for each value of "degree_t".
df_c = pd.pivot_table(
    df,
    index="degree_t",
    values=df.select_dtypes(include="number").columns.tolist(),
)
df_c = df_c.reset_index()
df_c
# Bar height is the mean encoded "workex" value within each degree type.
sns.barplot(x=df_c["degree_t"], y=df_c["workex"])
# One-hot encode the remaining categorical columns in place.
df = pd.get_dummies(df)
df

# Features are everything except the target ("status"), "salary" and "sl_no".
X = df.drop(columns=["status", "salary", "sl_no"])
y = df["status"]

# Standardise the features before the decomposition.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Diagnostic plot of the explained-variance ratio per component.
# KernelPCA does not expose `n_components_` / `explained_variance_ratio_`, so a
# plain linear PCA is fitted on the scaled features purely for this plot.
pca = PCA().fit(X)
sns.lineplot(x=range(pca.n_components_), y=pca.explained_variance_ratio_)

# Project the scaled features onto two polynomial-kernel principal components.
kpca = KernelPCA(n_components=2, kernel="poly")
kpca = kpca.fit(X)
X = kpca.transform(X)

# X is a 2-D ndarray here: select the component *columns* (X[0] / X[1] would be
# the first two rows and would not match the length of the hue vector).
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=df["status"])
X
# Wrap in a DataFrame so the components can be addressed as X[0] and X[1].
X = pd.DataFrame(X)
# Random forest tuned over the number of trees with 10-fold cross-validation.
# `n_jobs` only controls parallelism and does not change the fitted model, so it
# is not part of the search grid (searching it just triples the number of fits).
# A fixed seed makes the comparison between grid points reproducible.
randomforestclassifier = RandomForestClassifier(random_state=0)
gridsearchcv = GridSearchCV(randomforestclassifier, {"n_estimators": [10, 100, 200]}, cv=10)
gridsearchcv = gridsearchcv.fit(X, y)
gridsearchcv.best_score_
# Colour the two components by the predictions made for the same X that the
# search was fitted on.
sns.scatterplot(x=X[0], y=X[1], hue=gridsearchcv.predict(X))
# Bagged logistic regression tuned over the ensemble size with 5-fold
# cross-validation.
logistic = LogisticRegression()
# The wrapped estimator is passed positionally: its keyword was renamed from
# `base_estimator` to `estimator` across scikit-learn releases, while the first
# positional parameter is the wrapped estimator under either name.
bagging = BaggingClassifier(logistic, random_state=0)
# `n_jobs` only controls parallelism and does not change the fitted model, so it
# is not part of the search grid.
gridsearchcv = GridSearchCV(bagging, {"n_estimators": [10, 15, 20]}, cv=5)
gridsearchcv = gridsearchcv.fit(X, y)
gridsearchcv.best_score_
# Colour the two components by the predictions made for the same X that the
# search was fitted on.
sns.scatterplot(x=X[0], y=X[1], hue=gridsearchcv.predict(X))
X
# Gradient boosting tuned over the number of estimators and the learning rate
# with 5-fold cross-validation.
gradientboostingclassifier = GradientBoostingClassifier()
gb_param_grid = {
    "n_estimators": [10, 50, 100],
    "learning_rate": [0.01, 0.1, 1],
}
gridsearchcv = GridSearchCV(gradientboostingclassifier, gb_param_grid, cv=5).fit(X, y)
gridsearchcv.best_score_
# Colour the two components by the predictions made for the same X that the
# search was fitted on.
gb_predictions = gridsearchcv.predict(X)
sns.scatterplot(x=X[0], y=X[1], hue=gb_predictions)