from google.colab import files
files.upload()
!ls -lha kaggle.json
-rw-r--r-- 1 root root 68 May 4 10:24 kaggle.json
!pip install -q kaggle
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
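# Optional alternative: instead of copying the token into ~/.kaggle, the CLI
# can be pointed at the upload directory via the documented KAGGLE_CONFIG_DIR
# environment variable (a sketch; '/content' is where Colab puts uploads):
import os
os.environ['KAGGLE_CONFIG_DIR'] = '/content'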
!ls
kaggle.json sample_data
!kaggle datasets list
Warning: Looks like you're using an outdated API Version, please consider updating (server 1.5.12 / client 1.5.4)
ref title size lastUpdated downloadCount
---------------------------------------------------------- ------------------------------------------------ ----- ------------------- -------------
gpreda/reddit-vaccine-myths Reddit Vaccine Myths 227KB 2021-05-03 13:54:12 4179
crowww/a-large-scale-fish-dataset A Large Scale Fish Dataset 3GB 2021-04-28 17:03:01 2397
promptcloud/careerbuilder-job-listing-2020 Careerbuilder Job Listing 2020 42MB 2021-03-05 06:59:52 541
mathurinache/twitter-edge-nodes Twitter Edge Nodes 342MB 2021-03-08 06:43:04 251
dhruvildave/wikibooks-dataset Wikibooks Dataset 1GB 2021-02-18 10:08:27 1482
imsparsh/musicnet-dataset MusicNet Dataset 22GB 2021-02-18 14:12:19 845
alsgroup/end-als End ALS Kaggle Challenge 12GB 2021-04-08 12:16:37 571
nickuzmenkov/nih-chest-xrays-tfrecords NIH Chest X-rays TFRecords 11GB 2021-03-09 04:49:23 387
simiotic/github-code-snippets GitHub Code Snippets 7GB 2021-03-03 11:34:39 93
fatiimaezzahra/famous-iconic-women Famous Iconic Women 838MB 2021-02-28 14:56:00 486
coloradokb/dandelionimages DandelionImages 4GB 2021-02-19 20:03:47 216
mathurinache/the-lj-speech-dataset The LJ Speech Dataset 3GB 2021-02-15 09:19:54 112
stuartjames/lights LightS: Light Specularity Dataset 18GB 2021-02-18 14:32:26 39
landrykezebou/lvzhdr-tone-mapping-benchmark-dataset-tmonet LVZ-HDR Tone Mapping Benchmark Dataset (TMO-Net) 24GB 2021-03-01 05:03:40 57
imsparsh/accentdb-core-extended AccentDB - Core & Extended 6GB 2021-02-17 14:22:54 49
nickuzmenkov/ranzcr-clip-kfold-tfrecords RANZCR CLiP KFold TFRecords 2GB 2021-02-21 13:29:51 64
datasnaek/youtube-new Trending YouTube Video Statistics 201MB 2019-06-03 00:56:47 138187
zynicide/wine-reviews Wine Reviews 51MB 2017-11-27 17:08:04 135285
datasnaek/chess Chess Game Dataset (Lichess) 3MB 2017-09-04 03:09:09 17470
rtatman/188-million-us-wildfires 1.88 Million US Wildfires 168MB 2020-05-12 21:03:49 15320
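# The listing above is only the default first page; the dataset used next can
# be located directly with the CLI's search flag:
!kaggle datasets list -s banking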
!kaggle datasets download -d rashmiranu/banking-dataset-classification
Downloading banking-dataset-classification.zip to /content
0% 0.00/409k [00:00<?, ?B/s]
100% 409k/409k [00:00<00:00, 46.3MB/s]
!unzip banking-dataset-classification.zip
Archive: banking-dataset-classification.zip
inflating: new_test.csv
inflating: new_train.csv
!ls
banking-dataset-classification.zip new_test.csv sample_data
kaggle.json new_train.csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
df = pd.read_csv('new_train.csv')
# test_df = pd.read_csv('new_test.csv')
df.drop_duplicates(inplace=True)
df
df.describe()
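# Before dropping rows, a quick audit of how many values are actually missing
# per column helps judge what dropna() will discard:
df.isna().sum()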
df.dropna(inplace=True)
df.describe()
df.drop(columns=['pdays', 'previous'], inplace=True)  # the positional axis argument is deprecated
df.head()
# test_df.head()
# print(test_df.marital.unique(), df.marital.unique())
df.job.unique()
df_obj = df.select_dtypes(include='object').copy()
df_obj.columns
# df['y'].replace(['yes', 'no'], [0, 1], inplace=True)
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 32942 entries, 0 to 32949
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 age 32942 non-null int64
1 job 32942 non-null object
2 marital 32942 non-null object
3 education 32942 non-null object
4 default 32942 non-null object
5 housing 32942 non-null object
6 loan 32942 non-null object
7 contact 32942 non-null object
8 month 32942 non-null object
9 day_of_week 32942 non-null object
10 duration 32942 non-null int64
11 campaign 32942 non-null int64
12 poutcome 32942 non-null object
13 y 32942 non-null object
dtypes: int64(3), object(11)
memory usage: 3.8+ MB
df_obj.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 32942 entries, 0 to 32949
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 job 32942 non-null object
1 marital 32942 non-null object
2 education 32942 non-null object
3 default 32942 non-null object
4 housing 32942 non-null object
5 loan 32942 non-null object
6 contact 32942 non-null object
7 month 32942 non-null object
8 day_of_week 32942 non-null object
9 poutcome 32942 non-null object
10 y 32942 non-null object
dtypes: object(11)
memory usage: 3.0+ MB
# plot a bar chart for each categorical variable
plt.style.use("ggplot")
for column in df_obj:
    plt.figure(figsize=(20, 4))
    plt.subplot(121)
    df[column].value_counts().plot(kind="bar")
    plt.xlabel(column)
    plt.ylabel("number of customers")
    plt.title(column)
len(df.columns)
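# The bar charts above reveal an 'unknown' level in several categorical
# columns; count how many rows carry it per column before the mode imputation
# in the next cell (df_obj still holds the pre-imputation values):
(df_obj == 'unknown').sum()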
for column in df_obj:
    mode = df[column].mode()[0]
    df[column] = df[column].replace('unknown', mode)
df.job.unique()
for column in df_obj:
    plt.figure(figsize=(20, 4))
    plt.subplot(121)
    df[column].value_counts().plot(kind="bar")
    plt.xlabel(column)
    plt.ylabel("number of customers")
    plt.title(column)
plt.style.use("ggplot")
for column in df_obj:
    plt.figure(figsize=(20, 4))
    plt.subplot(121)
    sns.countplot(x=df[column], hue=df["y"])  # keyword args avoid the seaborn FutureWarning
    plt.title(column)
    plt.xticks(rotation=90)
    # plt.show()
df_encoded = pd.get_dummies(df, columns=['job', 'marital', 'education', 'default', 'housing', 'loan', 'contact',
'month', 'day_of_week', 'poutcome'])
df_encoded
df_encoded.head()
X = df_encoded.drop(columns='y').copy()
y = df_encoded['y'].copy()
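# The target is still the strings 'yes'/'no'; bank-marketing targets are
# typically imbalanced, so check the class ratio before trusting plain accuracy:
y.value_counts(normalize=True)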
X
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)  # stratify keeps the class ratio; fixed seed for reproducibility
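# KNN, logistic regression and SVC are all scale-sensitive, and scaling also
# helps lbfgs converge. A minimal sketch with StandardScaler, fit on the
# training split only to avoid leakage; the cells below keep the raw features,
# so swap in the *_scaled arrays to compare:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)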
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import RandomizedSearchCV
# KNN algorithm
clf = KNeighborsClassifier()
clf.fit(X_train, y_train)
Accuracy = clf.score(X_test, y_test)
Accuracy
print([int(x) for x in np.linspace(100, 1200, num=6)])
# print(x)
[100, 320, 540, 760, 980, 1200]
n_estimator = [int(x) for x in np.linspace(100, 1200, num=12)]
max_depth = [int(x) for x in np.linspace(5, 30, num=6)]
min_samples_split = [2, 5, 10, 15, 100]
min_samples_leaf = [1, 2, 5, 10]
criterion = ['gini', 'entropy']
n_neighbors = [int(x) for x in np.linspace(start = 1, stop = 100, num = 50)]
weights = ['uniform','distance']
metric = ['euclidean', 'manhattan', 'chebyshev', 'minkowski']  # 'seuclidean' is omitted: it needs an explicit V via metric_params
random_grid = {
'n_neighbors': n_neighbors,
'weights': weights,
'metric': metric,
}
knn = KNeighborsClassifier()
KNN = RandomizedSearchCV(knn, param_distributions=random_grid, verbose=2, cv=3, random_state=42, n_iter=10, scoring='accuracy')
KNN.fit(X_train, y_train)
KNN.best_params_
best_knn = KNeighborsClassifier(metric='manhattan', n_neighbors=55, weights='distance')
best_knn.fit(X_train, y_train)
best_knn.score(X_test, y_test)
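# confusion_matrix was imported earlier but never used; with an imbalanced
# target, accuracy alone is misleading, so inspect the tuned KNN's errors:
confusion_matrix(y_test, best_knn.predict(X_test))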
clf = LogisticRegression(max_iter=1000)  # raise max_iter so lbfgs converges on this data
clf.fit(X_train, y_train)
Accuracy = clf.score(X_test, y_test)
Accuracy
param_dist = {
    'penalty': ['l1', 'l2'],
    'C': [0.1, 1, 2, 3, 4]  # C must be strictly positive; 0 raises a ValueError
}
logreg = LogisticRegression(solver='liblinear', max_iter=1000)  # liblinear supports both 'l1' and 'l2'
LG = RandomizedSearchCV(logreg, param_distributions=param_dist, verbose=2, cv=3, random_state=42, n_iter=10, scoring='accuracy')
LG.fit(X_train, y_train)
LG.best_params_
LG.best_score_
lg_best = LogisticRegression(C=3, penalty='l2', solver='liblinear', max_iter=1000)
lg_best.fit(X_train, y_train)
lg_best.score(X_test, y_test)
clf = GaussianNB()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
params_NB = {'var_smoothing': np.logspace(0,-9, num=100)}
NB = GaussianNB()
NB_ran = RandomizedSearchCV(NB, param_distributions=params_NB, verbose=2, cv=3, random_state=42, n_iter=10, scoring='accuracy')
NB_ran.fit(X_train, y_train)
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 30 out of 30 | elapsed: 2.2s finished
NB_ran.best_params_
NB_ran.best_score_
NB_best = GaussianNB(var_smoothing=1.5199110829529332e-05)
NB_best.fit(X_train, y_train)
NB_best.score(X_test, y_test)
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
param_dist = {
    "n_estimators": n_estimator,
    "max_depth": max_depth,
    "min_samples_leaf": min_samples_leaf,
    "criterion": criterion,
    "min_samples_split": min_samples_split
}
rf = RandomForestClassifier()
rb_hyper = RandomizedSearchCV(rf, param_distributions=param_dist, verbose=2, cv=3, random_state=42, n_iter=10, scoring='accuracy')
rb_hyper.fit(X_train, y_train)
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 30 out of 30 | elapsed: 5.2min finished
rb_hyper.best_params_
rb_hyper.best_score_
best_rf = RandomForestClassifier(criterion='gini',
                                 max_depth=30,
                                 min_samples_leaf=1,
                                 min_samples_split=15,
                                 n_estimators=300)
best_rf.fit(X_train, y_train)
best_rf.score(X_test, y_test)
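# A fitted random forest exposes feature_importances_; the ten strongest
# predictors in the one-hot-encoded frame (a quick sketch):
pd.Series(best_rf.feature_importances_, index=X.columns).nlargest(10)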
clf = SVC()
clf.fit(X_train, y_train)
clf.score(X_test, y_test)
param_dist = {'C': [0.1, 1, 10, 100, 1000],
'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
'kernel': ['rbf']}
svc_hyper = RandomizedSearchCV(SVC(), param_distributions=param_dist, verbose=2, cv=3, random_state=42, n_iter=10, scoring='accuracy')
svc_hyper.fit(X_train, y_train)
Fitting 3 folds for each of 10 candidates, totalling 30 fits
[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done 30 out of 30 | elapsed: 26.2min finished
svc_hyper.best_params_
svc_hyper.best_score_
best_svc = SVC(C=1000, gamma=0.001, kernel='rbf')
best_svc.fit(X_train, y_train)
best_svc.score(X_test, y_test)
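# A closing side-by-side of all five tuned models on the same held-out split
# (a sketch; every estimator referenced here was fitted in the cells above):
for name, model in [('KNN', best_knn), ('LogisticRegression', lg_best),
                    ('GaussianNB', NB_best), ('RandomForest', best_rf),
                    ('SVC', best_svc)]:
    print(f'{name}: {model.score(X_test, y_test):.4f}')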