## required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns  # was missing: `sns` is used below; also removed duplicate pyplot import
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

## show every column when displaying wide DataFrames (this dataset has 31 columns)
pd.set_option('display.max_columns', None)
## consistent seaborn look for all plots in this script
sns.set_style('darkgrid')
## reading dataset
df = pd.read_csv('creditcard.csv')
## displaying first five rows
df.head()
## shape of dataset
df.shape
## checking null values
df.isnull().sum()
## count the occurrence of unique values in class column
df.Class.value_counts()
## countplot of classes — log scale because the classes are heavily imbalanced.
## NOTE: the column is passed as an explicit keyword arg; positional use was
## deprecated in seaborn 0.11 (FutureWarning) and is an error from 0.12 on.
plt.figure(figsize=(10, 5))
ax = sns.countplot(x='Class', data=df)
ax.set_yscale('log')
C:\Users\jgaur\anaconda3\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
## full correlation heatmap, rounded to 2 decimals for readable annotations
plt.figure(figsize=(25, 25))
plt.title("Correlation Matrix")
corr = df.corr()
sns.heatmap(corr.round(2), annot=True);

## correlation of the target ('Class') with every feature, sorted ascending;
## the last row (Class vs. itself, always 1.0) is dropped
df.corr()[['Class']].sort_values(by='Class').iloc[:-1]

## dependent and independent variables:
## features = every column except the last, target = the last column
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]
X.head()
y.head()
## hold out 25% of the rows for testing; fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

## standardize features: the scaler is fitted on the training split only,
## then the same transformation is applied to the test split (no leakage)
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print(X_train.shape)
print(X_test.shape)
(213605, 30)
(71202, 30)
## baseline model: logistic regression on the scaled features
log_reg = LogisticRegression()
## fit on the training data
log_reg.fit(X_train, y_train)
## predict on the held-out test set
pred = log_reg.predict(X_test)
## evaluation: per-class report, overall accuracy, and a confusion-matrix heatmap
separator = "-" * 100
print('Classification Report: \n', classification_report(y_test, pred))
print(separator)
print()
print('Accuracy Score: ', accuracy_score(y_test, pred))
print(separator)
print()
plt.figure(figsize=(10, 10))
sns.heatmap(confusion_matrix(y_test, pred), annot=True, fmt='g');
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 71082
1 0.88 0.66 0.75 120
accuracy 1.00 71202
macro avg 0.94 0.83 0.88 71202
weighted avg 1.00 1.00 1.00 71202
----------------------------------------------------------------------------------------------------
Accuracy Score: 0.9992696834358585
----------------------------------------------------------------------------------------------------
## ensemble model: random forest.
## random_state is pinned so results are reproducible run-to-run,
## consistent with the fixed seed already used in train_test_split.
rf = RandomForestClassifier(random_state=0)
## fit on training data
rf.fit(X_train, y_train)
## prediction on the held-out test set
pred = rf.predict(X_test)
## evaluation: per-class report, overall accuracy, and a confusion-matrix heatmap
print('Classification Report: \n', classification_report(y_test, pred))
print("-" * 100)
print()
print('Accuracy Score: ', accuracy_score(y_test, pred))
print("-" * 100)
print()
plt.figure(figsize=(10, 10))
sns.heatmap(confusion_matrix(y_test, pred), annot=True, fmt='g');
Classification Report:
precision recall f1-score support
0 1.00 1.00 1.00 71082
1 0.94 0.78 0.85 120
accuracy 1.00 71202
macro avg 0.97 0.89 0.92 71202
weighted avg 1.00 1.00 1.00 71202
----------------------------------------------------------------------------------------------------
Accuracy Score: 0.9995365298727564
----------------------------------------------------------------------------------------------------