## required libraries
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
## display settings: never truncate columns when printing, use seaborn's dark grid
pd.set_option('display.max_columns', None)
sns.set_style('darkgrid')

## reading dataset
df = pd.read_csv('creditcard.csv')

## NOTE: a bare expression such as `df.head()` is only rendered as the last
## statement of a notebook cell; in a script it is silently discarded, so each
## inspection result is printed explicitly.

## displaying first five rows
print(df.head())

## shape of dataset
print(df.shape)

## checking null values (number of missing entries per column)
print(df.isnull().sum())

## count the occurrence of unique values in class column
print(df['Class'].value_counts())
## countplot of classes
## The column is passed by keyword: in recent seaborn releases the first
## positional parameter of countplot is `data`, not `x`, so passing the Series
## positionally no longer means "count the values of this column".
plt.figure(figsize=(10, 5))
class_ax = sns.countplot(x='Class', data=df)
## log-scaled count axis, set on the Axes rather than through a pass-through kwarg
class_ax.set_yscale('log')

## checking correlation
## The pairwise correlation matrix is computed once and reused below.
corr_matrix = df.corr()
plt.figure(figsize=(25, 25))
plt.title("Correlation Matrix")
sns.heatmap(corr_matrix.round(2), annot=True)

## checking correlation of 'dependent' variable with each "independent" variable
## The trivial Class-vs-Class entry is dropped by label instead of relying on
## it being the last row after sorting.
class_corr = corr_matrix[['Class']].drop(index='Class').sort_values(by='Class')
print(class_corr)
## dependent and independent variables
## Selected by column name so the split does not depend on 'Class' being the
## last column of the CSV.
X = df.drop(columns='Class')
y = df['Class']
print(X.head())
print(y.head())

## train test split
## stratify=y keeps the class proportions the same in the train and test
## partitions. NOTE(review): the class counts are plotted on a log scale
## elsewhere in this script, which suggests heavy imbalance -- with an
## unstratified split the rare class could be under-represented in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0, stratify=y
)

## Standard Scaler
## Fitted on the training partition only and then applied to the test
## partition, so no test-set statistics leak into the scaling.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)
print(X_train.shape)
print(X_test.shape)
def _fit_and_report(model, train_features, train_labels, test_features, test_labels, title):
    """Fit *model*, print its test-set metrics and plot its confusion matrix.

    Args:
        model: Unfitted estimator exposing ``fit`` and ``predict``.
        train_features: Feature matrix used for fitting.
        train_labels: Labels matching ``train_features``.
        test_features: Feature matrix used for prediction.
        test_labels: True labels matching ``test_features``.
        title: Text placed above the confusion-matrix heatmap.

    Returns:
        The predictions of the fitted model on ``test_features``.
    """
    ## fit on training data
    model.fit(train_features, train_labels)
    ## prediction
    predictions = model.predict(test_features)

    print('Classification Report: \n', classification_report(test_labels, predictions))
    print("-" * 100)
    print()
    print('Accuracy Score: ', accuracy_score(test_labels, predictions))
    print("-" * 100)
    print()

    plt.figure(figsize=(10, 10))
    plt.title(title)
    ## fmt='g' shows the cell counts without scientific notation
    sns.heatmap(confusion_matrix(test_labels, predictions), annot=True, fmt='g')
    return predictions


## logistic regression
lg = LogisticRegression()
pred = _fit_and_report(
    lg, X_train, y_train, X_test, y_test, "Confusion Matrix - Logistic Regression"
)

## random forest
## random_state is fixed so repeated runs give the same forest, matching the
## seeded train/test split.
rf = RandomForestClassifier(random_state=0)
pred = _fit_and_report(
    rf, X_train, y_train, X_test, y_test, "Confusion Matrix - Random Forest"
)