from datetime import date, datetime, timedelta
import os
import math
import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import sklearn
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Default to large matplotlib figures for readability.
mpl.rcParams['figure.figsize'] = (12, 10)
# Reuse matplotlib's default color cycle for consistent colors across plots.
colors = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Simulated card-transaction data stored in Feather format.
transactions_df = pd.read_feather("data/transactions.feather")
# Peek at ten random rows (fixed seed for reproducibility).
transactions_df.sample(10, random_state=0)
TRANSACTION_IDint64
102974 - 1676835
TX_DATETIMEdatetime64[ns]
2018-04-11 15:25:15 - 2018-09-22 18:58:10
1359175
1368663
2018-08-21T15:58:32.000000
744402
753890
2018-06-18T13:24:19.000000
743737
753225
2018-06-18T12:29:50.000000
681649
691137
2018-06-12T04:01:22.000000
412664
422152
2018-05-14T22:03:16.000000
1275701
1285189
2018-08-13T03:32:52.000000
821080
830568
2018-06-26T13:30:53.000000
1667347
1676835
2018-09-22T18:58:10.000000
93486
102974
2018-04-11T15:25:15.000000
896790
906278
2018-07-04T11:33:25.000000
# Class balance: fraud is a rare event, so keep the raw counts around for
# later class weighting.
class_counts = np.bincount(transactions_df["TX_FRAUD"])
not_fraud_count, fraud_count = class_counts
total_count = class_counts.sum()
summary = (
    f"Data:\n"
    f" Total: {total_count}\n"
    f" Fraud: {fraud_count} ({100 * fraud_count / total_count:.2f}% of total)\n"
)
print(summary)
Data:
Total: 1744667
Fraud: 14678 (0.84% of total)
# Down-sample to 1000 legitimate + 1000 fraudulent transactions so the two
# overlaid histograms are visually comparable despite the class imbalance.
legit_sample = transactions_df[transactions_df["TX_FRAUD"] == 0].sample(
    1000, random_state=0
)
fraud_sample = transactions_df[transactions_df["TX_FRAUD"] == 1].sample(
    1000, random_state=0
)
sample_df = pd.concat([legit_sample, fraud_sample])
# Overlaid, semi-transparent histograms with marginal box plots.
fig = px.histogram(
    sample_df,
    title="Transaction count for different amounts",
    x="TX_AMOUNT",
    color="TX_FRAUD",
    marginal="box",
)
fig.update_traces(opacity=0.75)
fig.update_layout(barmode="overlay")
fig.show()
# Assemble the per-transaction feature table used for modeling.
cleaned_df = pd.DataFrame()
cleaned_df["amount"] = transactions_df["TX_AMOUNT"]
cleaned_df["is_fraud"] = transactions_df["TX_FRAUD"]
# Calendar features: weekday >= 5 is Saturday/Sunday; hour <= 6 covers
# midnight through 6am.
cleaned_df["is_weekend"] = transactions_df["TX_DATETIME"].dt.weekday >= 5
cleaned_df["is_night"] = transactions_df["TX_DATETIME"].dt.hour <= 6
# Per-customer activity over trailing 1/7/30-day windows. TX_AMOUNT is used
# only as a non-null column for rolling .count() / .mean().
# NOTE(review): groupby().apply() normally returns a group-keyed index; these
# assignments rely on pandas aligning that result back onto cleaned_df's flat
# index — confirm this behaves as intended on the pandas version in use.
cleaned_df["customer_num_transactions_1_day"] = transactions_df.groupby(
    "CUSTOMER_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("1d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
cleaned_df["customer_num_transactions_7_day"] = transactions_df.groupby(
    "CUSTOMER_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("7d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
cleaned_df["customer_num_transactions_30_day"] = transactions_df.groupby(
    "CUSTOMER_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("30d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
# Per-customer average spend over the same trailing windows.
cleaned_df["customer_avg_amount_1_day"] = transactions_df.groupby("CUSTOMER_ID").apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("1d", on="TX_DATETIME").mean()
)["TX_AMOUNT"]
cleaned_df["customer_avg_amount_7_day"] = transactions_df.groupby("CUSTOMER_ID").apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("7d", on="TX_DATETIME").mean()
)["TX_AMOUNT"]
cleaned_df["customer_avg_amount_30_day"] = transactions_df.groupby("CUSTOMER_ID").apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("30d", on="TX_DATETIME").mean()
)["TX_AMOUNT"]
# Frauds are assumed to take this many days to be discovered/reported.
DAY_DELAY = 7


def get_count_risk_rolling_window(
    terminal_transactions, window_size, delay_period=DAY_DELAY
):
    """Append a `fraud_risk` column to one terminal's transactions, in place.

    The risk at each transaction is the fraud rate observed at this terminal
    over a `window_size`-day window that ends `delay_period` days in the past:
    frauds take `delay_period` days to be reported, so more recent labels
    would not be available in production.

    Args:
        terminal_transactions: DataFrame with `TX_DATETIME` (monotonic) and
            `TX_FRAUD` columns; mutated in place.
        window_size: length of the risk window, in days.
        delay_period: reporting delay, in days.

    Returns:
        The same DataFrame, with the `fraud_risk` column added.
    """

    def _rolling_fraud_stats(days):
        # Trailing `days`-day rolling sum and count of the fraud flag.
        rolled = terminal_transactions.rolling(f"{days}d", on="TX_DATETIME")["TX_FRAUD"]
        return rolled.sum(), rolled.count()

    frauds_in_delay, transactions_in_delay = _rolling_fraud_stats(delay_period)
    frauds_until_window, transactions_until_window = _rolling_fraud_stats(
        delay_period + window_size
    )
    # Stats for the window alone = (delay + window) stats minus delay stats.
    frauds_in_window = frauds_until_window - frauds_in_delay
    transactions_in_window = transactions_until_window - transactions_in_delay
    # 0/0 yields NaN when the window holds no transactions; treat as zero risk.
    terminal_transactions["fraud_risk"] = (
        frauds_in_window / transactions_in_window
    ).fillna(0)
    return terminal_transactions
# Per-terminal activity over trailing 1/7/30-day windows, mirroring the
# customer features above (TX_AMOUNT used only as a non-null column to count).
# NOTE(review): as with the customer features, these rely on pandas aligning
# the groupby().apply() result back onto cleaned_df's flat index — confirm.
cleaned_df["terminal_num_transactions_1_day"] = transactions_df.groupby(
    "TERMINAL_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("1d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
cleaned_df["terminal_num_transactions_7_day"] = transactions_df.groupby(
    "TERMINAL_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("7d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
cleaned_df["terminal_num_transactions_30_day"] = transactions_df.groupby(
    "TERMINAL_ID"
).apply(
    lambda x: x[["TX_DATETIME", "TX_AMOUNT"]].rolling("30d", on="TX_DATETIME").count()
)[
    "TX_AMOUNT"
]
# Per-terminal fraud rate over delayed 1/7/30-day windows (see
# get_count_risk_rolling_window for the delay semantics).
cleaned_df["terminal_fraud_risk_1_day"] = transactions_df.groupby("TERMINAL_ID").apply(
    lambda x: get_count_risk_rolling_window(x, 1, 7)
)["fraud_risk"]
cleaned_df["terminal_fraud_risk_7_day"] = transactions_df.groupby("TERMINAL_ID").apply(
    lambda x: get_count_risk_rolling_window(x, 7, 7)
)["fraud_risk"]
cleaned_df["terminal_fraud_risk_30_day"] = transactions_df.groupby("TERMINAL_ID").apply(
    lambda x: get_count_risk_rolling_window(x, 30, 7)
)["fraud_risk"]
# Bookkeeping columns used for the chronological train/test split, not as
# model features.
cleaned_df["day"] = transactions_df["TX_TIME_DAYS"]
cleaned_df["datetime"] = transactions_df["TX_DATETIME"]
cleaned_df["customer_id"] = transactions_df["CUSTOMER_ID"]
cleaned_df["id"] = transactions_df["TRANSACTION_ID"]
pd.concat(
    # show some fraudulent and non-fraudulent transactions
    [
        cleaned_df[cleaned_df["is_fraud"] == 1].sample(5, random_state=0),
        cleaned_df[cleaned_df["is_fraud"] == 0].sample(5, random_state=0),
    ]
).sample(10, random_state=0)
amountfloat64
5.15 - 132.03
is_fraudint64
0 - 1
764833
40.66
1
1109119
54.77
0
408126
26.29
1
1437242
132.03
0
1073063
5.15
1
1027460
48.39
0
1340733
69.47
0
1416564
54.42
1
498910
29.27
1
1741939
87.12
0
# this is adapted from get_train_test_set at
# https://fraud-detection-handbook.github.io/fraud-detection-handbook/Chapter_References/shared_functions.html#get-train-test-set
def get_train_test_set(
    df,
    start_date_training,
    delta_train=7,
    delta_delay=DAY_DELAY,
    delta_test=7,
    random_state=0,
):
    """Split `df` chronologically into a train set and a leakage-safe test set.

    Training covers `delta_train` days starting at `start_date_training`; the
    test period starts `delta_delay` days after training ends and lasts
    `delta_test` days. For each test day, transactions from customers whose
    fraud would already be known by then (frauds take `delta_delay` days to be
    reported) are removed, mimicking a production setting.

    Returns (train_df, test_df), both sorted by transaction id.

    NOTE(review): `random_state` is currently unused.
    """
    # Get the training set data
    train_df = df[
        (df["datetime"] >= start_date_training)
        & (df["datetime"] < start_date_training + timedelta(days=delta_train))
    ]
    # Get the test set data
    test_df = []
    # Note: Cards known to be compromised after the delay period are removed from the test set
    # That is, for each test day, all frauds known at (test_day-delay_period) are removed
    # First, get known defrauded customers from the training set
    known_defrauded_customers = set(train_df[train_df["is_fraud"] == 1]["customer_id"])
    # Get the relative starting day of training set (easier than TX_DATETIME to collect test data)
    start_tx_time_days_training = train_df["day"].min()
    # Then, for each day of the test set
    for day in range(delta_test):
        # Get test data for that day
        test_df_day = df[
            df["day"] == start_tx_time_days_training + delta_train + delta_delay + day
        ]
        # Compromised cards from that test day, minus the delay period, are added to the pool of known defrauded customers
        test_df_day_delay_period = df[
            df["day"] == start_tx_time_days_training + delta_train + day - 1
        ]
        new_defrauded_customers = set(
            test_df_day_delay_period[test_df_day_delay_period["is_fraud"] == 1][
                "customer_id"
            ]
        )
        known_defrauded_customers = known_defrauded_customers.union(
            new_defrauded_customers
        )
        # Keep only customers not yet known to be defrauded on this test day.
        test_df_day = test_df_day[
            ~test_df_day["customer_id"].isin(known_defrauded_customers)
        ]
        test_df.append(test_df_day)
    test_df = pd.concat(test_df)
    # Sort data sets by ascending order of transaction ID
    train_df = train_df.sort_values("id")
    test_df = test_df.sort_values("id")
    return (train_df, test_df)
# Chronological splits: 3 weeks of training data, then a validation split
# carved out of the training period; the delay gap is handled inside
# get_train_test_set.
train_df, test_df = get_train_test_set(
    cleaned_df, datetime(2018, 7, 25), delta_train=21
)
train_df, val_df = get_train_test_set(train_df, datetime(2018, 7, 25))
label_columns = ["is_fraud"]
feature_columns = [
    "amount",
    "is_weekend",
    "is_night",
    "customer_num_transactions_1_day",
    "customer_num_transactions_7_day",
    "customer_num_transactions_30_day",
    "customer_avg_amount_1_day",
    "customer_avg_amount_7_day",
    "customer_avg_amount_30_day",
    "terminal_num_transactions_1_day",
    "terminal_num_transactions_7_day",
    "terminal_num_transactions_30_day",
    "terminal_fraud_risk_1_day",
    "terminal_fraud_risk_7_day",
    "terminal_fraud_risk_30_day",
]
splits = (train_df, val_df, test_df)
train_labels, val_labels, test_labels = (
    np.array(split[label_columns]) for split in splits
)
train_features, val_features, test_features = (
    np.array(split[feature_columns]) for split in splits
)
# Standardize features with training-set statistics only, so no information
# from validation/test leaks into the scaler.
scaler = StandardScaler()
train_features = scaler.fit_transform(train_features)
val_features = scaler.transform(val_features)
test_features = scaler.transform(test_features)
for split_name, array in (
    ("Training labels", train_labels),
    ("Validation labels", val_labels),
    ("Test labels", test_labels),
    ("Training features", train_features),
    ("Validation features", val_features),
    ("Test features", test_features),
):
    print(f"{split_name} shape:", array.shape)
Training labels shape: (67240, 1)
Validation labels shape: (58264, 1)
Test labels shape: (50321, 1)
Training features shape: (67240, 15)
Validation features shape: (58264, 15)
Test features shape: (50321, 15)
# Re-weight the loss so each class contributes equally overall: a class seen
# half as often gets twice the weight.
class_weight = {
    0: (1.0 / not_fraud_count) * total_count / 2.0,
    1: (1.0 / fraud_count) * total_count / 2.0,
}
class_weight
# bias fix to speed up training
# see https://www.tensorflow.org/tutorials/structured_data/imbalanced_data#optional_set_the_correct_initial_bias
initial_log_odds = np.log([fraud_count / not_fraud_count])
output_bias = tf.keras.initializers.Constant(initial_log_odds)
# Two ReLU hidden layers + dropout; sigmoid output for the binary fraud
# probability.
model = keras.Sequential(
    [
        # Only the first layer needs input_shape; Keras ignores it on later
        # layers, so the redundant copy on the second Dense was removed.
        keras.layers.Dense(
            500, activation="relu", input_shape=(train_features.shape[-1],)
        ),
        keras.layers.Dense(500, activation="relu"),
        keras.layers.Dropout(0.2),
        # Output bias starts at the log base rate (see output_bias above) so
        # initial predictions match the class prior.
        keras.layers.Dense(1, activation="sigmoid", bias_initializer=output_bias),
    ]
)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[
        keras.metrics.Precision(name="precision"),
        keras.metrics.Recall(name="recall"),
        keras.metrics.AUC(name="auc"),
        # Area under the precision-recall curve: the most informative metric
        # for heavily imbalanced data; also used for early stopping.
        keras.metrics.AUC(name="prc", curve="PR"),
    ],
)
model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense (Dense) (None, 500) 8000
dense_1 (Dense) (None, 500) 250500
dropout (Dropout) (None, 500) 0
dense_2 (Dense) (None, 1) 501
=================================================================
Total params: 259,001
Trainable params: 259,001
Non-trainable params: 0
_________________________________________________________________
BATCH_SIZE = 64
# Stop when validation PR-AUC has not improved for 10 epochs, and roll back
# to the best-scoring weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_prc", verbose=1, patience=10, mode="max", restore_best_weights=True
)
# class_weight makes the rare fraud class contribute as much to the loss as
# the majority class.
training_history = model.fit(
    train_features,
    train_labels,
    batch_size=BATCH_SIZE,
    epochs=40,
    callbacks=[early_stopping],
    validation_data=(val_features, val_labels),
    class_weight=class_weight,
)
Epoch 1/40
1051/1051 [==============================] - 6s 5ms/step - loss: 0.4941 - precision: 0.0703 - recall: 0.7341 - auc: 0.8588 - prc: 0.4330 - val_loss: 0.1791 - val_precision: 0.2200 - val_recall: 0.6857 - val_auc: 0.8455 - val_prc: 0.5665
Epoch 2/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3639 - precision: 0.1013 - recall: 0.7843 - auc: 0.8975 - prc: 0.5457 - val_loss: 0.1643 - val_precision: 0.2773 - val_recall: 0.6857 - val_auc: 0.8536 - val_prc: 0.5064
Epoch 3/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3477 - precision: 0.1080 - recall: 0.8010 - auc: 0.9027 - prc: 0.5893 - val_loss: 0.1683 - val_precision: 0.1562 - val_recall: 0.6961 - val_auc: 0.8521 - val_prc: 0.5504
Epoch 4/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3529 - precision: 0.1029 - recall: 0.8043 - auc: 0.9041 - prc: 0.5336 - val_loss: 0.3273 - val_precision: 0.0568 - val_recall: 0.7403 - val_auc: 0.8591 - val_prc: 0.4484
Epoch 5/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3473 - precision: 0.0981 - recall: 0.8077 - auc: 0.9094 - prc: 0.5159 - val_loss: 0.2161 - val_precision: 0.0938 - val_recall: 0.7117 - val_auc: 0.8590 - val_prc: 0.5172
Epoch 6/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3464 - precision: 0.0954 - recall: 0.8027 - auc: 0.9127 - prc: 0.5232 - val_loss: 0.1878 - val_precision: 0.1090 - val_recall: 0.7143 - val_auc: 0.8561 - val_prc: 0.5082
Epoch 7/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3405 - precision: 0.1012 - recall: 0.8027 - auc: 0.9165 - prc: 0.5199 - val_loss: 0.1828 - val_precision: 0.1418 - val_recall: 0.7143 - val_auc: 0.8551 - val_prc: 0.5539
Epoch 8/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3613 - precision: 0.0910 - recall: 0.8043 - auc: 0.9087 - prc: 0.4759 - val_loss: 0.2578 - val_precision: 0.1024 - val_recall: 0.7169 - val_auc: 0.8588 - val_prc: 0.4040
Epoch 9/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3563 - precision: 0.1014 - recall: 0.8027 - auc: 0.9114 - prc: 0.4653 - val_loss: 0.2156 - val_precision: 0.1343 - val_recall: 0.7091 - val_auc: 0.8474 - val_prc: 0.4739
Epoch 10/40
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3440 - precision: 0.1070 - recall: 0.8094 - auc: 0.9147 - prc: 0.5179 - val_loss: 0.4411 - val_precision: 0.0307 - val_recall: 0.7766 - val_auc: 0.8563 - val_prc: 0.3938
Epoch 11/40
1034/1051 [============================>.] - ETA: 0s - loss: 0.3642 - precision: 0.0871 - recall: 0.8048 - auc: 0.9140 - prc: 0.4533Restoring model weights from the end of the best epoch: 1.
1051/1051 [==============================] - 5s 4ms/step - loss: 0.3631 - precision: 0.0878 - recall: 0.8060 - auc: 0.9143 - prc: 0.4530 - val_loss: 0.1506 - val_precision: 0.2033 - val_recall: 0.6987 - val_auc: 0.8573 - val_prc: 0.4634
Epoch 11: early stopping
# Plot each training metric (training vs. validation) as its own figure.
# Removed dead code from the original: an unused `res = []` accumulator and a
# make_subplots figure that was immediately overwritten inside the loop.
metrics_to_plot = [
    ("loss", "Loss"),
    ("precision", "Precision"),
    ("recall", "Recall"),
    ("auc", "Area under ROC curve"),
    ("prc", "Area under PR curve"),
]
for i, (metric, name) in enumerate(metrics_to_plot):
    fig = go.Figure(
        data=[
            go.Scatter(
                x=training_history.epoch,
                y=training_history.history[metric],
                mode="lines",
                name="Training",
            ),
            go.Scatter(
                x=training_history.epoch,
                y=training_history.history["val_" + metric],
                mode="lines",
                line={"dash": "dash"},
                name="Validation",
            ),
        ]
    )
    fig.update_yaxes(title=name)
    fig.update_xaxes(title="Epoch")
    if i == 0:
        # The first plot carries the shared title and leaves room for it.
        fig.update_layout(
            height=250, title="Training history", margin={"b": 0, "t": 50}
        )
    else:
        fig.update_layout(height=200, margin={"b": 0, "t": 0})
    fig.show()
train_predictions = model.predict(train_features, batch_size=BATCH_SIZE)
test_predictions = model.predict(test_features, batch_size=BATCH_SIZE)
# Down-sample to a 10:1 legitimate-to-fraud ratio so both histograms are
# visible when overlaid.
predictions_df = pd.DataFrame(
    {"Prediction": train_predictions.ravel(), "Label": train_labels.ravel()}
)
predictions_df = pd.concat(
    [
        predictions_df[predictions_df["Label"] == 0].sample(5000, random_state=0),
        predictions_df[predictions_df["Label"] == 1].sample(500, random_state=0),
    ]
)
# Map numeric labels to readable legend entries. The original passed
# labels={"0": ..., "1": ...} to px.histogram, but `labels` renames *columns*,
# not category values, so it had no effect on the legend.
predictions_df["Label"] = predictions_df["Label"].map(
    {0: "Legitimate", 1: "Fraudulent"}
)
fig = px.histogram(
    predictions_df,
    x="Prediction",
    title="Prediction values",
    color="Label",
    marginal="box",
)
fig.update_traces(opacity=0.75)
fig.update_layout(barmode="overlay")
fig.show()
def make_roc_df(name, predictions, labels):
    """Return a DataFrame of ROC-curve points (in percent) tagged with `name`."""
    false_pos_rate, true_pos_rate, _ = sklearn.metrics.roc_curve(labels, predictions)
    return pd.DataFrame(
        {"fp": 100 * false_pos_rate, "tp": 100 * true_pos_rate, "Dataset": name}
    )
# Compare ROC curves on the training and test sets.
roc_df = pd.concat(
    [
        make_roc_df("Training", train_predictions, train_labels),
        make_roc_df("Test", test_predictions, test_labels),
    ]
)
fig = px.line(
    roc_df,
    title="ROC Curve",
    x="fp",
    y="tp",
    color="Dataset",
    labels={"fp": "False Positives (%)", "tp": "True Positives (%)"},
)
fig.update_yaxes(range=[60, 100])
# Dash the test curve. The original selector used "test", which matches no
# trace (the dataset is named "Test"), so the dashing was never applied.
fig.update_traces(line={"dash": "dash"}, selector={"name": "Test"})
fig.show()