Python - Travel Insurance Marketing Analysis

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Load the raw travel-insurance records from the CSV in the working directory.
df = pd.read_csv('travel_insurance.csv')

# Preview the first five rows (five is head()'s default).
df.head(n=5)

# Print the column names, dtypes and non-null counts.
df.info()

# One-hot encode the categorical columns, dropping the first level of each so
# that every two-level category collapses into a single indicator column.
# dtype=int pins the indicators to 0/1 integers: pandas >= 2.0 otherwise
# returns booleans, which describe() leaves out of its default numeric summary.
df_dummy = pd.get_dummies(df, drop_first=True, dtype=int)

# Rename columns for readability; attempted to keep the same naming convention
# used.
# NOTE(review): the assignment is positional, so it relies on the encoded frame
# having exactly these nine columns in this order -- confirm against the CSV
# header (a length mismatch raises ValueError, a reordering would not).
df_dummy.columns = [
    'Age',
    'AnnualIncome(k$)',
    'FamilyMembers',
    'HealthIssues',
    'Insured',
    'PrivateSector',
    'Graduate',
    'FrequentFlyer',
    'TraveledAbroad',
]

# Converting AnnualIncome to be in k$
df_dummy['AnnualIncome(k$)'] = df_dummy['AnnualIncome(k$)'] / 1000

# Data after column rename: summary statistics rounded to three decimals.
df_dummy.describe().round(decimals=3)

# Indicator columns; a value of 1 marks membership of that segment.
cols = [
    'HealthIssues',
    'PrivateSector',
    'Graduate',
    'FrequentFlyer',
    'TraveledAbroad',
]

# For each segment, report the share of its members who bought insurance.
for segment in cols:
    members = df_dummy[df_dummy[segment] == 1]
    # The mean of the Insured flag is the insured fraction (assumes the flag
    # only holds 0/1). Unlike looking up label 1 in value_counts(), this gives
    # 0 instead of raising KeyError when nobody in the segment is insured.
    # Rounding happens after scaling to a percentage so binary floating-point
    # noise (e.g. 30.099999999999998) never reaches the printed figure.
    insured_pct = round(float(members['Insured'].mean()) * 100, 1)
    print('{} customer rate: {}%'.format(segment, insured_pct))

# Pairwise correlations between all columns of the encoded frame, drawn as a
# heatmap with the coefficient printed in every cell.
corr_matrix = df_dummy.corr()
sns.heatmap(corr_matrix, annot=True)
plt.title('Correlation Heatmap - All')
plt.tight_layout()

# Split the encoded frame by purchase outcome. Insured is dropped from both
# halves because it is constant within each of them.
insured_flag = df_dummy['Insured']
cust = df_dummy.loc[insured_flag == 1].drop(columns='Insured')
non_cust = df_dummy.loc[insured_flag == 0].drop(columns='Insured')

# Side-by-side age counts for the two groups; the shared y-axis keeps the bar
# heights directly comparable between panels.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(11, 4), sharey=True)
fig.suptitle('Ages of Non-customers vs. Customers')

panels = (
    (ax[0], non_cust, 'Age - Non-Customer'),
    (ax[1], cust, 'Age - Customer'),
)
for panel_ax, group, x_label in panels:
    sns.countplot(x='Age', data=group, ax=panel_ax)
    panel_ax.set(xlabel=x_label)

plt.tight_layout()

# Percentage split of the Insured flag among 28-year-old travelers.
age_28 = df_dummy.loc[df_dummy['Age'] == 28]
twenty_8 = age_28['Insured'].value_counts(normalize=True).mul(100).round(2)

# Label 0 of the Insured flag is the non-customer share.
print(f'{twenty_8.loc[0]}% of 28 year old travelers are not customers')

# Summary statistics for 28-year-old non-customers. Age is constant in this
# subset, so it is dropped before describing.
is_28 = non_cust['Age'] == 28
non_cust_28 = non_cust.loc[is_28].drop(columns='Age')
non_cust_28.describe().round(decimals=3)

# Overlay the income histograms of both groups on one axes; the low alpha keeps
# overlapping bars visible.
for group, group_label in ((cust, 'Customer'), (non_cust, 'Non-customer')):
    plt.hist('AnnualIncome(k$)', data=group, label=group_label, alpha=0.4)

plt.title('Annual Income by Customer')
plt.xlabel('Annual Income (Thousands of $)')
plt.ylabel('Count')
plt.legend()
plt.tight_layout()

# Income cut-off in the thousands used by the AnnualIncome(k$) column
# (1300 k = 1.3 million, matching the printed messages).
# NOTE(review): the *_130k names below understate the threshold tenfold; they
# are kept unchanged so any existing references still resolve.
income_cutoff_k = 1300

# Strict comparisons on both sides: rows exactly at the cut-off fall in
# neither group.
over_130k = df_dummy[df_dummy['AnnualIncome(k$)'] > income_cutoff_k]
under_130k = df_dummy[df_dummy['AnnualIncome(k$)'] < income_cutoff_k]

# Percentage split of the Insured flag within each income group.
cust_over = over_130k['Insured'].value_counts(normalize=True).mul(100).round(2)
cust_under = under_130k['Insured'].value_counts(normalize=True).mul(100).round(2)

print(f'{cust_over.loc[1]}% of travelers that earn over $1.3million annually are customers')
print(f'{cust_under.loc[0]}% of travelers that earn under $1.3million annually are not customers')

# Annual income by insured status scatterplot: one point per traveler, colored
# by the Insured flag.
ax = sns.scatterplot(
    data=df_dummy,
    x='Age',
    y='AnnualIncome(k$)',
    hue='Insured',
    alpha=0.4,
)
plt.title('Age vs Income by Insured Status')
plt.ylabel('Annual Income (Thousands of $)')
# Left disabled by the author; would re-anchor the legend at the axes'
# upper-right corner.
# sns.move_legend(ax, "upper left", bbox_to_anchor=(1, 1))
plt.tight_layout()

# Income of each traveler, grouped by the TraveledAbroad indicator and colored
# by the Insured flag.
sns.catplot(
    data=df_dummy,
    x='TraveledAbroad',
    y='AnnualIncome(k$)',
    hue='Insured',
    alpha=0.4,
)
# Replace the 0/1 tick labels with readable text.
plt.xticks(ticks=[0, 1], labels=['No', 'Yes'])
plt.suptitle('Income distribution by Travel and Insurance')
plt.tight_layout()

# Income of each traveler, grouped by the FrequentFlyer indicator and colored
# by the Insured flag.
sns.catplot(
    data=df_dummy,
    x='FrequentFlyer',
    y='AnnualIncome(k$)',
    hue='Insured',
    alpha=0.4,
)
# Replace the 0/1 tick labels with readable text.
plt.xticks(ticks=[0, 1], labels=['No', 'Yes'])
plt.suptitle('Income distribution by Frequent flyers and Insurance')
plt.tight_layout()

# Income of each traveler, grouped by the PrivateSector indicator and colored
# by the Insured flag.
sns.catplot(
    data=df_dummy,
    x='PrivateSector',
    y='AnnualIncome(k$)',
    hue='Insured',
    alpha=0.3,
)
# Replace the 0/1 tick labels with readable text.
plt.xticks(ticks=[0, 1], labels=['No', 'Yes'])
plt.suptitle('Income distribution by Private Sector and Insurance')
plt.tight_layout()

# Number of travelers per PrivateSector value, split by the Insured flag.
sns.countplot(x='PrivateSector', data=df_dummy, hue='Insured', alpha=0.4)
# Replace the 0/1 tick labels with readable text.
plt.xticks([0, 1], ['No', 'Yes'])
# The bars are row counts, so the title says counts rather than
# "Income distribution".
plt.suptitle('Customer counts by Private Sector and Insurance')
plt.tight_layout()

# Share of non-customers within each employment sector.
# value_counts(normalize=True) gives the split of the Insured flag; one minus
# the share at label 1 is the non-customer share (assumes Insured holds only
# 0/1). Each percentage is rounded AFTER scaling by 100 so binary
# floating-point noise (e.g. 75.39999999999999) never reaches the output.

# PrivateSector == 0: reported as the government sector.
gov_sec = df_dummy[df_dummy['PrivateSector'] == 0]
gov_sec = 1 - gov_sec.Insured.value_counts(normalize=True).round(3)
print('{}% of Government sector are not customers'.format(round(gov_sec[1] * 100, 1)))

# PrivateSector == 1: the private sector.
priv_sec = df_dummy[df_dummy['PrivateSector'] == 1]
priv_sec = 1 - priv_sec.Insured.value_counts(normalize=True).round(3)
print('{}% of Private sector are not customers'.format(round(priv_sec[1] * 100, 1)))

# Summary statistics for customers, rounded to three decimals.
cust.describe().round(decimals=3)

# Summary statistics for non-customers, rounded to three decimals.
non_cust.describe().round(decimals=3)