# Financial Inclusion in Africa - Exploratory Data Analysis

# Import the modules used throughout the analysis.
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Use a larger font for the axis labels of every figure.
plt.rcParams["axes.labelsize"] = 18

# Suppress all warnings so they do not clutter the output.
warnings.filterwarnings('ignore')

# Render figures inline when running under IPython/Jupyter. The bare
# "%matplotlib inline" magic is not valid Python syntax, so it is invoked
# through the IPython API instead; when `get_ipython` is not defined (plain
# Python interpreter) the step is skipped.
try:
    get_ipython().run_line_magic("matplotlib", "inline")
except NameError:
    pass

# Import data
data = pd.read_csv('/work/data/data.csv')

# Print shape
print('train data shape :', data.shape)

# Inspect the data by showing the first five rows. A bare expression is only
# echoed when it is the last statement of a notebook cell, so print it
# explicitly to keep the output visible.
print(data.head())

# Show the list of columns
print(list(data.columns))

# Show some information about the dataset. DataFrame.info() writes its
# summary itself and returns None, so wrapping it in print() would emit a
# stray "None" line after the summary.
data.info()

# Check for missing values (per-column count of nulls)
print('missing values:', data.isnull().sum())

# Frequency table of the target variable: the count of each category in
# 'bank_account'.
print(data['bank_account'].value_counts())

# Count plot of every categorical column, one figure per column, starting
# with the target ('bank_account').
for column in (
    "bank_account",
    "country",
    "location_type",
    "year",
    "cellphone_access",
    "gender_of_respondent",
):
    sns.catplot(data=data, x=column, kind="count")

# Same count plots for the remaining categorical columns; here the x tick
# labels are rotated 45 degrees and right-aligned.
for column in (
    "relationship_with_head",
    "marital_status",
    "education_level",
    "job_type",
):
    sns.catplot(data=data, x=column, kind="count")
    plt.xticks(
        rotation=45,
        horizontalalignment="right",
        fontweight="light",
        fontsize="x-large",
    )

# Histogram of each numeric column on its own wide figure, with a
# human-readable x-axis label.
for column, axis_label in (
    ("household_size", "Household size"),
    ("age_of_respondent", "Age of Respondent"),
):
    plt.figure(figsize=(16, 6))
    data[column].hist()
    plt.xlabel(axis_label)

# Count plots of each categorical feature split by the target
# ('bank_account'), one wide figure per feature.
plain_ticks = dict(fontweight="light", fontsize="x-large")
# Same font settings, plus labels rotated 45 degrees and right-aligned.
slanted_ticks = dict(rotation=45, horizontalalignment="right", **plain_ticks)

# Feature column -> keyword arguments passed to plt.xticks for its figure.
tick_style_by_column = {
    "location_type": plain_ticks,
    "gender_of_respondent": plain_ticks,
    "cellphone_access": plain_ticks,
    "relationship_with_head": slanted_ticks,
    "marital_status": slanted_ticks,
    "education_level": slanted_ticks,
    "job_type": slanted_ticks,
}

for column, tick_kwargs in tick_style_by_column.items():
    plt.figure(figsize=(16, 6))
    sns.countplot(data=data, x=column, hue="bank_account")
    plt.xticks(**tick_kwargs)