import warnings

import matplotlib.pyplot as plt  # for visualizations
import numpy as np  # for computations
import pandas as pd  # for data manipulations
import seaborn as sns  # for visualization

warnings.filterwarnings('ignore')
# %matplotlib inline  # IPython magic: uncomment when running inside a notebook
pd.set_option("display.max_colwidth", 50)

Loading data

# Read the survey records into a DataFrame.
data = pd.read_csv(filepath_or_buffer="data/df.csv")

# Let's preview the dataset (first rows only).
data.head()

As we can observe, the data frame's columns already have clear, standard names whose meaning is easy to understand, so there is no need to rename them.

# Let's see the shape of the dataset as (rows, columns).
data.shape

The dataset contains 23,524 records and 13 columns.

# To understand what each column means, let's load the variable definitions file.
var_def = pd.read_csv("data/VariableDefinitions.csv")
# Display it transposed so the definitions take up less space.
var_def.T

`var_def.T` displays the records transposed, which makes it easier to read each record in a small space rather than having the table occupy a large one.

# Let's see summary information about our dataset.
data.info()

# Let's check for missing data: True if any column has at least one null value.
data.isnull().sum().any()

Univariate Analysis

# Let's start by exploring the age distribution with a histogram.
plt.figure(figsize=(14, 7))
data.age_of_respondent.hist()
plt.title("Age of Respondent Distribution")
plt.xlabel('age_of_respondent')
plt.show()

# Let's explore the household_size distribution with a histogram.
plt.figure(figsize=(14, 7))
data.household_size.hist()
plt.title("Household Size Distribution")
plt.xlabel('household_size')
plt.show()

# Let's explore the distribution of our target column using a count plot.
sns.catplot(x="bank_account", kind="count", data=data)
plt.title("Bank Account Distribution")

# Let's explore the distribution of the respondents' gender using a count plot.
sns.catplot(x="gender_of_respondent", kind="count", data=data)
plt.title("Gender of Respondent Distribution")

# Let's explore the distribution of the respondents' country using a count plot.
sns.catplot(x="country", kind="count", data=data)
plt.title("Country of Respondent Distribution")

# Let's explore the distribution of the year column using a count plot.
sns.catplot(x="year", kind="count", data=data)
plt.title("Year Distribution")

# Let's explore the distribution of location_type using a count plot.
sns.catplot(x="location_type", kind="count", data=data)
plt.title("Location Distribution")

# Let's explore the distribution of cellphone_access using a count plot.
sns.catplot(x="cellphone_access", kind="count", data=data)
plt.title("Cellphone Access Distribution")

# Let's explore the distribution of education level using a count plot.
sns.catplot(x="education_level", kind="count", data=data)
plt.title("Education Level Distribution")
# Rotate and right-align the tick labels.
plt.xticks(
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large',
)

# Let's explore the distribution of job type using a count plot.
sns.catplot(x="job_type", kind="count", data=data)
plt.title("Job Type Distribution")
# Rotate and right-align the tick labels.
plt.xticks(
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large',
)

# Let's explore the distribution of marital_status using a count plot.
sns.catplot(x="marital_status", kind="count", data=data)
plt.title("Marital Status Distribution")
# Rotate and right-align the tick labels.
plt.xticks(
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large',
)

Loading data

Univariate Analysis

Loading data