Business Transaction EDA
#Max Alejandro Baldiviezo Morón
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
Data upload
# Load the raw transaction records from the CSV export.
DATA_PATH = '/work/Business transaction.csv'
df = pd.read_csv(DATA_PATH)
# Bare expressions: a notebook renders them only when they end a cell.
df.describe()
df
3.1 Analyzing the distribution of price and transaction value (transaction value = price * quantity): the price distribution should cover the range $1.5 to $5, and the transaction value distribution should cover the range $0 to $40
# Transaction value of each row is its Price multiplied by its Quantity.
df['TransactionValue'] = df['Price'].mul(df['Quantity'])
# Re-run the summary so the new column is included.
df.describe()
This chart is empty
Chart was probably not set up properly in the notebook
# Scatter of Quantity (x) against TransactionValue (y) on a 15x7 figure.
plt.figure(figsize=(15, 7))
sns.scatterplot(x='Quantity', y='TransactionValue', data=df)
# Histogram of Price over the $1.5-$5 window (0.5-wide bins, bar heights in
# percent) with the average price of that same window drawn as a red line.
PRICE_RANGE = (1.5, 5)
plt.figure(figsize=(15, 7))
# Keep the Axes that histplot draws on instead of re-fetching it afterwards.
ax = sns.histplot(df['Price'], stat="percent", binwidth=0.5, binrange=PRICE_RANGE)
plt.title("Distribution of Price")
plt.xlabel('Price')
plt.ylabel('Percent')
# Average only the prices inside the plotted window (bounds inclusive) so the
# marker describes the same rows as the bars; the previous `<= 5` filter also
# pulled in prices below the lower edge of the histogram.
dfp = df[df['Price'].between(*PRICE_RANGE)]
mean = dfp['Price'].mean()
ax.axvline(mean, color='red', label='Average price ' + str(round(mean, 2)))
plt.legend()
plt.show()
Distribution of price
# Histogram of TransactionValue over the $0-$40 window (5-wide bins, bar
# heights in percent) with the average of that same window drawn as a red line.
VALUE_RANGE = (0, 40)
plt.figure(figsize=(15, 7))
# Keep the Axes that histplot draws on instead of re-fetching it afterwards.
ax = sns.histplot(df['TransactionValue'], stat="percent", binwidth=5, binrange=VALUE_RANGE)
plt.title("Distribution of Transaction Value")
plt.xlabel('Transaction Value')
plt.ylabel('Percent')
# Average only the values inside the plotted window (bounds inclusive); the
# previous `<= 40` filter let negative transaction values drag the marker down
# even though the bars start at 0.
dftv = df[df['TransactionValue'].between(*VALUE_RANGE)]
mean = dftv['TransactionValue'].mean()
ax.axvline(mean, color='red', label='Average transaction value ' + str(round(mean, 2)))
plt.legend()
plt.show()
Distribution of transaction value
3.2 Analyzing revenue by country and average revenue by weekday (revenue = sum of transaction values)
def percentplot(plot, total):
    """Label every bar of *plot* with its share of *total* and show the figure.

    Args:
        plot: Axes-like object exposing ``patches`` and ``annotate`` (for
            example the object returned by ``sns.barplot``).
        total: Value that represents 100%; each bar height is divided by it.
    """
    # A zero total has no meaningful percentages; show the bars unlabelled.
    if total == 0:
        plt.show()
        return
    # Use the axes passed in rather than whatever global `ax` happens to exist.
    for patch in plot.patches:
        percentage = '{:.1f}%'.format(100 * patch.get_height() / total)
        # Place the label at the top of the bar, nudged left of its centre.
        x = patch.get_x() + patch.get_width() / 2 - 0.05
        y = patch.get_y() + patch.get_height()
        plot.annotate(percentage, (x, y), size=12)
    plt.show()
# Total revenue (summed TransactionValue) per country, each bar labelled with
# its percentage of the overall revenue.
plt.figure(figsize=(15, 7))
# numeric_only=True keeps text columns such as the Date strings out of the
# aggregation; summing them is wasted work and can raise on mixed-type columns.
dfc = df.groupby(['Country']).sum(numeric_only=True)
ax = sns.barplot(x=dfc.index, y=dfc['TransactionValue'])
percentplot(ax, dfc['TransactionValue'].sum())
Revenue by country
# Total revenue per day of the week, each bar labelled with its percentage of
# the overall revenue.
# Parse the month/day/year strings once, column-wide, instead of row by row.
df['Dayweek'] = pd.to_datetime(df['Date'], format="%m/%d/%Y").dt.day_name()
plt.figure(figsize=(15, 7))
# numeric_only=True keeps text columns out of the aggregation.
dfdw = df.groupby(['Dayweek']).sum(numeric_only=True)
# groupby sorts the day names alphabetically; put them back in calendar order,
# skipping any weekday that has no rows so no empty bar is introduced.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                 'Friday', 'Saturday', 'Sunday']
dfdw = dfdw.reindex([day for day in weekday_order if day in dfdw.index])
ax = sns.barplot(x=dfdw.index, y=dfdw['TransactionValue'])
percentplot(ax, dfdw['TransactionValue'].sum())
Revenue by weekday
Analysis of number of clients and transactions by country
# Number of distinct CustomerNo values per country, each bar labelled with its
# percentage of the sum of those per-country counts.
plt.figure(figsize=(15, 7))
customer_ids_by_country = df.groupby('Country')['CustomerNo']
CC = customer_ids_by_country.nunique()
ax = sns.barplot(x=CC.index, y=CC.to_numpy())
percentplot(ax, CC.to_numpy().sum())
# Number of distinct TransactionNo values per country, each bar labelled with
# its percentage of the sum of those per-country counts.
plt.figure(figsize=(15, 7))
transaction_ids_by_country = df.groupby('Country')['TransactionNo']
CT = transaction_ids_by_country.nunique()
ax = sns.barplot(x=CT.index, y=CT.to_numpy())
percentplot(ax, CT.to_numpy().sum())
Revenue analysis per month
# Total revenue per calendar month, each bar labelled with its percentage of
# the overall revenue.
# Parse the month/day/year strings once, column-wide, instead of row by row.
df['Month'] = pd.to_datetime(df['Date'], format="%m/%d/%Y").dt.month_name()
plt.figure(figsize=(15, 7))
# numeric_only=True keeps text columns out of the aggregation.
dfm = df.groupby(['Month']).sum(numeric_only=True)
# groupby sorts the month names alphabetically; put them back in calendar
# order, skipping any month that has no rows so no empty bar is introduced.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
dfm = dfm.reindex([month for month in month_order if month in dfm.index])
ax = sns.barplot(x=dfm.index, y=dfm['TransactionValue'])
percentplot(ax, dfm['TransactionValue'].sum())