import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# Show floats with two decimal places. The fully qualified option key is used
# because the bare 'precision' pattern can match more than one pandas option
# (e.g. 'display.precision' and 'styler.format.precision'), which raises
# OptionError.
pd.set_option('display.precision', 2)

# Load the dataset before anything touches `data`; the column preview below
# previously ran ahead of this assignment and failed with NameError.
data = pd.read_csv('Ecommerce Purchases.csv')

# Quick look at the 'Credit Card' column, the first rows, and the schema.
data['Credit Card']
data.head()
data.info()
# Row count for each 'AM or PM' value.
data['AM or PM'].value_counts()

# Mean and summed 'Purchase Price' per 'AM or PM' value, displayed as two
# separate tables (average first, total second).
by_period = data.groupby('AM or PM')
avg_spent = by_period.agg({'Purchase Price': 'mean'}).rename(
    columns={'Purchase Price': 'Avg. Spent'}
)
tot_spent = by_period.agg({'Purchase Price': 'sum'}).rename(
    columns={'Purchase Price': 'Tot. Spent'}
)
display(avg_spent, tot_spent)
# Row count for each distinct 'Browser Info' string.
data['Browser Info'].value_counts()

# Relative frequency of the leading token of 'Browser Info'
# (the text before the first '/').
browser_prefix = data['Browser Info'].str.split('/').str.get(0)
browser_prefix.value_counts(normalize=True)
# Seven most frequent 'Company' values.
data['Company'].value_counts().head(7)

# Seven companies with the largest summed 'Purchase Price', labelled 'Revenue'.
revenue_by_company = data.groupby('Company').agg({'Purchase Price': 'sum'})
(
    revenue_by_company
    .sort_values('Purchase Price', ascending=False)
    .rename(columns={'Purchase Price': 'Revenue'})
    .head(7)
)
# Three views per 'CC Provider': row counts, summed purchase price, and mean
# purchase price. The two price tables are sorted from highest to lowest.
provider_counts = data['CC Provider'].value_counts()
by_provider = data.groupby('CC Provider')

provider_total = (
    by_provider.agg({'Purchase Price': 'sum'})
    .sort_values('Purchase Price', ascending=False)
    .rename(columns={'Purchase Price': 'Tot. Spent'})
)
provider_average = (
    by_provider.agg({'Purchase Price': 'mean'})
    .sort_values('Purchase Price', ascending=False)
    .rename(columns={'Purchase Price': 'Avg. Spent'})
)

display(provider_counts, provider_total, provider_average)
# Ten most frequent 'Job' values; computed once and reused for the listing.
top_jobs = data['Job'].value_counts().head(10)
top_jobs

print('The top 10 most common jobs within the customers are:')
for job in top_jobs.index:
    # The loop body must be indented; it was flush-left before, which is an
    # IndentationError.
    print(job)

# Ten jobs with the highest summed and the highest mean 'Purchase Price'.
purchases_by_job = data.groupby('Job')

# NOTE(review): the ' Total Rev.' label carries a leading space in the
# original; it is kept unchanged here in case something relies on it.
job_top_10_total_rv = (
    purchases_by_job.agg({'Purchase Price': 'sum'})
    .sort_values('Purchase Price', ascending=False)
    .head(10)
    .rename(columns={'Purchase Price': ' Total Rev.'})
)
job_top_10_avg_rv = (
    purchases_by_job.agg({'Purchase Price': 'mean'})
    .sort_values('Purchase Price', ascending=False)
    .head(10)
    .rename(columns={'Purchase Price': 'Average Rev.'})
)
display(job_top_10_total_rv, job_top_10_avg_rv)
# Side-by-side histogram (30 bins) and box plot of 'Purchase Price'.
# Explicit axes objects are used instead of the implicit "current subplot"
# state, and the misspelled figure title ("Purcahse") is corrected.
fig, (hist_ax, box_ax) = plt.subplots(
    1, 2, figsize=(18, 6), facecolor='lightblue'
)
fig.suptitle('Purchase Price Distribution')

sns.histplot(x=data['Purchase Price'], bins=30, ax=hist_ax)
hist_ax.set_title('Purchase Price Histogram')

sns.boxplot(x=data['Purchase Price'], ax=box_ax)
box_ax.set_title('Purchase Price Box Plot')

plt.show()
# Print the maximum, minimum, and mean 'Purchase Price', each preceded by its
# heading on a separate line.
price = data['Purchase Price']
summary = (
    ('Maximum Purchase Price', price.max()),
    ('Minimum Purchase Price', price.min()),
    ('Average Purchase Price', price.mean()),
)
for heading, value in summary:
    print(heading)
    print(value)

# Row(s) whose 'Purchase Price' equals the minimum.
data.loc[price == price.min()]
# Number of rows with Language 'en' and Job 'Social researcher'.
is_english = data['Language'] == 'en'
is_social_researcher = data['Job'] == 'Social researcher'
(is_english & is_social_researcher).sum()

# 'Email' column for rows with Job 'Investment analyst' and Language 'en'.
is_investment_analyst = data['Job'] == 'Investment analyst'
data.loc[is_investment_analyst & is_english, ['Email']]

# 'Email' column for the row(s) with this exact IP address.
from_ip = data['IP Address'] == '156.210.0.254'
data.loc[from_ip, ['Email']]

# Number of rows with CC Provider 'VISA 16 digit' and a purchase price above 70.
is_visa_16 = data['CC Provider'] == 'VISA 16 digit'
over_70 = data['Purchase Price'] > 70
len(data.loc[is_visa_16 & over_70])

# 'Email' column for rows whose 'Credit Card' is one of the listed numbers.
card_numbers = [6011578504430710, 6011456623207998, 180003348082930]
data.loc[data['Credit Card'].isin(card_numbers), ['Email']]
# Count rows whose 'CC Exp Date' has '22' after the '/' separator.
# The printed heading had typos ("customer", "cardit") that are fixed here.
print('Number of customers with credit cards expiring in 2022')
expiry_year = data['CC Exp Date'].str.split('/').str.get(1)
len(data.loc[expiry_year == '22'])

# Most frequent email provider: the text between '@' and the next '.' in
# 'Email'. The printed heading had a typo ("popualr") that is fixed here.
print('The most popular email provider for the customers is')
email_provider = (
    data['Email'].str.split('@').str.get(1).str.split('.').str.get(0)
)
email_provider.value_counts().head(1)