import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the retailers dataset from its CSV file, then echo the frame so a
# notebook renders a preview of it (a bare expression is a no-op in a script).
DATA_PATH = '/work/largest_us_retailers.csv'
df = pd.read_csv(DATA_PATH)
df
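# Notebook output of `df` (partial DataFrame preview):
#   Company - dtype object; Walmart US 4%, Kroger 4%, 23 others 92%
#   Sales   - dtype int64; range 16592 - 658119
#
#   index  Company             Sales
#   10     Sam's Club          56828
#   11     Apple incl. Online  37664
#   12     Best Buy            34980
#   13     Publix              34408
#   14     Rite Aid            27486
#   15     Ahold               26903
#   16     Macy's              26028
#   17     TJX                 25012
#   18     Aldi                24402
#   19     Dollar General      22234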
# Compare the top-selling company against the mean sales of all the others.
#
# Sort first and select by position, so the result depends neither on the row
# order of the CSV nor on the top seller happening to carry index label 0.
df = df.sort_values('Sales', ascending=False)
top_seller = df.iloc[0]
average = round(df['Sales'].iloc[1:].mean(), 2)
print(f'The average of sales is: {average} millions dollars')

plt.subplots(figsize=(6, 8))
xaver = [top_seller['Company'], 'Average of sales of other companies']
yaver = [top_seller['Sales'], average]
plt.bar(xaver, yaver)
plt.title('Average of sales', fontsize='25', fontweight='bold')
plt.ylabel('Millions of dollars', fontweight='bold')
# `plt.show` without parentheses only names the function; it must be called.
plt.show()
# Output: The average of sales is: 46602.42 millions dollars
# Horizontal bar chart of sales per company, ordered from highest to lowest.
df = df.sort_values('Sales', ascending=False)
sales_fig, sales_ax = plt.subplots(figsize=(6, 10))
sns.barplot(x=df['Sales'], y=df['Company'], ax=sales_ax)
sales_ax.set_title('Money generated by company', fontsize='25', fontweight='bold')
sales_ax.set_xlabel('Sales', fontweight='bold')
sales_ax.set_ylabel('Companies', fontweight='bold')
# Horizontal bar chart of the number of stores per company, largest first.
#
# `physical` keeps only rows whose 'Stores' value is greater than zero (rows
# with a missing store count fail the comparison and are dropped as well).
physical = df[df['Stores'] > 0]
df = df.sort_values('Stores', ascending=False)

# Draw both axes from one sorted frame. The original took x from `physical`
# and y from the separately sorted, unfiltered `df`, leaving the bar order and
# the set of rows to index alignment inside seaborn.
stores_ranked = physical.sort_values('Stores', ascending=False)
plt.subplots(figsize=(6, 10))
sns.barplot(data=stores_ranked, x='Stores', y='Company')
plt.title('Number of stores by company', fontsize='25', fontweight='bold')
plt.xlabel('number of stores', fontweight='bold')
plt.ylabel('Companies', fontweight='bold')
# Overlay sales and store counts per company as two sets of horizontal bars
# drawn on the same axes.
#
# Set the theme before the figure exists so it applies to this chart too.
sns.set_theme(style='whitegrid')

# plt.subplots returns (figure, axes). The original unpacked them as
# `ax, g`, so `ax` was really the Figure and `ax.legend` built a figure-level
# legend instead of a legend on the plot itself.
fig, ax = plt.subplots(figsize=(20, 9))
physical = physical.sort_values('Stores', ascending=False)
sns.barplot(data=physical, x='Sales', y='Company', label='Sales', color='b', ax=ax)
sns.barplot(data=physical, x='Stores', y='Company', label='Number of stores', color='red', ax=ax)
ax.set_xlabel('Sales', fontweight='bold')
ax.set_ylabel('Companies', fontweight='bold')
ax.set_title('Sales vs number of stores', fontweight='bold', fontsize='25')
ax.legend(ncol=2, loc='upper center', frameon=True)
# Box plot of the distribution of sales across all companies.
#
# Open a dedicated figure, as every other chart here does; without it the box
# plot is drawn onto whichever axes happen to be current when run as a script.
plt.subplots()
sns.boxplot(data=df, x='Sales', width=0.5, fliersize=10)
sns.despine(offset=20, trim=True)
plt.title('Range of sales', fontsize='25', fontweight='bold')
plt.xlabel('Sales', fontweight='bold')
# Bar chart of the five companies with the most stores.
physical = physical.sort_values('Stores', ascending=False)

# Slice the frame once and plot from it. The original passed the full
# 'Stores' column together with a 5-row 'Company' Series, relying on seaborn
# to align the two by index and discard the unmatched rows.
top_by_stores = physical.head(5)
plt.subplots(figsize=(12, 7))
sns.barplot(data=top_by_stores, x='Stores', y='Company', palette='viridis')
plt.title('Top 5 companies that have more stores', fontsize='25', fontweight='bold')
plt.xlabel('Number of stores', fontweight='bold')
plt.ylabel('Companies', fontweight='bold')
# Bubble chart of sales per category: every column of `df` is summed within
# each 'Category' group, and each bubble is sized by that group's 'Sales' sum.
categories = df.groupby('Category').sum()
category_fig, category_ax = plt.subplots(figsize=(8, 8))
sns.scatterplot(
    data=categories,
    x='Sales',
    y='Category',
    size='Sales',
    hue='Category',
    sizes=(100, 5000),
    legend=False,
    ax=category_ax,
)
category_ax.set_xlabel('Sales', fontweight='bold')
category_ax.set_ylabel('Categories of stores', fontweight='bold')
category_ax.set_title('Sales by categories', fontsize='25', fontweight='bold')
# Bar chart of the fifteen companies with the highest sales.
df = df.sort_values('Sales', ascending=False)

# Take the top rows as one frame so both axes come from the same 15 rows. The
# original paired the full 'Sales' column with a 15-row 'Company' slice and
# depended on index alignment inside seaborn to drop the rest.
top_sellers = df.head(15)
plt.subplots(figsize=(6, 10))
sns.barplot(data=top_sellers, x='Sales', y='Company')
plt.ylabel('Company', fontweight='bold')
plt.xlabel('Sales', fontweight='bold')
plt.title('Sales', fontweight='bold', fontsize='25')