import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the retailers dataset into a DataFrame.
df = pd.read_csv('/work/largest_us_retailers.csv')
df  # notebook-style preview; does nothing when run as a plain script

# Mean of 'Sales' over every row except the first one (positional slice),
# rounded to two decimals.
average = round(df['Sales'].iloc[1:].mean(), 2)
print(f'The average of sales is: {average} millions dollars')
# Figure: 'Sales' of the row labelled 0 compared with `average`.
df = df.sort_values('Sales', ascending=False)
plt.subplots(figsize=(6, 8))
# Label-based lookup (index label 0) is kept on purpose: sort_values keeps the
# original labels, so this picks the same row regardless of the sort above.
# NOTE(review): assumes the default integer index from read_csv, so that label
# 0 is the first file row that was left out of `average` - confirm.
xaver = [df.loc[0, 'Company'], 'Average of sales of other companies']
yaver = [df.loc[0, 'Sales'], average]
plt.bar(xaver, yaver)
plt.title('Average of sales', fontsize='25', fontweight='bold')
plt.ylabel('Millions of dollars', fontweight='bold')
# The original `plt.show` (no parentheses) only referenced the function and
# never rendered anything; it has to be called.
plt.show()
# Figure: bar chart of 'Sales' per 'Company', rows ordered by descending sales.
df = df.sort_values('Sales', ascending=False)
plt.subplots(figsize=(6, 10))
sns.barplot(data=df, x='Sales', y='Company')
plt.title('Money generated by company', fontsize='25', fontweight='bold')
plt.xlabel('Sales', fontweight='bold')
plt.ylabel('Companies', fontweight='bold')
# Figure: number of stores per company, for companies with at least one store.
df = df.sort_values('Stores', ascending=False)
# Rows whose 'Stores' is missing or not positive fail the comparison and are
# excluded. Filtering the already-sorted frame keeps the descending order.
physical = df[df['Stores'] > 0]
plt.subplots(figsize=(6, 10))
# Both axes come from the same frame. The original took x from `physical` and
# y from the separately sorted, unfiltered `df`, which only worked through
# implicit index alignment and did not tie the bar order to the store count.
sns.barplot(data=physical, x='Stores', y='Company')
plt.title('Number of stores by company', fontsize='25', fontweight='bold')
plt.xlabel('number of stores', fontweight='bold')
plt.ylabel('Companies', fontweight='bold')
# Figure: 'Sales' and 'Stores' drawn as overlaid bars per company.
# Apply the theme before the figure is created so its style is picked up by
# the new axes (the original set it afterwards).
sns.set_theme(style='whitegrid')
# plt.subplots returns (figure, axes) in that order; the original unpacked it
# as `ax, g`, so the name `ax` was actually bound to the Figure.
fig, ax = plt.subplots(figsize=(20, 9))
physical = physical.sort_values('Stores', ascending=False)
sns.barplot(data=physical, x='Sales', y='Company',
            label='Sales', color='b', ax=ax)
sns.barplot(data=physical, x='Stores', y='Company',
            label='Number of stores', color='red', ax=ax)
ax.set_xlabel('Sales', fontweight='bold')
ax.set_ylabel('Companies', fontweight='bold')
ax.set_title('Sales vs number of stores', fontweight='bold', fontsize='25')
# Legend is attached to the axes that hold the labelled bars.
ax.legend(ncol=2, loc='upper center', frameon=True)
# Figure: box plot showing the distribution of 'Sales'.
# Start a fresh figure: without one, a plain script draws the box plot onto
# whatever axes are currently active and overwrites their title and x label.
plt.subplots()
sns.boxplot(data=df, x='Sales', width=0.5, fliersize=10)
sns.despine(offset=20, trim=True)
plt.title('Range of sales', fontsize='25', fontweight='bold')
plt.xlabel('Sales', fontweight='bold')
# Figure: the five companies with the most stores.
plt.subplots(figsize=(12, 7))
physical = physical.sort_values('Stores', ascending=False)
# Slice the rows once and plot from that frame. The original combined the
# full-length 'Stores' column with a 5-element 'Company' Series and relied on
# index alignment / dropping of unmatched rows to end up with five bars.
top_five = physical.head(5)
sns.barplot(data=top_five, x='Stores', y='Company', palette='viridis')
plt.title('Top 5 companies that have more stores', fontsize='25', fontweight='bold')
plt.xlabel('Number of stores', fontweight='bold')
plt.ylabel('Companies', fontweight='bold')
# Figure: total 'Sales' per 'Category', drawn as size-scaled points.
# Sum numeric columns only; a plain .sum() also tries to "add up" text columns
# such as 'Company', which is meaningless for this aggregate.
categories = df.groupby('Category').sum(numeric_only=True)
plt.subplots(figsize=(8, 8))
# After groupby, 'Category' is the index of `categories`; reset_index() turns
# it into a real column so it can be referenced by name for y and hue.
sns.scatterplot(data=categories.reset_index(), x='Sales', y='Category',
                size='Sales', hue='Category', sizes=(100, 5000), legend=False)
plt.xlabel('Sales', fontweight='bold')
plt.ylabel('Categories of stores', fontweight='bold')
plt.title('Sales by categories', fontsize='25', fontweight='bold')
# Figure: the fifteen companies with the highest 'Sales'.
plt.subplots(figsize=(6, 10))
df = df.sort_values('Sales', ascending=False)
# Take the top rows once and plot from that frame. The original paired the
# full 'Sales' column with a 15-element 'Company' Series and depended on
# implicit index alignment to discard the remaining rows.
top_sales = df.head(15)
sns.barplot(data=top_sales, x='Sales', y='Company')
plt.ylabel('Company', fontweight='bold')
plt.xlabel('Sales', fontweight='bold')
plt.title('Sales', fontweight='bold', fontsize='25')
# Render the figures that are still open; without a plt.show() call a plain
# script ends without displaying them.
plt.show()