Análisis de los 25 retailers más grandes de Estados Unidos
# Load the retailer dataset and prepare it for the analysis below.
retail_df = pd.read_csv('largest_us_retailers.csv')

# Hand-assigned categories, keyed by row label.
category_overrides = {6: 'Online', 11: 'Online', 10: 'Warehouse Club'}
for row_label, category_name in category_overrides.items():
    retail_df.loc[row_label, 'Category'] = category_name

# Give the growth column an identifier-friendly name, then replace every
# missing value in the frame with 0.
retail_df = retail_df.rename(columns={'Store Count Growth': 'Store_Growth'}).fillna(0)
retail_df
I. Preguntas del negocio
# Rank companies by sales (highest first) and drop the first row, so the top
# seller (Walmart, judging by the variable names) does not skew the average.
retail_sales_mean_df = (
    retail_df[['Company', 'Sales']]
    .sort_values('Sales', ascending=False)
    .iloc[1:]
)
retail_sales_mean_df.style.format({"Sales": "{:,d}"})

# Mean of the remaining companies, plus a thousands-separated string for display.
retail_sales_mean_without_walmart = retail_sales_mean_df['Sales'].mean()
sales_mean_without_walmart = f"{retail_sales_mean_without_walmart:,.2f}"
print(f'El promedio de ventas sin contar con Walmart es de {sales_mean_without_walmart} millones de dólares')
# Bar chart of sales per company (top seller excluded), with the mean drawn
# as a horizontal reference line.
sales_values = retail_sales_mean_df['Sales']
sales_labels = retail_sales_mean_df['Company']
plt.bar(sales_labels, sales_values, color='y')
plt.xticks(rotation=90)  # rotate the company names so they do not overlap
plt.title('Retailers Sales without Walmart')
plt.ylabel('Sales in Millions')  # spelling fixed (was 'Millons')
plt.xlabel('Companies')
plt.axhline(y=retail_sales_mean_without_walmart, color='blue')
plt.show()
# Stacked histogram of sales, coloured by company.
sns.displot(
    data=retail_df,
    x='Sales',
    hue='Company',
    multiple='stack',
)
# Keep only companies that actually operate physical stores.
# Missing values were replaced with 0 when the data was cleaned, so a null
# check alone matches every row; a store count of 0 has to be excluded too.
has_stores = retail_df['Stores'].notnull() & (retail_df['Stores'] > 0)
retail_with_stores_df = retail_df[has_stores]
retail_with_stores_df.style.format({"Sales": "{:,d}"})

# Distribution of store counts, stacked by company.
sns.displot(retail_with_stores_df, x='Stores', hue='Company', multiple='stack')

# Sales vs. store count, coloured by category; the x axis is capped at 700,000.
sns.jointplot(data=retail_with_stores_df, x='Sales', y='Stores', hue='Category', xlim=(0, 700000))
# The ten companies with the most stores, renumbered from 0.
retail_top_stores_df = (
    retail_with_stores_df[['Company', 'Stores', 'Sales']]
    .sort_values('Stores', ascending=False)
    .head(10)
    .reset_index(drop=True)
)
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# .hide(axis="index") when upgrading.
retail_top_stores_df.style.format({"Sales": "{:,d}", "Stores": "{:,.0f}"}).hide_index()
# Histogram stacked above a box plot of the same variable; the two panels
# share the x axis so both are read on one sales scale.
# Optional: enlarge the figure with sns.set(rc={'figure.figsize': (11.7, 8.27)})
f, (ax_hist, ax_box) = plt.subplots(2, sharex=True, gridspec_kw={"height_ratios": (.6, .4)})

# Pass the series as `x` explicitly: newer seaborn treats a positional first
# argument as `data`, which can change the box orientation and break the
# shared x axis.
sns.histplot(x=retail_df['Sales'], ax=ax_hist, color='g')
sns.boxplot(x=retail_df['Sales'], ax=ax_box, color='y')
# Top five companies by number of stores.
retail_top_5_stores = (
    retail_with_stores_df[['Company', 'Stores', 'Sales']]
    .sort_values('Stores', ascending=False)
    .head(5)
    .reset_index(drop=True)
)
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# .hide(axis="index") when upgrading.
retail_top_5_stores.style.format({"Sales": "{:,d}", "Stores": "{:,.0f}"}).hide_index()

# Top five companies by sales.
retail_top_5_sales = (
    retail_df[['Company', 'Stores', 'Sales']]
    .sort_values('Sales', ascending=False)
    .head(5)
    .reset_index(drop=True)
)
retail_top_5_sales.style.format({"Sales": "{:,d}", "Stores": "{:,.0f}"}).hide_index()

# Inner join on all shared columns: the companies present in both rankings.
retail_top_5_stores.merge(retail_top_5_sales)
# Five companies with the highest 'Sales/Avg. Store' value.
retail_top_5_store_performace = (
    retail_df
    .sort_values(by='Sales/Avg. Store', ascending=False)
    .head(5)
    [['Company', 'Sales/Avg. Store', 'Stores', 'Category']]
)
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# .hide(axis="index") when upgrading.
retail_top_5_store_performace.style.format(
    {"Sales/Avg. Store": "{:,.2f}", "Stores": "{:,.0f}"}
).hide_index()
# Index by (Category, Company) and show only the rows in the 'Online' category.
retail_by_category = retail_df.set_index(['Category', 'Company'])
retail_by_category.loc['Online']
# Horizontal bar chart of sales per category, each bar labelled with its value.
# NOTE(review): sales_category_labels and sales_category_values are not defined
# in this part of the file; presumably they are computed earlier. Confirm.
fig, ax = plt.subplots(figsize=(16, 9))
ax.barh(sales_category_labels, sales_category_values, color='g')
plt.xlabel("Sales")
plt.ylabel("Category")
plt.title('Sales x Category')

# Write the formatted value just past the end of every bar.
for bar in ax.patches:
    bar_length = bar.get_width()
    plt.text(
        bar_length + 0.2,
        bar.get_y() + 0.4,
        f'{bar_length:,.0f}',
        fontsize=10,
        fontweight='bold',
        color='grey',
    )

# Callout placed with figure-level coordinates.
fig.text(
    0.43, 0.66, '<==== Apple and Amazon',
    fontsize=12, color='red', ha='right', va='bottom',
    fontweight='bold', alpha=0.7,
)
plt.show()
# Ten companies with the highest sales, in descending order.
retail_sales_top_df = retail_df[['Company', 'Sales']].sort_values('Sales', ascending=False).head(10)
retail_sales_top_df.style.format({"Sales": "{:,d}"})

# Bar chart of the top ten.
sales_values = retail_sales_top_df['Sales']
sales_labels = retail_sales_top_df['Company']
plt.bar(sales_labels, sales_values, color='y')
plt.xticks(rotation=90)  # rotate the company names so they do not overlap
plt.title('Top 10 US Retailers Sales')
plt.ylabel('Sales in Millions')  # spelling fixed (was 'Millons')
plt.xlabel('Companies')
plt.show()

# The first row is the leader because the frame is sorted by sales, descending.
# Columns are read by name rather than by position.
dominant_company = retail_sales_top_df['Company'].iloc[0]
dominant_sales = retail_sales_top_df['Sales'].iloc[0]
print(f'{dominant_company} es la compañía dominante con {dominant_sales} millones de dólares en ventas')
# Companies whose store count grew, ordered from fastest to slowest growth.
retail_store_growth_df = retail_df.query('Store_Growth > 0').sort_values('Store_Growth', ascending=False)
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# .hide(axis="index") when upgrading.
retail_store_growth_df.head(5).style.format({"Sales": "{:,d}", "Stores": "{:,.0f}", "Store_Growth": "{:.2%}"}).hide_index()

# Read the leader's values by column name instead of by column position, so
# the message stays correct if the columns are ever reordered.
fastest_growing_company = retail_store_growth_df['Company'].iloc[0]
fastest_growth_rate = retail_store_growth_df['Store_Growth'].iloc[0]
print('La compañía {} es la que ha tenido un mayor incremento en tiendas, creció un {:.2%} en tiendas'.format(fastest_growing_company, fastest_growth_rate))
# Total number of stores per category, as a flat two-column frame.
retail_category_stores_df = retail_df.groupby('Category')['Stores'].sum().reset_index()
retail_category_stores_df.plot.bar(x='Category', y='Stores')

# Five categories with the most stores.
# NOTE(review): Styler.hide_index() was removed in pandas 2.0; use
# .hide(axis="index") when upgrading.
(
    retail_category_stores_df
    .sort_values('Stores', ascending=False)
    .head(5)
    .style.format({"Stores": "{:,.0f}"})
    .hide_index()
)