Análisis de los 25 retailers más grandes de Estados Unidos
# Importar librerías aquí
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from colors import *
# Load the retailers dataset from a CSV file given by relative path
df = pd.read_csv(filepath_or_buffer='largest_us_retailers.csv')
# Bare expression: displays the frame when this runs as a notebook cell
df
I. Preguntas del negocio
# Sort by sales, largest first, so the top seller sits in the first row
df_sorted = df.sort_values(by='Sales', ascending=False)
# Mean sales over every row except the first one (the printed message
# treats that first row as Walmart)
df_mean_sales = df_sorted['Sales'].iloc[1:].mean()
print(FG_BLUE)
print('Promedio de ventas sin Walmart: %i USD' % int(df_mean_sales))
# Five-number summary (min, Q1, median, Q3, max) of the numeric columns.
# numeric_only=True keeps the text columns out of the computation; without
# it recent pandas versions raise on non-numeric columns.
minval = df_sorted.quantile(0, numeric_only=True)
Q1 = df_sorted.quantile(0.25, numeric_only=True)
# The original chained assignment `median = Q1 = ...` overwrote Q1 with the
# median, so the first quartile was never reported.
median = df_sorted.quantile(0.50, numeric_only=True)
Q3 = df_sorted.quantile(0.75, numeric_only=True)
maxval = df_sorted.quantile(1, numeric_only=True)
print(minval, Q1, median, Q3, maxval)
from colors import *
# Sales of the first row of the sales-sorted frame (reported as Walmart)
walmart = df_sorted.iloc[0]['Sales']
# Combined sales of every remaining row
all_no_walmart = df_sorted['Sales'].iloc[1:].sum()
# Grand total
all_companies = all_no_walmart + walmart

# Each message is preceded by its colour code, printed on its own line
report = (
    (FG_BLUE, 'Ventas de Walmart: %i USD' % int(walmart)),
    (FG_RED, 'Ventas de todas las compañias (menos Walmart): %i USD' % int(all_no_walmart)),
    (FG_GREEN, 'Ventas de todas las compañia: %i USD' % int(all_companies)),
)
for color_code, message in report:
    print(color_code)
    print(message)
Ventas de todas las compañías menos Walmart
# Every row except the first, taken in reverse order (last row first)
companies = df_sorted['Company'].iloc[:0:-1]
sales = df_sorted['Sales'].iloc[:0:-1]

# Horizontal stem plot: one stem per company, labelled on the y axis
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
ax.stem(sales, orientation='horizontal', linefmt='k', markerfmt='C1o')
ax.set_yticks(list(range(len(companies))))
ax.set_yticklabels(companies)
ax.set_ylabel('Companies')
ax.set_xlabel('Sales (USD)')
ax.set_title('Company sales')
plt.show()
# Without the first (top-selling) row
fig = plt.figure(figsize=(10, 10))
plt.grid(axis='both', zorder=0)
# legend=False hides the per-company legend. The original call
# plt.legend('off') did not hide it: matplotlib interprets a string
# argument as a sequence of labels rather than as an on/off switch.
sns.histplot(
    df_sorted[1:],
    x='Sales',
    y='Company',
    hue='Company',
    zorder=10,
    legend=False,
)
plt.xlabel('Sales (USD)')
Las ventas de Walmart son equivalentes a las ventas de las siguientes 15 compañías.
# Comparison of the first row against two groups of the remaining rows
# Rows from position 1 up to (not including) the 16th from the end; the
# original author notes that these add up to about the same as Walmart
acum = df_sorted['Sales'].iloc[1:-16].sum()
walmart = df_sorted.iloc[0]['Sales']
# Rows from position 16 to the end
resto = df_sorted['Sales'].iloc[16:].sum()
# NOTE(review): the two slices above only partition the remaining rows when
# the frame has exactly 32 rows, and the middle bar is labelled "Top 15" --
# confirm which slice bounds are intended for this dataset.
comp = [walmart, acum, resto]
plt.bar(
    x=['Walmart', 'Top 15 Companies', 'Last Companies'],
    height=comp,
    color=['lightskyblue', 'crimson', 'yellowgreen'],
)
plt.ylabel('Sales (USD)')
plt.title('Company sales')
# Bare expression: displays the frame when this runs as a notebook cell
df_sorted
Walmart no es la compañía con más tiendas
# Companies ordered by store count (ascending), dropping rows with no count
df_store_sorted = df_sorted.sort_values('Stores').dropna(subset=['Stores'])
companies = df_store_sorted['Company']
stores = df_store_sorted['Stores']

# Horizontal stem plot: one stem per company
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
ax.stem(stores, orientation='horizontal', linefmt='k', markerfmt='C2o')
ax.set_yticks(list(range(len(companies))))
ax.set_yticklabels(companies)
ax.set_ylabel('Companies')
ax.set_xlabel('Stores')
ax.grid(axis='y')

# Write the store count next to each stem head; y_range is 1-based, so
# y - 1.1 lands slightly below the stem drawn at position y - 1
y_range = np.arange(1, len(df_store_sorted) + 1)
for y, store_count in zip(y_range, df_store_sorted['Stores']):
    ax.annotate(int(store_count), (store_count + 300, y - 1.1))

# Extra room on the right so the annotations fit inside the axes
ax.set_xlim(-20, stores.max() + 2000)
ax.set_title('Company stores')
plt.show()
Ventas vs Tiendas (incluyendo Walmart)
# Stores vs. sales for every company, one colour per company
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot()
jp = sns.scatterplot(
    x='Stores',
    y='Sales',
    hue='Company',
    s=100,
    data=df_sorted,
)
plt.tight_layout()
# Anchor the legend outside the axes, to the right of the plot
plt.legend(title='Companies', loc='upper right', bbox_to_anchor=(1.4, 1))
"""
Walmart is the top one in sales but doesn't have too much stores as others
"""
# Stores vs. sales with a fitted regression line, leaving out the first
# (top-selling) row of the sales-sorted frame
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot()
jp = sns.regplot(data=df_sorted[1:], x='Stores', y='Sales')
plt.tight_layout()
# No legend here: the regplot is drawn without a label, so the original
# plt.legend(...) call had no labelled artists to show and only produced a
# warning plus an empty box titled 'Companies'.
# One regression (stores vs. sales) per category, leaving out the first
# (top-selling) row of the sales-sorted frame
fig = plt.figure(figsize=(7, 7))
ax = fig.add_subplot()
legends = []
# Slice once so the boolean mask below is built on the same rows it filters;
# the original masked the sliced frame with a mask from the full frame,
# which makes pandas reindex the mask and emit a warning.
df_without_top = df_sorted[1:]
# dropna().unique() keeps first-appearance order, so the plot and legend
# order are reproducible (iterating a set of strings is not) and the
# missing-category value is skipped explicitly.
for category in df_without_top['Category'].dropna().unique():
    df_category = df_without_top[df_without_top['Category'] == category]
    # Only categories with more than two companies get a regression
    if len(df_category) > 2:
        legends.append(category)
        jp = sns.regplot(data=df_category, x='Stores', y='Sales', label=category)
# Draw the legend only when at least one category was plotted
if legends:
    plt.legend()
plt.tight_layout()
plt.show()
Sí se observa una correlación entre las tiendas físicas y las ventas entre compañías de la misma categoría.
companies = df_sorted['Company']
sales = df_sorted['Sales']
# Distinct category values, ignoring missing ones
categories = set(df_sorted['Category'].dropna())

# Range = first row minus last row of the sales column; this relies on
# df_sorted being ordered by sales in descending order
rango = sales.iloc[0] - sales.iloc[-1]
print(FG_BLUE)
print('El rango de ventas de todas las compañias: %i USD' % int(rango))

# Same range starting from the second row (the message calls it "sin Walmart")
rango = sales.iloc[1] - sales.iloc[-1]
print('El rango para todas las compañias (sin Walmart) is: %i USD' % int(rango))

print('--------------------------------')
print(' Rango por categoría ')
print('--------------------------------')

for category in categories:
    df_category = df_sorted[df_sorted['Category'] == category]
    # A category with a single company has no range to report
    if len(df_category) <= 1:
        print(FG_RED)
        print('La categoría %s tiene sólo una compañia' % category)
        continue
    print(FG_BLUE)
    category_sales = df_category['Sales']
    rango = category_sales.iloc[0] - category_sales.iloc[-1]
    print('El rango por categoría %s is %i USD' % (category, int(rango)))
# Five rows with the most stores: sort ascending, drop rows with no store
# count, reverse, keep the first five. dropna(subset=...) replaces the
# original boolean mask, which was built on the differently ordered
# df_sorted and therefore had to be reindexed by pandas (with a warning).
df_top_stores = (
    df_sorted.sort_values('Stores').dropna(subset=['Stores'])[::-1][:5]
)
# Bare expression: displayed when this runs as a notebook cell
df_top_stores
# First five rows of the sales-sorted frame
df_top_sales = df_sorted[:5]
# Rows present in both top-five selections
int_df = pd.merge(
    df_top_stores,
    df_top_sales,
    how='inner',
    on=['Company', 'Sales', 'Stores'],
)
int_df
# Per-category totals, then the label of the category with the largest
# summed sales (displayed as the cell result)
category_totals = df_sorted.groupby('Category').sum()
category_totals.sort_values(by='Sales', ascending=False).index[0]
# Sales per store for every company; rows without a store count yield NaN
df_sales = df_sorted['Sales'].iloc[:]
df_stores = df_sorted['Stores'].iloc[:]
df_sales_by_store = df_sales.div(df_stores)
# Keep the ratio as a new column of the sales-sorted frame
df_sorted['Sales by store'] = df_sales_by_store
# Bare expression: shows the frame ordered by the new ratio, largest first
df_sorted.sort_values(by='Sales by store', ascending=False)
# Rows ordered by sales-per-store (ascending), without the rows where the
# ratio is missing. Computed once and reused; the original repeated the
# sort three times and filtered each result with a mask built on the
# differently ordered df_sorted, which pandas has to reindex (with a warning).
df_sorted_salesbystore = (
    df_sorted.sort_values('Sales by store').dropna(subset=['Sales by store'])
)
sales_by_store = df_sorted_salesbystore['Sales by store']
companies = df_sorted_salesbystore['Company']

# Horizontal stem plot: one stem per company
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
plt.stem(sales_by_store, orientation='horizontal', linefmt='k', markerfmt='C2o')
ax.set_yticks(list(range(len(companies))))
ax.set_yticklabels(companies)
plt.ylabel('Companies')
plt.xlabel('Sales/Stores')
plt.grid(axis='y')

# Write the (truncated) ratio next to each stem head; y_range is 1-based,
# so y - 1.1 lands slightly below the stem drawn at position y - 1
y_range = np.arange(1, len(df_sorted_salesbystore.index) + 1)
for (_, row), y in zip(df_sorted_salesbystore.iterrows(), y_range):
    plt.annotate(int(row['Sales by store']), (row['Sales by store'] + 10, y - 1.1))

# Extra room on the right so the annotations fit inside the axes
plt.xlim(-20, max(sales_by_store) + 100)
plt.title('Company ratio sales/stores')
plt.show()
# Companies whose store count is missing
no_store_companies = df_sorted[df_sorted['Stores'].isna()]['Company']
# Bare expression: displayed when this runs as a notebook cell
no_store_companies
for comp in no_store_companies:
    # Rank = 1-based position of the company's first row inside the
    # sales-sorted frame. The original printed the row's index label, which
    # is the row number in the source CSV: it only matches the sales ranking
    # if the file is already sorted by sales, and it starts counting at 0.
    sales_rank = int(np.flatnonzero((df_sorted['Company'] == comp).to_numpy())[0]) + 1
    print("La compañia %s esta en la posición n°%i en ventas" % (comp, sales_rank))
# Per-category mean of the numeric columns over rows that have a store
# count, ordered by mean store count (largest first). numeric_only=True is
# required because the frame also holds text columns, on which recent
# pandas versions refuse to compute a mean.
df_category_means = (
    df_sorted[df_sorted['Stores'].notna()]
    .groupby('Category')
    .mean(numeric_only=True)
    .sort_values('Stores', ascending=False)
)
# Bare expression: displayed when this runs as a notebook cell
df_category_means

# Reverse the order (smallest mean first) for the plot
stores_by_category = df_category_means['Stores'][::-1]
category = df_category_means.index[::-1]

# Horizontal stem plot: one stem per category
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
plt.stem(stores_by_category, orientation='horizontal', linefmt='k', markerfmt='C3o')
ax.set_yticks(list(range(len(category))))
ax.set_yticklabels(category)
plt.ylabel('Category')
plt.xlabel('Av.Stores')
plt.grid(axis='y')

# Write the (truncated) mean next to each stem head; y_range is 1-based,
# so y - 1.1 lands slightly below the stem drawn at position y - 1
y_range = np.arange(1, len(category) + 1)
for stor, y in zip(stores_by_category, y_range):
    plt.annotate(int(stor), (stor + 300, y - 1.1))

# Extra room on the right so the annotations fit inside the axes
plt.xlim(-20, max(stores_by_category) + 2000)
plt.title('Av.Stores by category')
plt.show()
# Companies ordered by store-count growth (largest first), without the rows
# where growth is missing. dropna(subset=...) replaces the original boolean
# mask, which was built on the differently ordered df_sorted and therefore
# had to be reindexed by pandas (with a warning).
df_company_growth = (
    df_sorted.sort_values('Store Count Growth', ascending=False)
    .dropna(subset=['Store Count Growth'])
)
# Bare expression: the row with the highest growth
df_company_growth.iloc[0]

# Reverse the order for the plot and scale by 100 (the axis label reports %)
store_growth = df_company_growth['Store Count Growth'][::-1] * 100
company = df_company_growth['Company'][::-1]

# Horizontal stem plot: one stem per company
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
plt.stem(store_growth, orientation='horizontal', linefmt='k', markerfmt='C3o')
ax.set_yticks(list(range(len(company))))
ax.set_yticklabels(company)
plt.ylabel('Company')
plt.xlabel('Store Growth (%)')
plt.grid(axis='y')

# Label each stem; positive values are annotated to the right of the head,
# the rest to the left so the text does not overlap the stem
y_range = np.arange(1, len(company) + 1)
for stor, y in zip(store_growth, y_range):
    x = stor + 2 if stor > 0 else stor - 8
    plt.annotate("{:10.2f}".format(stor), (x, y - 1.1))

plt.xlim(-35, 35)
plt.title('Store growth by company')
plt.show()
# Store growth (scaled by 100) divided by sales, computed from
# df_company_growth's own columns. The original divided by the global
# `sales` left behind by an earlier cell, so the result silently depended on
# which cell had last assigned that name.
growth_by_sales = (
    df_company_growth['Store Count Growth'] * 100 / df_company_growth['Sales']
)
df_company_growth['Store Growth by Sales'] = growth_by_sales
df_company_growth = df_company_growth.sort_values('Store Growth by Sales', ascending=False)

# Same rows in the new order (the original `[::1]` slice was a no-op)
sales = df_company_growth['Sales']
# Reverse the order for the plot
growth_by_sales = df_company_growth['Store Growth by Sales'][::-1]
company = df_company_growth['Company'][::-1]

# Horizontal stem plot: one stem per company
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot()
plt.stem(growth_by_sales, orientation='horizontal', linefmt='k', markerfmt='C3o')
ax.set_yticks(list(range(len(company))))
ax.set_yticklabels(company)
plt.ylabel('Company')
plt.xlabel('Store Growth by Sales (%)')
plt.grid(axis='y')

# Label each stem; positive values are annotated to the right of the head,
# the rest to the left so the text does not overlap the stem
y_range = np.arange(1, len(company) + 1)
for stor, y in zip(growth_by_sales, y_range):
    x = stor + 0.0001 if stor > 0 else stor - 0.0002
    plt.annotate("{:.2e}".format(stor), (x, y - 1.1))

plt.xlim(-0.001, 0.001)
plt.title('Store growth by Sales company')
plt.show()