# Análisis de los 25 retailers más grandes de Estados Unidos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the retailer dataset and keep a second view ordered from the highest
# to the lowest value of the 'Sales' column.
df = pd.read_csv('/work/largest_us_retailers.csv')
df_sales = df.sort_values(by='Sales', ascending=False)
# Notebook-style preview of the first rows of the sorted table.
df_sales.head()
# I. Preguntas del negocio
# Mean sales without the dominant company, i.e. skipping the first row of the
# sales-descending table.
avg = df_sales['Sales'].iloc[1:].mean()
print(avg)
# Mean sales with the dominant company included.
avg_with_dominant = df['Sales'].mean()
print(avg_with_dominant)
# Plot the two means computed above instead of hand-copied figures, so the
# chart always matches the data that was loaded.
x = ['Con Dominante', 'Sin Dominante']
y = [avg_with_dominant, avg]
plt.bar(x, y)
plt.show()
# Select the style before drawing so that it also applies to this figure.
plt.style.use('ggplot')
# Highest, mean and lowest sales once the first row of the sales-descending
# table (the dominant company) is left out.
# NOTE(review): the original plotted the literals 115000 / 46602 / 16572,
# presumably rounded copies of these statistics - confirm against the data.
sales_without_top = df_sales['Sales'].iloc[1:]
x = ['Más Alto', 'Promedio', 'Más Bajo']
y = [
    sales_without_top.max(),
    sales_without_top.mean(),
    sales_without_top.min(),
]
plt.bar(x, y)
# Finish this figure here; without it the next chart is drawn on the same axes
# when the file runs as a plain script.
plt.show()
# Select the style before drawing so that it also applies to this figure.
plt.style.use('ggplot')
# Distribution of sales, skipping the first row of the sales-descending table.
x = df_sales['Sales'].iloc[1:]
plt.hist(x)
# Finish this figure here; without it the next chart is drawn on the same axes
# when the file runs as a plain script.
plt.show()
# Select the style once, before any of the figures below are drawn.
plt.style.use('ggplot')
df_stores = df.sort_values('Stores', ascending=True)
x = df_stores['Stores']

# Distribution of store counts with matplotlib's default binning.
plt.hist(x)
plt.show()

# Same data restricted to 0-2000 stores, in bins 100 wide.
bins = np.arange(0, 2001, 100)
plt.hist(x, bins=bins)
plt.show()

# Same data restricted to 2000-4000 stores, in bins 100 wide.
bins = np.arange(2000, 4001, 100)
plt.hist(x, bins=bins)
# Each histogram gets its own plt.show(): the three use different bin ranges
# and would be unreadable if stacked on a single set of axes.
plt.show()
# Scatter plot of the 'Stores' column (x axis) against the 'Sales' column
# (y axis), one point per row of the sales-sorted table.
store_counts = df_sales['Stores']
sales_values = df_sales['Sales']
plt.scatter(store_counts, sales_values)
plt.xlabel('Stores')
plt.ylabel('Sales')
plt.title('Sales & Stores')
plt.show()
# Highest sales figure once the first row of the sales-descending table is
# skipped, and the lowest sales figure of the whole table.
max_sales = df_sales['Sales'].iloc[1:].max()
min_sales = df_sales['Sales'].min()
print(max_sales)
print(min_sales)
x = ['Max', 'Min']
y = [max_sales, min_sales]
plt.bar(x, y)
# Finish this figure here; without it the next bar chart is drawn on top of
# this one when the file runs as a plain script.
plt.show()
# Top 5 companies with the most physical stores.
df_stores = df.sort_values('Stores', ascending=False)
top_store_rows = df_stores.head(5)
print(top_store_rows['Company'])
plt.bar(top_store_rows['Company'], top_store_rows['Stores'])
plt.xticks(rotation=45)
plt.show()
# Top 5 companies with the highest sales (df_sales is already ordered by
# 'Sales', largest first).
top_sales_rows = df_sales.head(5)
print(top_sales_rows['Company'])
plt.bar(top_sales_rows['Company'], top_sales_rows['Sales'])
plt.xticks(rotation=10)
plt.show()
# Total sales per category, ordered from the largest to the smallest total.
category = df.groupby('Category')['Sales'].sum()
category_sorted = category.sort_values(ascending=False)
print(category_sorted)
# Take the bar labels from the sorted result itself. A hand-typed label list
# can fall out of order with the data or differ in length from it.
x = category_sorted.index
y = category_sorted
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# Order the table by the 'Sales/Avg. Store' column, largest first.
df_sales_avg = df.sort_values(by='Sales/Avg. Store', ascending=False)
# Notebook-style preview of the first rows.
df_sales_avg.head()
# Bar chart of the first 15 rows of that ordering.
leading_rows = df_sales_avg.head(15)
plt.bar(leading_rows['Company'], leading_rows['Sales/Avg. Store'])
plt.xticks(rotation=90)
plt.show()
# Notebook-style preview: rows whose category is 'Electronic/Mail Order'.
df[df['Category'] == 'Electronic/Mail Order']
# Notebook-style preview: the table ordered alphabetically by company.
df.sort_values(by='Company', ascending=True)
# Same ordering with the rows labelled 6, 11 and 0 removed.
# NOTE(review): presumably these are the companies that are not purely
# brick-and-mortar plus the dominant one - confirm against the data.
df_company = df.sort_values(by='Company', ascending=True).drop([6, 11, 0])
# Mean sales of the remaining rows.
df_company['Sales'].mean()
# Mean sales of the companies that only have physical stores, taken from
# df_company rather than from a hand-copied number.
physical_only_avg = df_company['Sales'].mean()
# NOTE(review): the first two figures are hand-entered; presumably they are
# the 'Sales' values of the two companies named in the labels - confirm.
x = ['Amazon', 'Apple incl. Online', 'Average']
y = [71687, 37664, physical_only_avg]
plt.bar(x, y)
plt.ylabel('Sales')
plt.show()
# Total number of stores per category.
y = df.groupby('Category')['Stores'].sum()
print(y.sort_values(ascending=False))
# Use the grouped result's own index as bar labels. Rebuilding the labels
# separately and cutting them to a fixed 11 entries breaks as soon as the
# number of categories changes.
x = y.index
plt.bar(x, y)
plt.xticks(rotation=90)
plt.show()
# Distribution of the per-category store totals (default binning).
stores_per_category = df.groupby('Category')['Stores'].sum()
print(stores_per_category.sort_values(ascending=True))
plt.hist(stores_per_category)
plt.xlabel('Stores')
plt.show()
# NOTE: the notebook chart block exported at this point was empty.
# The chart was probably not set up properly in the notebook.
# Mean of the 'Sales/Avg. Store' column for each category.
avg_store_sales = df.groupby('Category')['Sales/Avg. Store'].mean()
print(avg_store_sales.sort_values(ascending=True))
# Label the bars with the grouped result's own index. This avoids a variable
# named `list` (which hides the builtin) and a fixed 11-entry label slice.
plt.bar(avg_store_sales.index, avg_store_sales)
plt.xticks(rotation=90)
plt.show()