# Análisis de los 25 retailers más grandes de Estados Unidos
# Importamos las librerías necesarias
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the retailers dataset from the CSV file into a DataFrame.
retail = pd.read_csv("largest_us_retailers.csv")
# Quick inspection: the first ten rows, the (rows, columns) shape and the
# dtype of every column.
retail.iloc[:10]
retail.shape
retail.dtypes
# I. Preguntas del negocio
# Company with the highest sales: pick the row that holds the maximum "Sales"
# value so the company name and the figure come from the same record.
# (A column-wise .max() would pair the top sales figure with the
# alphabetically last company name.)
retail.loc[retail["Sales"].idxmax(), ["Company", "Sales"]]

# Every retailer except "Walmart US", ordered from highest to lowest sales.
retail_without_wallmart = retail[retail["Company"] != "Walmart US"].sort_values("Sales", ascending=False)

# Keep the sales ranking in a variable so the chart below really shows the
# five largest retailers, whatever the row order of the CSV is.
retail_by_sales = retail.sort_values("Sales", ascending=False)
retail_by_sales

# Bar chart of the five retailers with the highest sales.
x = retail_by_sales['Company'][:5]
y = retail_by_sales['Sales'][:5]
plt.bar(x, y)
plt.title('Top 5 retailers')
plt.xlabel('Company')
plt.ylabel('Sales')
plt.xticks(rotation='vertical')  # company names are long; rotate to avoid overlap
plt.show()
# La media de las ventas sin contar a la compañía dominante.
# Average sales over the retailers in retail_without_wallmart, rounded to
# two decimals.
mean_sales = retail_without_wallmart["Sales"].mean()
round(mean_sales, 2)

# Distribution of sales across all retailers.
sns.histplot(data=retail, x="Sales")

# Smallest store count found in the dataset.
print(f"La compañía con menos tiendas tiene {retail['Stores'].min()}")

# Distribution of the number of stores.
sns.histplot(data=retail, x="Stores")

# Correlation matrix between store count and sales, followed by the matching
# scatter plot.
retail.loc[:, ["Stores", "Sales"]].corr()
sns.scatterplot(x="Stores", y="Sales", data=retail)
# Report the spread of the sales column: its lowest and highest values.
ventas = retail["Sales"]
minimo_ventas, maximo_ventas = ventas.min(), ventas.max()
print(f"El rango de las ventas está entre {minimo_ventas} y {maximo_ventas}")
# Five companies with the most stores: sort the whole table first and only
# then keep the top five (slicing before sorting would merely reorder the
# first five rows of the file).
top5_tiendas = retail[["Company", "Stores"]].sort_values("Stores", ascending=False)[:5]
top5_tiendas

# Five companies with the highest sales.
top_mayor_ventas = retail.sort_values("Sales", ascending=False)
top5_mayor_ventas = top_mayor_ventas[:5]

# Company names of both rankings.
stores = top5_tiendas["Company"].values
sales = top5_mayor_ventas["Company"].values

# Companies that appear in both top-5 lists, kept in store-count order.
top_sellers = set(sales)
companies = [company for company in stores if company in top_sellers]
companies
# Number of retailers with no category: summing the boolean mask counts the
# True entries (.count() would return the total number of rows instead).
retail["Category"].isnull().sum()

# Distinct category values present in the dataset (unique must be called;
# the bare attribute only returns the bound method).
retail["Category"].unique()
# Keep only the retailers that have a category assigned.
clean_detail = retail.dropna(subset=["Category"])

# Total sales per category, largest first. Aggregating is required because
# several companies share a category: plotting the raw rows would draw their
# bars on top of each other at the same x position.
sales_by_category = clean_detail.groupby("Category")["Sales"].sum().sort_values(ascending=False)
x = sales_by_category.index
y = sales_by_category.values
plt.bar(x, y)
plt.title('Category on more sales')
plt.xlabel('Category')
plt.ylabel('Sales')
plt.xticks(rotation='vertical')  # category names are long; rotate to avoid overlap
plt.show()

# Stores versus sales for the categorised retailers, one colour per company.
sns.scatterplot(data=clean_detail,x="Stores", y="Sales", hue="Company")
# Box plot of sales against store count for the same retailers.
sns.boxplot(data=clean_detail, x="Stores",y="Sales",orient="vertical")
# Rows whose "Stores" value is missing.
# NOTE(review): the chart title treats these as online-only retailers; confirm
# that a missing store count really means "no physical stores" in this dataset.
sin_tiendas = retail["Stores"].isna()
online = retail.loc[sin_tiendas]
online["Company"]

# Bar chart of the sales of those companies.
x, y = online['Company'], online['Sales']
plt.bar(x, y)
plt.xlabel('Company')
plt.ylabel('Sales')
plt.title('Online retailers')
plt.xticks(rotation='vertical')
plt.show()