# Análisis de los 25 retailers más grandes de Estados Unidos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load the retailer dataset from its CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer="/work/largest_us_retailers_-2.csv")
# First rows and summary statistics. These bare expressions are only rendered
# when run as the last statement of a notebook cell; in a script they are
# evaluated and discarded.
df.head()
df.describe()
# Separate table ordered by the 'Sales' column, largest value first.
df_sorted = df.sort_values(by='Sales', ascending=False)
df_sorted
# I. Preguntas del negocio
# NOTE(review): `df_clipping2` was used here without being defined anywhere in
# this file, which raises NameError. It is presumably the sales ranking without
# its top-selling company -- confirm against the original notebook.
df_clipping2 = df_sorted.iloc[1:, :]
# Mean of 'Sales' over that reduced table.
df_clipping2['Sales'].mean()
# Median of the "Sales/Avg. Store" column over the full dataset.
df["Sales/Avg. Store"].median()
# Non-null value count of every column, grouped by company.
df.groupby("Company").count()
# 'Sales' against 'Stores', with marker size driven by "Sales/Avg. Store".
sns.scatterplot(
    data=df_sorted,
    x='Sales',
    y='Stores',
    size='Sales/Avg. Store',
    color="yellow",
)
# Spread between the largest and the smallest 'Sales' value.
rango = df["Sales"].max() - df["Sales"].min()
rango
# First five rows of the sales-sorted table, all columns.
df_clipping = df_sorted.iloc[:5, :]
df_clipping
# The figure size must be configured BEFORE the chart is created: rc settings
# only affect figures opened afterwards, so setting it after plotting (as the
# original did) left this chart at the previous size. Note that sns.set also
# applies seaborn's default theme.
sns.set(rc={'figure.figsize': (18, 8)})
# Open a fresh figure so the bars are not drawn onto axes left over from an
# earlier plot when this file runs as a plain script.
plt.figure()
chart = sns.barplot(data=df_clipping, x='Company', y='Sales')
chart.set_title('Sales of the main 5 retailers in the USA')
chart.set_xlabel('Top 5 retails in sales')
chart.set_ylabel('Sales in millions of dollars')
# Non-null value count of every column for each distinct 'Sales' value.
df.groupby(by="Sales").count()
# Add a column holding 'Sales' divided by 'Stores', row by row.
df_sorted['Sales/Avg. Store_2'] = df_sorted['Sales'].div(df_sorted['Stores'])
df_sorted
# Rows whose 'Stores' value is missing.
df.loc[df['Stores'].isna()]
# Mean of the "Sales/Avg. Store" column over the full dataset.
df["Sales/Avg. Store"].mean()