# Análisis de los 25 retailers más grandes de Estados Unidos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
from matplotlib import rcParams
# Load the retailers dataset from its CSV export into a DataFrame.
DATASET_CSV = '/work/largest_us_retailers_9b00dc73-a938-46cd-af17-fcb2bd67301f.csv'
df = pd.read_csv(DATASET_CSV)

# Quick inspection of the data: first rows, column summary, last rows.
# NOTE: head() and tail() only render their result in an interactive/notebook
# session; as bare statements in a script their return value is discarded.
# info() prints its summary itself.
df.head()
df.info()
df.tail()
# Bar chart of Sales per Company, drawn twice on separate 15x5 figures:
# first for every row of df, then only for the positional rows 1..22.
# NOTE(review): iloc[1:23] skips the first row, but it also leaves out any
# rows from position 23 onwards -- confirm that is intended.
for sales_rows in (df, df.iloc[1:23]):
    plt.figure(figsize=(15, 5))
    ax = sns.barplot(data=sales_rows, x='Company', y='Sales')
    # Company names are long; rotate the tick labels so they stay readable.
    plt.xticks(rotation=75)
    plt.show()
# I. Preguntas del negocio
# Positional rows 1..22 of df (the first row is skipped), used for both the
# plot and the average below.
sales_subset = df.iloc[1:23]

# Line plot of Sales per Company for that selection.
plt.figure(figsize=(15, 5))
sns.lineplot(x='Company', y='Sales', data=sales_subset)
plt.xticks(rotation=75)
plt.show()

# Mean Sales over the same selection.
media_comp = sales_subset['Sales'].mean()
print(media_comp)
# Distribution plot of Company vs. Sales for the positional rows 1..22.
# displot() is a figure-level function: it builds its own figure, sized by
# `height` and `aspect`, so no plt.figure() call precedes it (one would only
# leave an extra, empty figure behind).
sns.displot(data=df.iloc[1:23], x='Company', y='Sales', cumulative=False, height=4, aspect=3)
# Rotate the company labels on the axes displot just created.
plt.xticks(rotation=75)
plt.show()

# Total Sales over the same selection of rows.
suma_ventas = df.iloc[1:23]['Sales'].sum()
print(suma_ventas)
# Positional rows 1..22 of df (the first row is skipped), used for both the
# plot and the total below.
stores_subset = df.iloc[1:23]

# Bivariate histogram of Company vs. Stores on a 15x5 figure.
plt.figure(figsize=(15, 5))
sns.histplot(x='Company', y='Stores', data=stores_subset, cumulative=False)
plt.xticks(rotation=75)
plt.show()

# Total number of Stores over the same selection.
suma_tiendas = stores_subset['Stores'].sum()
print(suma_tiendas)
# Scatter plot of Sales vs. Stores for every row, one hue per company.
# The default figure size has to be set BEFORE the figure is created;
# assigning it after the plot has no effect on that plot. The rcParams
# assignment is kept (rather than a one-off figsize=) so later figures still
# pick up the same default.
rcParams['figure.figsize'] = 12, 12
plt.figure()  # fresh figure, so the plot never lands on a previous axes
sns.scatterplot(data=df, x='Sales', y='Stores', hue='Company')
plt.show()

# Range of Sales across all rows. Series.min()/max() skip missing values,
# unlike the builtin min()/max(), whose result with NaN depends on row order.
min_ventas = df['Sales'].min()
max_ventas = df['Sales'].max()
dif_ventas = max_ventas - min_ventas
print(min_ventas, max_ventas, dif_ventas)
# Sort df by number of stores, largest first. The sort is done in place, so
# df stays reordered for any code that runs after this point.
df.sort_values(['Stores'], ascending=False, inplace=True)
df_tiendas_ordenadas = df['Stores']
print(df_tiendas_ordenadas)

# Scatter plot of Stores vs. Sales for the first five rows after the sort.
# The default figure size is set before the figure is created (assigning it
# after the plot cannot affect it), and an explicit figure()/show() pair keeps
# this plot from being drawn on top of an earlier, still-open axes.
rcParams['figure.figsize'] = 12, 12
plt.figure()
sns.scatterplot(data=df.head(5), x='Stores', y='Sales', hue='Company')
plt.show()