Análisis de los 25 retailers más grandes de Estados Unidos
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(color_codes=True)
from matplotlib import colors
from matplotlib.ticker import PercentFormatter
from matplotlib import rcParams
# Location of the retailers dataset (one row per company).
DATA_PATH = '/work/largest_us_retailers_9b00dc73-a938-46cd-af17-fcb2bd67301f.csv'

# Load the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(DATA_PATH)
df.head()
Companyobject
Salesint64
0
Walmart US
658119
1
Kroger
115037
2
Costco
90048
3
Home Depot
83976
4
Walgreen Boots
78924
# Summarize column dtypes and non-null counts to spot missing values.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 6 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Company 25 non-null object
1 Sales 25 non-null int64
2 Stores 23 non-null float64
3 Sales/Avg. Store 23 non-null float64
4 Store Count Growth 23 non-null float64
5 Category 24 non-null object
dtypes: float64(3), int64(1), object(2)
memory usage: 1.3+ KB
# Inspect the last rows of the dataset.
df.tail()
Companyobject
Salesint64
20
Dollar Tree
21464
21
HEB
21384
22
Kohl's
19060
23
Delhaize
18201
24
Meijer
16592
# Bar chart of 'Sales' for every company in the dataset.
fig, ax = plt.subplots(figsize=(15, 5))
sns.barplot(data=df, x='Company', y='Sales', ax=ax)
# Tilt the company names so the x labels do not overlap.
plt.xticks(rotation=75)
plt.show()
# Bar chart of 'Sales' without the first row (Walmart US), whose sales dwarf
# every other company. Slice with an open upper bound (iloc[1:]) so that only
# that row is excluded; a fixed bound such as 1:23 would also silently drop
# the last rows of the 25-row frame.
plt.figure(figsize=(15, 5))
ax = sns.barplot(x='Company', y='Sales', data=df.iloc[1:])
# Tilt the company names so the x labels do not overlap.
plt.xticks(rotation=75)
plt.show()
I. Preguntas del negocio
# Line plot of 'Sales' per company, leaving out only the first row (Walmart US).
# iloc[1:] keeps every remaining row; a fixed upper bound such as 1:23 would
# also drop the last rows of the 25-row frame.
plt.figure(figsize=(15, 5))
sns.lineplot(data=df.iloc[1:], x='Company', y='Sales')
# Tilt the company names so the x labels do not overlap.
plt.xticks(rotation=75)
plt.show()
# Mean 'Sales' over all companies except the first row (Walmart US).
# iloc[1:] is used so that only that single row is excluded from the average;
# a fixed upper bound such as 1:23 would also leave out the last rows.
media_comp = df.iloc[1:]['Sales'].mean()
print(media_comp)
49257.5
# Distribution plot of 'Sales' by company, excluding only the first row
# (Walmart US); iloc[1:] keeps every other row of the frame.
# displot is a figure-level function: it builds its own figure, sized through
# height/aspect, so no plt.figure() call precedes it (one would only leave an
# empty extra figure behind).
sns.displot(data=df.iloc[1:], x='Company', y='Sales', cumulative=False, height=4, aspect=3)
# Tilt the company names so the x labels do not overlap.
plt.xticks(rotation=75)
plt.show()
# Total 'Sales' of all companies except the first row (Walmart US).
# iloc[1:] excludes only that row; a fixed upper bound such as 1:23 would
# also leave the last rows out of the total.
suma_ventas = df.iloc[1:]['Sales'].sum()
print(suma_ventas)
1083665
# Histogram of 'Stores' by company, excluding only the first row (Walmart US).
# iloc[1:] keeps every other row; a fixed upper bound such as 1:23 would also
# drop the last rows of the 25-row frame.
plt.figure(figsize=(15, 5))
sns.histplot(data=df.iloc[1:], x='Company', y='Stores', cumulative=False)
# Tilt the company names so the x labels do not overlap.
plt.xticks(rotation=75)
plt.show()
# Total 'Stores' of all companies except the first row (Walmart US).
# Missing store counts (NaN) are skipped by sum(). iloc[1:] excludes only the
# first row; a fixed upper bound such as 1:23 would also leave out the last rows.
suma_tiendas = df.iloc[1:]['Stores'].sum()
print(suma_tiendas)
73646.0
# Scatter of 'Sales' against 'Stores', one colour per company.
# The default figure size must be set BEFORE the plot is created: rcParams is
# read when the figure is built, so assigning it afterwards leaves this figure
# at the previous size.
rcParams['figure.figsize'] = 12, 12
sns.scatterplot(data=df, x='Sales', y='Stores', hue='Company')
# Render the figure explicitly so the cell does not echo the Axes repr.
plt.show()
# Range of sales: gap between the largest and the smallest 'Sales' value.
ventas = df['Sales']
min_ventas, max_ventas = ventas.min(), ventas.max()
dif_ventas = max_ventas - min_ventas
print(min_ventas, max_ventas, dif_ventas)
16592 658119 641527
# Sort the retailers by store count, largest first.
# NOTE: inplace=True reorders `df` itself, so every later positional slice of
# `df` sees this new row order rather than the order read from the CSV.
df.sort_values(['Stores'], ascending=False, inplace=True)
# Keep only the 'Stores' column of the (now sorted) frame for display.
df_tiendas_ordenadas = df['Stores']
print(df_tiendas_ordenadas)
20 14250.0
19 13350.0
5 9813.0
4 8002.0
0 4574.0
14 4553.0
1 3931.0
17 2770.0
9 2326.0
18 2021.0
3 1965.0
8 1828.0
7 1772.0
12 1389.0
13 1351.0
23 1280.0
22 1169.0
16 889.0
15 794.0
10 655.0
2 495.0
21 323.0
24 231.0
6 NaN
11 NaN
Name: Stores, dtype: float64
# Scatter of 'Stores' against 'Sales' for the five companies with the most stores.
# The default figure size is set before plotting, because rcParams is read when
# the figure is created.
rcParams['figure.figsize'] = 12, 12
# Select the top five explicitly with nlargest instead of slicing the first
# five rows, so the cell does not depend on `df` having been sorted in place
# by an earlier cell.
top5_tiendas = df.nlargest(5, 'Stores')
sns.scatterplot(data=top5_tiendas, x='Stores', y='Sales', hue='Company')
# Render the figure explicitly so the cell does not echo the Axes repr.
plt.show()