Análisis de los 25 retailers más grandes de Estados Unidos

import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import squarify  # pip install squarify (algorithm for treemap)

# Load the retailer dataset; the relative path is resolved against the working directory.
data = pd.read_csv('largest_us_retailers.csv')
# Bare expression: in a notebook this renders the frame as the cell output.
data

# Distribution of the 'Sales' column of the raw dataset, drawn with 30 bins.
sns.displot(data=data['Sales'], bins=30)

# Work on an independent copy so the raw `data` frame stays untouched.
df = data.copy(deep=True)
# Order rows from highest to lowest sales so the top seller sits in the first row.
df.sort_values('Sales', ascending=False, inplace=True)
# Drop that first row (the largest retailer by sales); every later cell that
# reads `df` therefore works without it.
df.drop(df.index[0], inplace=True)

I. Preguntas del negocio

# Mean of 'Sales' over the rows kept in `df`, rounded to two decimals for display.
prom = df['Sales'].mean()
total = round(prom, 2)
print(total)

# Histogram of 'Sales' (10 bins) with a dashed vertical line marking the mean.
f, ax = plt.subplots()
# Draw on `ax` explicitly rather than relying on the implicit current axes.
sns.histplot(df['Sales'], bins=10, edgecolor="black", color="#69b3a2", ax=ax)
ax.axvline(np.mean(df['Sales']), color='black', ls='--', label="Mean")
# Trailing semicolon suppresses the notebook's textual echo of the return value.
ax.legend();

# Kernel density estimate of 'Sales'.
# `fill` and `bw_method` replace the deprecated `shade` and `bw` keywords
# with the same values, so the curve is unchanged.
sns.kdeplot(df['Sales'], fill=True, bw_method=0.05, color='olive');

# Rows whose sales are at most 40000, and the combined sales of that subset.
rangSal = df[df['Sales'] <= 40000]
rangSal['Sales'].sum()

# Kernel density estimate of 'Stores'.
# `fill` and `bw_method` replace the deprecated `shade` and `bw` keywords; the
# deprecated `vertical=False` is dropped because plotting along x is the default.
sns.kdeplot(df['Stores'], fill=True, bw_method=0.05, color='skyblue')

# Rows with at most 3800 stores, and the combined store count of that subset.
# Rows whose 'Stores' value is missing fail the comparison and are left out.
rangStor = df[df['Stores'] <= 3800]
rangStor['Stores'].sum()

# Scatter of sales against store count with a fitted regression line,
# drawn as a thick, semi-transparent red line.
sns.regplot(
    data=df,
    x='Stores',
    y='Sales',
    line_kws={"color": "r", "alpha": 0.5, "lw": 5},
)

# Spread of sales: largest value, smallest value, and their difference.
maxim = df['Sales'].max()
minim = df['Sales'].min()
rango = maxim - minim
rango

# Pie chart comparing the maximum, the range and the minimum of sales.
expenses = [maxim, rango, minim]
labels = ['Maxim', 'Range', 'Minim']
colors = ['#B7C3F3', '#DD7596', '#8EB897']


def func(pct):
    """Return the wedge percentage formatted with one decimal place."""
    return "{:1.1f}%".format(pct)


# `func` is passed directly as the formatter; wrapping it in a lambda added nothing.
# The 'Range' wedge is pulled out of the pie via `explode`.
plt.pie(
    expenses,
    labels=labels,
    autopct=func,
    explode=[0, 0.2, 0],
    shadow=True,
    colors=colors,
)
plt.title('Sales Range')
plt.axis('equal')
plt.show()

# The five rows with the highest store counts, largest first.
df.sort_values(by='Stores', ascending=False).head(5)

# Pie chart of store counts for the five retailers with the most stores.
five = df.sort_values('Stores', ascending=False).iloc[:5]
expenses = five['Stores']
# Take the wedge labels from the data itself so they always match the rows
# being plotted (the previous hand-typed abbreviations could drift from the data).
labels = five['Company'].tolist()


def func(pct):
    """Return the wedge percentage formatted with one decimal place."""
    return "{:1.1f}%".format(pct)


# The last two wedges (4th and 5th by store count) are pulled out of the pie.
plt.pie(
    expenses,
    labels=labels,
    autopct=func,
    explode=[0, 0, 0, 0.2, 0.2],
    shadow=True,
)
plt.title('Five')
plt.axis('equal')
plt.show()

# Total sales per category; the second line displays them from largest to smallest
# without changing the order stored in `group`.
group = df.groupby('Category')['Sales'].sum()
group.sort_values(ascending=False)

# Treemap of total sales per category.
sizes = group

# Scale each total between the smallest and largest total, then map that
# scale onto the Blues colormap so larger categories get darker tiles.
norm = matplotlib.colors.Normalize(vmin=sizes.min(), vmax=sizes.max())
colors = [matplotlib.cm.Blues(norm(value)) for value in sizes]

# Create the plot and resize it.
fig = plt.gcf()
ax = fig.add_subplot()
fig.set_size_inches(16, 4.5)

# Labels must come from the grouped index: it has one entry per rectangle, in
# the same order as `sizes`. Using df['Category'] would label the tiles with the
# categories of the first few rows of `df` instead.
# The alpha layer keeps the black labels readable on dark tiles.
squarify.plot(sizes=sizes, label=sizes.index, color=colors, alpha=.6, ax=ax)
# NOTE(review): the title "Five" looks copied from the previous chart — confirm wording.
plt.title("Five", fontsize=23, fontweight="bold")

# Remove the axes and display the plot.
plt.axis('off')
plt.show()

# Sales per store for each row; rows with a missing store count yield NaN.
df['Sales/Avg. Store_2'] = df['Sales'] / df['Stores']
# Rank from highest to lowest (sort_values places NaN entries last by default).
org = df['Sales/Avg. Store_2'].sort_values(ascending=False)
org

# Horizontal bars of sales per store, one bar per company.
# x/y are passed by keyword because recent seaborn releases no longer accept
# them positionally. The company names are re-indexed to follow `org`, so each
# bar is paired with its own company and the bars appear in ranked order.
sns.barplot(x=org, y=df.loc[org.index, 'Company']);
plt.xticks(rotation='vertical');

# Rows for which no store count is recorded.
df.loc[df['Stores'].isnull()]

# Split the rows by whether a store count is recorded. Going by the variable
# names, rows without one are treated as online-only retailers.
isOnline = df[df['Stores'].isna()]
isOnline2 = isOnline['Sales'].sum()
notOnline = df[df['Stores'].notna()]
notOnline2 = notOnline['Sales'].sum()
# Difference in total sales between the two groups.
notOnline2 - isOnline2

# Side-by-side bars comparing total sales of the two groups.
x = np.arange(0, 1)  # a single bar position: array([0])
label = ['Is Online', 'Not Online']
fig, ax = plt.subplots(figsize=(10, 10))
# Bar width; also used to offset the second bar so the two do not overlap.
bar_width = 0.02
# Attach the names from `label` so the legend tells the two bars apart.
b1 = ax.bar(x, isOnline2, width=bar_width, label=label[0])
b2 = ax.bar(x + bar_width, notOnline2, width=bar_width, label=label[1])
ax.legend()

# Sum of 'Store Count Growth' within each category.
df['Store Count Growth'].groupby(df['Category']).sum()

Análisis de los 25 retailers más grandes de Estados Unidos

I. Preguntas del negocio

Análisis de los 25 retailers más grandes de Estados Unidos