# ANALYTICAL SOLUTION
# Total the numeric columns per article. numeric_only=True keeps the text
# columns (seller, country) out of the aggregation: older pandas dropped them
# silently, but since pandas 2.0 the default would concatenate their strings.
df2 = my_df.groupby('article_name').sum(numeric_only=True)
# Rank the articles by units sold; the first row is the best seller.
por_cant = df2.sort_values('quantity', ascending=False)
print(por_cant['quantity'].head(1))
print(df2.head())
article_name
HDD 413
Name: quantity, dtype: int64
week quantity total_amount
article_name
CPU 66 266 37138.92
Case 54 206 7807.40
Chair 56 207 69477.48
Desk 60 223 29012.30
Fan Cooler 64 205 871.25
# GRAPHICAL SOLUTION
# Apply the theme before drawing: figure size, style and font scale are read
# when the figure and axes are created, so calling sns.set() after the plot
# would only affect figures created later.
sns.set(rc={'figure.figsize': (20, 10), 'axes.labelsize': 12}, style='whitegrid', font_scale=1.9)
# Bars go from the most sold article to the least sold one.
ranking = df2.sort_values('quantity', ascending=False).index
ax = sns.barplot(x=df2.index, y=df2['quantity'], data=df2, saturation=.8, order=ranking)
ax.set_title("Artículo más vendido")
plt.xticks(rotation=90)
plt.xlabel('Artículo')
plt.ylabel('Cantidad')
plt.show()
# ANALYTICAL SOLUTION
# Five articles with the highest revenue. numeric_only=True restricts the sum
# to numeric columns; since pandas 2.0 the default would also try to
# aggregate the text columns.
df3 = (
    my_df.groupby('article_name')
    .sum(numeric_only=True)
    .sort_values('total_amount', ascending=False)
    .head(5)
)
print(df3['total_amount'])
article_name
Full Pc 538335.93
Notebook 251000.00
Smartphone 152250.00
Chair 69477.48
Tablet 48620.00
Name: total_amount, dtype: float64
# GRAPHICAL SOLUTION
# Donut chart of the revenue share of the articles held in df3.
colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#ff9950']
textprops = dict(fontsize=15)
centre_circle = plt.Circle((0, 0), 0.70, fc='white')

fig = plt.figure()
fig.set_figheight(8)
plt.pie(
    x=df3['total_amount'],
    labels=df3.index,
    colors=colors,
    textprops=textprops,
    autopct='%.1f%%',
    pctdistance=0.8,
    labeldistance=1,
)
plt.legend(fontsize=10, loc='upper center', bbox_to_anchor=(0.5, -0.04), ncol=2)
plt.title('5 artículos con más ingresos', fontweight='bold')
# A white disc drawn over the centre turns the pie into a ring.
plt.gca().add_artist(centre_circle)
plt.show()
# ANALYTICAL SOLUTION
# Totals per seller, best revenue first. numeric_only=True keeps text columns
# out of the sum (since pandas 2.0 they are no longer dropped by default).
df4 = (
    my_df.groupby('seller_name')
    .sum(numeric_only=True)
    .sort_values('total_amount', ascending=False)
)
# The first row is the seller with the highest total amount.
print('Respuesta:', df4.head(1))
print()
print(df4[['quantity', 'total_amount']])
Respuesta: week quantity total_amount
seller_name
Janel O'Curran 174 703 192832.47
quantity total_amount
seller_name
Janel O'Curran 703 192832.47
Brockie Patience 441 142709.88
Oliviero Charkham 555 141329.76
Vasily Danilyuk 521 129157.55
Daisie Slograve 554 120520.11
Aveline Swanwick 629 118874.33
Arnold Kilkenny 583 94552.04
Kati Innot 512 83704.62
Jase Doy 582 80628.31
Ewell Peres 496 78144.32
Onida Cosely 535 77373.37
Milly Christoffe 442 61733.69
Tobin Roselli 519 56984.42
Cornie Wynrehame 523 52253.57
Cirilo Grandham 470 45009.40
# GRAPHICAL SOLUTION 2
# Apply the theme before drawing: figure size, style and font scale are read
# when the figure and axes are created, so calling sns.set() after the plot
# would only affect figures created later.
sns.set(rc={'figure.figsize': (20, 10), 'axes.labelsize': 12}, style='whitegrid', font_scale=1.9)
# Bars go from the highest to the lowest amount sold.
ranking = df4.sort_values('total_amount', ascending=False).index
ax = sns.barplot(x=df4.index, y=df4['total_amount'], data=df4, saturation=.8, order=ranking)
ax.set_title("Mejor Vendedor")
plt.xticks(rotation=90)
plt.xlabel('Vendedor')
plt.ylabel('Monto Vendido')
plt.show()
# Stacked bar chart of the cross3 table.
# Set the legend font size before any legend is built; rc changes made
# afterwards only apply to legends created later.
plt.rc('legend', fontsize=10)
# NOTE(review): `style` here is the pandas plot argument, not a seaborn theme;
# 'whitegrid' does not look like it has any effect on a bar chart - confirm.
cross3.plot(
    kind='bar',
    stacked=True,
    title='Barras Apiladas País y Producto',
    mark_right=True,
    cmap='hsv',
    fontsize=13,
    figsize=(10, 7),
    width=0.9,
    style='whitegrid',
).set_facecolor('w')
# Anchor the legend outside the axes, to the right of the bars.
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
# ANALYTICAL SOLUTION
# Totals per week, highest revenue first. numeric_only=True keeps text columns
# out of the sum (since pandas 2.0 they are no longer dropped by default).
df5 = (
    my_df.groupby('week')
    .sum(numeric_only=True)
    .sort_values('total_amount', ascending=False)
)
print(df5)
quantity total_amount
week
1 2449 507458.81
2 2444 415364.44
3 2114 329140.03
4 1058 223844.56
# GRAPHICAL SOLUTION 2
# Apply the theme before drawing: figure size, style and font scale are read
# when the figure and axes are created, so calling sns.set() after the plot
# would only affect figures created later.
sns.set(rc={'figure.figsize': (20, 10), 'axes.labelsize': 12}, style='whitegrid', font_scale=1.9)
# Bars go from the week with the highest amount sold to the lowest.
ranking = df5.sort_values('total_amount', ascending=False).index
ax = sns.barplot(x=df5.index, y=df5['total_amount'], data=df5, palette="Set2", saturation=.8, order=ranking)
ax.set_title("Ventas Semanales")
plt.xticks(rotation=0)
plt.xlabel('Semana')
plt.ylabel('Monto Vendido')
plt.show()
# ANALYTICAL SOLUTION
# Totals per country, highest revenue first. numeric_only=True keeps text
# columns out of the sum (since pandas 2.0 they are no longer dropped by
# default).
df6 = (
    my_df.groupby('country_name')
    .sum(numeric_only=True)
    .sort_values('total_amount', ascending=False)
)
# The first row is the country with the highest total amount.
print('Respuesta:', df6.head(1))
print()
print(df6[['quantity', 'total_amount']].head())
Respuesta: week quantity total_amount
country_name
Brazil 717 2515 441271.85
quantity total_amount
country_name
Brazil 2515 441271.85
Argentina 947 205832.78
Colombia 881 177514.29
Peru 1027 161421.12
Mexico 846 138619.99
# GRAPHICAL SOLUTION
# Apply the theme before drawing: figure size, style and font scale are read
# when the figure and axes are created, so calling sns.set() after the plot
# would only affect figures created later.
sns.set(rc={'figure.figsize': (12, 10), 'axes.labelsize': 12}, style='whitegrid', font_scale=1)
# Bars go from the country with the highest amount sold to the lowest.
ranking = df6.sort_values('total_amount', ascending=False).index
ax = sns.barplot(x=df6.index, y=df6['total_amount'], data=df6, saturation=.8, order=ranking)
ax.set_title("Ventas por País")
plt.xticks(rotation=90)
plt.xlabel('País')
plt.ylabel('Monto Vendido')
plt.show()
# SOLUTION
# A pair plot is used to look for relationships between variables.
# The variables that could be related are: week, quantity and total_amount.
sns.pairplot(my_df)
# Author's reading of the plot: quantity and total_amount are related. That is
# expected, because total_amount was computed as quantity * unit_price.
# To see the relationship more clearly the points are split by article_name:
# the quantity/total_amount proportion is different for each article.
# The legend font size is set before drawing so that it applies to this
# plot's legend (rc changes do not alter legends that already exist).
plt.rc('legend', fontsize=12)
sns.pairplot(my_df, hue='article_name')
plt.show()
# ANALYTICAL SOLUTION
# Cross table of the amount sold of each product in each country
# (rows: country, columns: article, cells: summed total_amount).
# The aggregation is named by string: passing the np.sum callable triggers a
# FutureWarning on recent pandas, while 'sum' keeps the same result.
cross2 = pd.crosstab(
    index=my_df.country_name,
    columns=my_df.article_name,
    values=my_df.total_amount,
    aggfunc='sum',
)
print(cross2.head())
article_name CPU Case Chair Desk Fan Cooler Full Pc \
country_name
Argentina 14520.48 758.0 2013.84 1170.9 199.75 95751.45
Bolivia NaN NaN NaN 2081.6 NaN 10639.05
Brazil 11448.84 2690.9 37591.68 12619.7 246.50 134052.03
Chile NaN NaN 4363.32 1040.8 NaN 2127.81
Colombia 2373.54 644.3 8391.00 3772.9 29.75 72345.54
article_name HDD Headphones Heatsink Keyboard ... SDD \
country_name ...
Argentina 3714.16 442.7 NaN 700.6 ... 1606.0
Bolivia NaN NaN NaN 293.8 ... NaN
Brazil 6499.78 1700.9 1030.0 836.2 ... 2574.0
Chile 1147.02 NaN 120.0 67.8 ... NaN
Colombia 1857.08 1281.5 260.0 632.8 ... 1628.0
article_name Sata Cable Scanner Smartphone Tablet Usb Cable Video Card \
country_name
Argentina 81.32 2220.0 13125.0 NaN 94.40 1972.5
Bolivia NaN 2775.0 NaN NaN 61.95 1446.5
Brazil 121.98 14430.0 41475.0 20280.0 188.80 10520.0
Chile 23.54 2775.0 6300.0 1430.0 NaN NaN
Colombia 4.28 7215.0 27300.0 6500.0 150.45 2630.0
article_name Water Cooling Webcam Wi-Fi Card
country_name
Argentina 1687.5 200.70 715.32
Bolivia NaN NaN NaN
Brazil 5535.0 1043.64 2205.57
Chile NaN 280.98 NaN
Colombia 2160.0 1043.64 1490.25
[5 rows x 31 columns]
# GRAPHICAL SOLUTION
# Theme and legend settings go first: they are read when the axes and the
# legend are created, so setting them after plotting would only affect later
# figures. plt.rc comes after sns.set because sns.set resets the legend font
# size as part of its plotting context.
sns.set(rc={'figure.figsize': (10, 10), 'axes.labelsize': 12}, style='whitegrid', font_scale=1)
plt.rc('legend', fontsize=12)
# NOTE(review): `style` here is the pandas plot argument, not a seaborn theme;
# 'whitegrid' does not look like it has any effect on a bar chart - confirm.
cross2.plot(
    kind='bar',
    stacked=True,
    title='Barras Apiladas País y Producto',
    mark_right=True,
    cmap='hsv',
    fontsize=10,
    width=0.9,
    figsize=(11, 6),
    style='whitegrid',
).set_facecolor('w')
# Anchor the legend outside the axes, to the right of the bars.
plt.legend(bbox_to_anchor=(1.05, 1.0), loc='upper left')
8. Análisis del Total_amount
# Histogram of total_amount in 10 bins. histplot replaces the deprecated
# distplot (which emits a FutureWarning and is slated for removal); with no
# KDE requested it draws plain counts, as distplot(kde=False) did.
sns.histplot(x=my_df.total_amount, bins=10)
plt.show()
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `histplot` (an axes-level function for histograms).
warnings.warn(msg, FutureWarning)
9. Full_Pc
# One box plot per metric, broken down by article; each is shown as its own
# figure with vertical tick labels.
for metric in ('quantity', 'total_amount'):
    sns.boxplot(data=my_df, x='article_name', y=metric)
    plt.xticks(rotation=90)
    plt.show()