# A veces necesitamos instalar nuevas librerías en nuestros proyectos
!pip install openpyxl==3.0.10
Collecting openpyxl==3.0.10
Downloading openpyxl-3.0.10-py2.py3-none-any.whl (242 kB)
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 242.1/242.1 KB 23.0 MB/s eta 0:00:00
Collecting et-xmlfile
Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.0.10
WARNING: You are using pip version 22.0.4; however, version 22.1.2 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# imports
import numpy as np
import pandas as pd
import sqlite3 as sql3
import matplotlib.pyplot as plt
import seaborn as sns
# Levanto los datos en 3 diferentes dataframes
#ARTÍCULO
con=sql3.connect('/work/data/articles.db')
sql_query =pd.read_sql_query('SELECT * FROM articles',con)
df_articles=pd.DataFrame(sql_query, columns=['article_id','article_name','unit_price'])
#Reemplazo los datos en las nuevas columnas
for i in range(len(my_df.index)):
#columna article_name
#Cargo el nombre del artículo en una variable
article=df_articles.loc[my_df.loc[i]['article_name']]['article_name']
#Se lo asigno a la columna y registro que corresponde
my_df.loc[i,'article_name']=article
#columna total amount
my_df.loc[i,'total_amount'] = my_df.loc[i,'quantity'] * df_articles.loc[my_df.loc[i]['article_id']]['unit_price']
# columna de seller_name
my_df.loc[i,'seller_name'] = df_sellers.loc[my_df.loc[i]['seller_name']]['seller_name']
print(my_df.head())
order_id week article_id quantity seller_id country_name \
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
article_name total_amount seller_name
0 Water Cooling 675.0 Cirilo Grandham
1 Mouse 454.5 Ewell Peres
2 Netbook 725.0 Janel O'Curran
3 Tablet 1170.0 Janel O'Curran
4 Case 227.4 Daisie Slograve
# RESOLUCIÓN ANALÍTICA
my_df2 = my_df.groupby('article_name').sum()
por_cant = my_df2.sort_values('quantity', ascending=False)
print(por_cant['quantity'].head(1))
article_name
HDD 413
Name: quantity, dtype: int64
# RESOLUCIÓN GRÁFICA
sns.set(style='white',palette='dark')
sns.displot(my_df,x='article_name',color='paleturquoise')
plt.title('Artículo más vendido')
plt.ylabel('Cantidad',fontsize=10)
plt.xlabel('Nombre artículo',fontsize=10)
plt.xticks(rotation=90,fontsize=8)
plt.show()
# RESOLUCIÓN ANALÍTICA
my_df3 = (my_df.groupby('article_name').sum()).sort_values('total_amount',ascending=False).head(5)
print(my_df3['total_amount'])
article_name
Full Pc 538335.93
Notebook 251000.00
Smartphone 152250.00
Chair 69477.48
Tablet 48620.00
Name: total_amount, dtype: float64
# RESOLUCIÓN GRÁFICA
plt.pie(my_df3['total_amount'],labels=my_df3.index,colors=['cornflowerblue','slateblue','turquoise','deepskyblue','skyblue','powderblue'])
plt.title('Artículos que más ingresos proporcionaron')
plt.show()
# RESOLUCIÓN ANALÍTICA
my_df4 = (my_df.groupby('seller_name').sum()).sort_values('total_amount',ascending=False)
print(my_df4[['quantity']+['total_amount']])
quantity total_amount
seller_name
Janel O'Curran 703 192832.47
Brockie Patience 441 142709.88
Oliviero Charkham 555 141329.76
Vasily Danilyuk 521 129157.55
Daisie Slograve 554 120520.11
Aveline Swanwick 629 118874.33
Arnold Kilkenny 583 94552.04
Kati Innot 512 83704.62
Jase Doy 582 80628.31
Ewell Peres 496 78144.32
Onida Cosely 535 77373.37
Milly Christoffe 442 61733.69
Tobin Roselli 519 56984.42
Cornie Wynrehame 523 52253.57
Cirilo Grandham 470 45009.40
# RESOLUCIÓN GRÁFICA
plt.bar(my_df4.index,my_df4['total_amount'],color=['aquamarine','powderblue','paleturquoise','darkturquoise','mediumturquoise','mediumaquamarine'])
plt.xticks(rotation=60)
plt.title('Mejor vendedor del mes')
plt.ylabel('Monto total',fontsize=10)
plt.xlabel('Vendedor',fontsize=10)
plt.show()
# RESOLUCIÓN ANALÍTICA
my_df5 = (my_df.groupby('week').sum()).sort_values('total_amount',ascending=False)
print(my_df5)
quantity total_amount
week
1 2449 507458.81
2 2444 415364.44
3 2114 329140.03
4 1058 223844.56
# RESOLUCIÓN GRÁFICA
plt.bar(my_df5.index,my_df5['total_amount'],color=['darkturquoise','mediumturquoise','paleturquoise','powderblue'])
plt.title('Variaciones en ventas a lo largo del mes')
plt.ylabel('Monto total',fontsize=10)
plt.xlabel('Semana',fontsize=10)
plt.show()
# RESOLUCIÓN
my_df6=(my_df.groupby('country_name').sum()).sort_values('total_amount',ascending=False)
print(my_df6)
week quantity total_amount
country_name
Brazil 717 2515 441271.85
Argentina 241 947 205832.78
Colombia 230 881 177514.29
Peru 266 1027 161421.12
Mexico 237 846 138619.99
Venezuela 84 320 77684.52
El Salvador 36 111 57391.26
Guatemala 62 202 52579.25
Honduras 78 303 36763.56
Costa Rica 41 145 34606.50
Chile 70 231 24660.98
Bolivia 45 181 22682.80
Uruguay 31 92 17843.09
Ecuador 44 129 17475.30
Paraguay 39 123 8195.12
Puerto Rico 3 12 1265.43
# RESOLUCIÓN GRÁFICA
plt.bar(my_df6.index,my_df6['total_amount'],color=['darkturquoise','mediumturquoise','paleturquoise','powderblue'])
plt.title('País que más vendió')
plt.ylabel('Monto total',fontsize=10)
plt.xlabel('País',fontsize=10)
plt.xticks(rotation=90)
plt.show()