# Levanto los datos en 3 diferentes dataframes
# ARTÍCULOS
conn = sql3.connect('/work/data/articles.db')
sql_query = pd.read_sql_query('SELECT * FROM articles', conn)
df_articles = pd.DataFrame(sql_query, columns=['article_id', 'article_name', 'unit_price'])
print(df_articles)
article_id article_name unit_price
0 20015 Smartphone 525.00
1 20016 Full Pc 2127.81
2 20017 Monitor 230.00
3 20018 Tablet 130.00
4 20019 Desk 130.10
5 20020 Chair 335.64
6 20021 Modem 67.50
7 20022 Range Extender 20.45
8 20023 Notebook 1000.00
9 20024 Netbook 145.00
10 20025 HDD 54.62
11 20026 SDD 22.00
12 20027 Ram Memory 35.95
13 20028 Motherboard 138.38
14 20029 Mouse 30.30
15 20030 Fan Cooler 4.25
16 20031 Webcam 20.07
17 20032 Keyboard 22.60
18 20033 Headphones 23.30
19 20034 Scanner 185.00
20 20035 Case 37.90
21 20036 Video Card 131.50
22 20037 CPU 139.62
23 20038 Power Supply 43.95
24 20039 Water Cooling 67.50
25 20040 Heatsink 10.00
26 20041 Usb Cable 2.95
27 20042 Sata Cable 2.14
28 20043 Pci Express Port 10.12
29 20044 Wi-Fi Card 59.61
30 20045 Mesh Wi-Fi X 2 32.50
# VENDEDORES
df_sellers = pd.read_excel('/work/data/sellers.xlsx', index_col=0)
print(df_sellers)
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
6 Milly Christoffe
7 Kati Innot
8 Tobin Roselli
9 Onida Cosely
10 Cirilo Grandham
11 Vasily Danilyuk
12 Brockie Patience
13 Arnold Kilkenny
14 Janel O'Curran
15 Daisie Slograve
# ÓRDENES
df_orders=pd.read_csv('/work/data/orders.csv')
print(df_orders)
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
.. ... ... ... ... ... ...
995 16019 4 20021 1 7 Brazil
996 16020 4 20040 15 15 Brazil
997 16021 4 20040 2 11 Colombia
998 16022 4 20018 14 11 Brazil
999 16023 4 20026 12 9 Brazil
[1000 rows x 6 columns]
# Exploración del df de artículos
print('Muestra de los datos')
print(df_articles.head())
print('\nFormato del dataframe')
print(df_articles.shape)
print('\nBúsqueda de valores nulos')
print(df_articles.isnull().sum())
print('\nFormato de los datos por columna')
print(df_articles.dtypes)
Muestra de los datos
article_id article_name unit_price
0 20015 Smartphone 525.00
1 20016 Full Pc 2127.81
2 20017 Monitor 230.00
3 20018 Tablet 130.00
4 20019 Desk 130.10
Formato del dataframe
(31, 3)
Búsqueda de valores nulos
article_id 0
article_name 0
unit_price 0
dtype: int64
Formato de los datos por columna
article_id int64
article_name object
unit_price object
dtype: object
# Exploración del df de vendedores
print('Muestra de los datos')
print(df_sellers.head())
print('\nFormato del dataframe')
print(df_sellers.shape)
print('\nBúsqueda de valores nulos')
print(df_sellers.isnull().sum())
print('\nFormato de los datos por columna')
print(df_sellers.dtypes)
Muestra de los datos
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
Formato del dataframe
(15, 1)
Búsqueda de valores nulos
seller_name 0
dtype: int64
Formato de los datos por columna
seller_name object
dtype: object
# Exploración del df de órdenes
print('Muestra de los datos')
print(df_orders.head())
print('\nFormato del dataframe')
print(df_orders.shape)
print('\nBúsqueda de valores nulos')
print(df_orders.isnull().sum())
print('\nFormato de los datos por columna')
print(df_orders.dtypes)
Muestra de los datos
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
Formato del dataframe
(1000, 6)
Búsqueda de valores nulos
order_id 0
week 0
article_id 0
quantity 0
seller_id 0
country_name 0
dtype: int64
Formato de los datos por columna
order_id int64
week int64
article_id int64
quantity int64
seller_id int64
country_name object
dtype: object
# cambio el tipo de dato de 'unit_price' a float
df_articles['unit_price'] = df_articles['unit_price'].astype(float)
print(df_articles.dtypes)
article_id int64
article_name object
unit_price float64
dtype: object
# cambio el índice del df_articles por la columna article_id
df_articles.set_index('article_id', inplace=True)
print(df_articles.head())
article_name unit_price
article_id
20015 Smartphone 525.00
20016 Full Pc 2127.81
20017 Monitor 230.00
20018 Tablet 130.00
20019 Desk 130.10
# creo una copia del df_orders
my_df = df_orders.copy()
# agrego algunas columnas extra que necesito y las lleno con los códigos id que las vinculan con los otros df
my_df = my_df.assign(article_name = my_df['article_id'])
my_df = my_df.assign(total_amount = my_df['article_id'])
my_df = my_df.assign(seller_name = my_df['seller_id'])
print(my_df.head())
order_id week article_id quantity seller_id country_name article_name \
0 15024 1 20039 10 10 Peru 20039
1 15025 1 20029 15 5 Peru 20029
2 15026 1 20024 5 14 Bolivia 20024
3 15027 1 20018 9 14 Brazil 20018
4 15028 1 20035 6 15 Mexico 20035
total_amount seller_name
0 20039 10
1 20029 5
2 20024 14
3 20018 14
4 20035 15
# Reemplazo los valores en las nuevas columnas del df
for i in range(len(my_df.index)):
article = df_articles.loc[my_df.loc[i]['article_name']]['article_name']
my_df.loc[i,'article_name'] = article
my_df.loc[i,'total_amount'] = my_df.loc[i,'quantity'] * df_articles.loc[my_df.loc[i]['total_amount']]['unit_price']
my_df.loc[i,'seller_name'] = df_sellers.loc[my_df.loc[i]['seller_name']]['seller_name']
print(my_df.head())
order_id week article_id quantity seller_id country_name \
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
article_name total_amount seller_name
0 Water Cooling 675.0 Cirilo Grandham
1 Mouse 454.5 Ewell Peres
2 Netbook 725.0 Janel O'Curran
3 Tablet 1170.0 Janel O'Curran
4 Case 227.4 Daisie Slograve
# Borrar las columnas que no voy a utilizar
my_df.drop(['order_id','article_id','seller_id'], axis='columns', inplace=True)
print(my_df.head())
week quantity country_name article_name total_amount seller_name
0 1 10 Peru Water Cooling 675.0 Cirilo Grandham
1 1 15 Peru Mouse 454.5 Ewell Peres
2 1 5 Bolivia Netbook 725.0 Janel O'Curran
3 1 9 Brazil Tablet 1170.0 Janel O'Curran
4 1 6 Mexico Case 227.4 Daisie Slograve
d1=pd.DataFrame({'mes': ['ene','feb','mar','abr'], 'ventas':[10,20,30,15]})
d2=pd.DataFrame({'mes': ['ene','feb','mar','abr'], 'costos':[7,16,25,12]})
print(pd.merge(d1,d2))
mes ventas costos
0 ene 10 7
1 feb 20 16
2 mar 30 25
3 abr 15 12
print(my_df.head())
week quantity country_name article_name total_amount seller_name
0 1 10 Peru Water Cooling 675.0 Cirilo Grandham
1 1 15 Peru Mouse 454.5 Ewell Peres
2 1 5 Bolivia Netbook 725.0 Janel O'Curran
3 1 9 Brazil Tablet 1170.0 Janel O'Curran
4 1 6 Mexico Case 227.4 Daisie Slograve
# RESOLUCIÓN ANALÍTICA
df2 = my_df.groupby('article_name').sum()
por_cant = df2.sort_values('quantity', ascending=False)
print(por_cant['quantity'].head(1))
print(df2.head())
article_name
HDD 413
Name: quantity, dtype: int64
week quantity total_amount
article_name
CPU 66 266 37138.92
Case 54 206 7807.40
Chair 56 207 69477.48
Desk 60 223 29012.30
Fan Cooler 64 205 871.25
# RESOLUCIÓN GRÁFICA
sns.barplot(x=df2.index,y=df2['quantity'],data=df2,order=df2.sort_values('quantity', ascending=False).index)
plt.xticks(rotation=90)
plt.show()
# RESOLUCIÓN ANALÍTICA
df3=(my_df.groupby('article_name').sum()).sort_values('total_amount', ascending=False).head(5)
print(df3['total_amount'])
article_name
Full Pc 538335.93
Notebook 251000.00
Smartphone 152250.00
Chair 69477.48
Tablet 48620.00
Name: total_amount, dtype: float64
# RESOLUCIÓN GRÁFICA
plt.pie(x=df3['total_amount'],labels=df3.index)
plt.show()
# RESOLUCIÓN ANALÍTICA
df4 =(my_df.groupby('seller_name').sum()).sort_values('total_amount', ascending=False)
print('Respuesta:', df4.head(1) )
print()
print(df4[['quantity']+['total_amount']])
Respuesta: week quantity total_amount
seller_name
Janel O'Curran 174 703 192832.47
quantity total_amount
seller_name
Janel O'Curran 703 192832.47
Brockie Patience 441 142709.88
Oliviero Charkham 555 141329.76
Vasily Danilyuk 521 129157.55
Daisie Slograve 554 120520.11
Aveline Swanwick 629 118874.33
Arnold Kilkenny 583 94552.04
Kati Innot 512 83704.62
Jase Doy 582 80628.31
Ewell Peres 496 78144.32
Onida Cosely 535 77373.37
Milly Christoffe 442 61733.69
Tobin Roselli 519 56984.42
Cornie Wynrehame 523 52253.57
Cirilo Grandham 470 45009.40
# RESOLUCIÓN GRÁFICA
plt.bar(df4.index, df4['total_amount'])
plt.xticks(rotation=60)
plt.show()
# RESOLUCIÓN ANALÍTICA
df5 = (my_df.groupby('week').sum()).sort_values('total_amount',ascending=False)
print(df5)
quantity total_amount
week
1 2449 507458.81
2 2444 415364.44
3 2114 329140.03
4 1058 223844.56
# RESOLUCIÓN GRÁFICA
plt.bar(df5.index,df5['total_amount'])
plt.show()
# BY CECILIA
plt.plot(df5.index, df5["total_amount"])
plt.show()
# RESOLUCIÓN
df6 = (my_df.groupby('country_name').sum()).sort_values('total_amount',ascending=False)
print(f"País con el mayor ingreso:\n {df6.head(1)}")
País con el mayor ingreso:
week quantity total_amount
country_name
Brazil 717 2515 441271.85
sns.barplot(x=df6.index,y=df6['total_amount'],data=df6)
plt.title('Ingresos por país')
plt.xlabel('País')
plt.ylabel('Ingresos [$]')
plt.xticks(rotation=90)
plt.show()
# RESOLUCIÓN
df7=my_df
df7=df7.groupby(['country_name','article_name']).sum()
df7=df7.sort_values(['country_name','quantity','total_amount'],ascending=[True,False,False])
print(df7.to_string())
#print(df7)
week quantity total_amount
country_name article_name
Argentina CPU 23 104 14520.48
SDD 12 73 1606.00
HDD 18 68 3714.16
Range Extender 19 58 1186.10
Fan Cooler 13 47 199.75
Ram Memory 7 46 1653.70
Mesh Wi-Fi X 2 17 46 1495.00
Full Pc 10 45 95751.45
Notebook 16 43 43000.00
Netbook 8 39 5655.00
Motherboard 9 39 5396.82
Sata Cable 9 38 81.32
Power Supply 9 32 1406.40
Usb Cable 15 32 94.40
Keyboard 9 31 700.60
Modem 6 27 1822.50
Smartphone 5 25 13125.00
Water Cooling 6 25 1687.50
Case 3 20 758.00
Headphones 6 19 442.70
Video Card 3 15 1972.50
Monitor 1 13 2990.00
Scanner 2 12 2220.00
Wi-Fi Card 4 12 715.32
Webcam 2 10 200.70
Desk 5 9 1170.90
Pci Express Port 1 7 70.84
Chair 2 6 2013.84
Mouse 1 6 181.80
Bolivia Pci Express Port 6 34 344.08
Ram Memory 5 28 1006.60
Usb Cable 6 21 61.95
Motherboard 7 19 2629.22
Desk 5 16 2081.60
Scanner 3 15 2775.00
Keyboard 3 13 293.80
Mesh Wi-Fi X 2 3 12 390.00
Video Card 1 11 1446.50
Netbook 5 7 1015.00
Full Pc 1 5 10639.05
Brazil Tablet 41 156 20280.00
HDD 45 119 6499.78
SDD 33 117 2574.00
Chair 32 112 37591.68
Pci Express Port 38 105 1062.60
Power Supply 26 103 4526.85
Heatsink 30 103 1030.00
Motherboard 34 101 13976.38
Desk 23 97 12619.70
Modem 24 92 6210.00
Ram Memory 21 92 3307.40
Range Extender 25 90 1840.50
Notebook 20 83 83000.00
CPU 16 82 11448.84
Water Cooling 19 82 5535.00
Netbook 28 81 11745.00
Video Card 24 80 10520.00
Smartphone 19 79 41475.00
Scanner 21 78 14430.00
Headphones 23 73 1700.90
Case 21 71 2690.90
Mouse 16 67 2030.10
Mesh Wi-Fi X 2 17 65 2112.50
Usb Cable 22 64 188.80
Full Pc 17 63 134052.03
Fan Cooler 17 58 246.50
Sata Cable 18 57 121.98
Webcam 20 52 1043.64
Wi-Fi Card 14 37 2205.57
Keyboard 8 37 836.20
Monitor 5 19 4370.00
Chile Mouse 6 27 818.10
Pci Express Port 7 23 232.76
HDD 3 21 1147.02
Scanner 10 15 2775.00
Motherboard 3 15 2075.70
Power Supply 7 14 615.30
Webcam 1 14 280.98
Chair 2 13 4363.32
Range Extender 2 13 265.85
Smartphone 3 12 6300.00
Heatsink 2 12 120.00
Tablet 4 11 1430.00
Sata Cable 4 11 23.54
Ram Memory 2 10 359.50
Desk 2 8 1040.80
Modem 3 7 472.50
Keyboard 3 3 67.80
Full Pc 4 1 2127.81
Netbook 2 1 145.00
Colombia SDD 16 74 1628.00
Headphones 17 55 1281.50
Smartphone 13 52 27300.00
Webcam 7 52 1043.64
Usb Cable 16 51 150.45
Tablet 10 50 6500.00
Scanner 7 39 7215.00
Full Pc 11 34 72345.54
HDD 5 34 1857.08
Range Extender 6 33 674.85
Water Cooling 7 32 2160.00
Monitor 7 30 6900.00
Desk 8 29 3772.90
Keyboard 9 28 632.80
Netbook 9 27 3915.00
Motherboard 2 27 3736.26
Heatsink 9 26 260.00
Chair 8 25 8391.00
Wi-Fi Card 8 25 1490.25
Pci Express Port 7 25 253.00
Video Card 7 20 2630.00
Notebook 4 18 18000.00
CPU 9 17 2373.54
Case 4 17 644.30
Power Supply 5 16 703.20
Ram Memory 4 15 539.25
Modem 5 12 810.00
Mouse 2 9 272.70
Fan Cooler 5 7 29.75
Sata Cable 3 2 4.28
Costa Rica Power Supply 7 32 1406.40
Netbook 8 30 4350.00
Sata Cable 4 16 34.24
Monitor 2 14 3220.00
Fan Cooler 2 13 55.25
Full Pc 2 11 23405.91
Case 6 10 379.00
Tablet 1 7 910.00
Video Card 3 4 526.00
Ram Memory 3 4 143.80
Modem 2 2 135.00
Range Extender 1 2 40.90
Ecuador Power Supply 5 24 1054.80
Smartphone 4 15 7875.00
Mouse 3 15 454.50
Monitor 5 13 2990.00
Tablet 7 13 1690.00
Webcam 2 13 260.91
Fan Cooler 3 11 46.75
Water Cooling 2 7 472.50
Mesh Wi-Fi X 2 3 7 227.50
Chair 3 6 2013.84
Netbook 2 2 290.00
Modem 1 1 67.50
SDD 2 1 22.00
Heatsink 2 1 10.00
El Salvador Smartphone 7 22 11550.00
Full Pc 4 19 40428.39
Sata Cable 5 16 34.24
Desk 2 12 1561.20
Video Card 2 10 1315.00
Tablet 2 8 1040.00
Netbook 1 6 870.00
Power Supply 4 5 219.75
Headphones 2 5 116.50
Webcam 4 4 80.28
Water Cooling 2 2 135.00
Range Extender 1 2 40.90
Guatemala Heatsink 5 25 250.00
Sata Cable 6 25 53.50
Pci Express Port 2 21 212.52
Modem 3 19 1282.50
Notebook 2 17 17000.00
Full Pc 4 13 27661.53
Scanner 1 12 2220.00
Desk 1 10 1301.00
Headphones 1 10 233.00
Keyboard 3 10 226.00
Case 6 8 303.20
Usb Cable 4 7 20.65
HDD 3 6 327.72
Mouse 3 6 181.80
CPU 3 4 558.48
Monitor 3 2 460.00
Water Cooling 5 2 135.00
Power Supply 4 2 87.90
SDD 2 2 44.00
Range Extender 1 1 20.45
Honduras Mesh Wi-Fi X 2 9 28 910.00
Usb Cable 7 28 82.60
Mouse 7 27 818.10
HDD 5 26 1420.12
Tablet 6 23 2990.00
Ram Memory 3 23 826.85
Chair 2 18 6041.52
Power Supply 3 15 659.25
Notebook 1 14 14000.00
Motherboard 6 14 1937.32
Water Cooling 6 13 877.50
Sata Cable 1 13 27.82
Netbook 4 12 1740.00
Monitor 2 11 2530.00
Heatsink 2 10 100.00
CPU 4 8 1116.96
Wi-Fi Card 3 7 417.27
Range Extender 3 6 122.70
Webcam 3 5 100.35
Keyboard 1 2 45.20
Mexico Water Cooling 16 63 4252.50
HDD 16 63 3441.06
SDD 12 58 1276.00
Motherboard 12 48 6642.24
Notebook 17 44 44000.00
Case 8 44 1667.60
Video Card 14 39 5128.50
Netbook 7 38 5510.00
Webcam 9 38 762.66
Keyboard 6 34 768.40
Wi-Fi Card 11 33 1967.13
Smartphone 6 29 15225.00
Pci Express Port 7 29 293.48
Mesh Wi-Fi X 2 9 28 910.00
Monitor 7 27 6210.00
Tablet 7 25 3250.00
Range Extender 12 25 511.25
Headphones 5 23 535.90
CPU 4 19 2652.78
Heatsink 7 18 180.00
Usb Cable 6 18 53.10
Modem 6 17 1147.50
Fan Cooler 9 17 72.25
Desk 4 14 1821.40
Full Pc 1 13 27661.53
Mouse 2 12 363.60
Ram Memory 4 11 395.45
Scanner 2 8 1480.00
Sata Cable 5 8 17.12
Power Supply 4 2 87.90
Chair 2 1 335.64
Paraguay Headphones 7 24 559.20
Heatsink 5 22 220.00
Monitor 7 19 4370.00
Ram Memory 3 15 539.25
Tablet 2 14 1820.00
Sata Cable 3 9 19.26
Mouse 2 8 242.40
Case 2 6 227.40
Webcam 3 3 60.21
Usb Cable 2 2 5.90
Video Card 3 1 131.50
Peru Mouse 23 125 3787.50
Sata Cable 22 62 132.68
Heatsink 16 59 590.00
Netbook 14 57 8265.00
HDD 9 56 3058.72
Monitor 12 54 12420.00
Tablet 9 52 6760.00
Modem 10 42 2835.00
Range Extender 6 39 797.55
Fan Cooler 13 39 165.75
Ram Memory 11 36 1294.20
Usb Cable 7 35 103.25
SDD 6 34 748.00
Notebook 9 32 32000.00
Video Card 8 29 3813.50
Desk 10 28 3642.80
Wi-Fi Card 7 27 1609.47
Chair 5 26 8726.64
Full Pc 9 24 51067.44
Water Cooling 8 23 1552.50
Scanner 4 21 3885.00
Case 2 19 720.10
Pci Express Port 11 18 182.16
Headphones 9 17 396.10
CPU 4 16 2233.92
Smartphone 5 15 7875.00
Motherboard 3 13 1798.94
Power Supply 6 13 571.35
Keyboard 2 7 158.20
Webcam 5 5 100.35
Mesh Wi-Fi X 2 1 4 130.00
Puerto Rico CPU 1 9 1256.58
Usb Cable 2 3 8.85
Uruguay Motherboard 3 15 2075.70
Pci Express Port 4 14 141.68
Headphones 7 13 302.90
Smartphone 2 9 4725.00
HDD 3 8 436.96
Case 1 8 303.20
CPU 2 7 977.34
SDD 2 7 154.00
Mesh Wi-Fi X 2 2 6 195.00
Full Pc 2 4 8511.24
Webcam 3 1 20.07
Venezuela Smartphone 10 32 16800.00
Webcam 9 32 642.24
Modem 4 25 1687.50
Full Pc 5 21 44684.01
Scanner 4 21 3885.00
Netbook 7 20 2900.00
Mouse 6 20 606.00
Mesh Wi-Fi X 2 3 17 552.50
Tablet 1 15 1950.00
Pci Express Port 6 15 151.80
Ram Memory 3 13 467.35
Fan Cooler 2 13 55.25
HDD 6 12 655.44
Range Extender 2 12 245.40
Usb Cable 6 12 35.40
Power Supply 1 11 483.45
Sata Cable 2 7 14.98
Monitor 2 6 1380.00
SDD 1 6 132.00
Heatsink 1 4 40.00
Water Cooling 2 3 202.50
Case 1 3 113.70
# RESOLUCIÓN
df8=my_df.groupby('article_name').sum()
df8=df8.sort_values('total_amount',ascending=True)
print(df8)
week quantity total_amount
article_name
Sata Cable 82 264 564.96
Usb Cable 93 273 805.35
Fan Cooler 64 205 871.25
Heatsink 79 280 2800.00
Pci Express Port 89 291 2944.92
Keyboard 44 165 3729.00
Webcam 68 229 4596.03
Headphones 77 239 5568.70
Range Extender 78 281 5746.45
Mesh Wi-Fi X 2 64 213 6922.50
Case 54 206 7807.40
SDD 86 372 8184.00
Wi-Fi Card 47 141 8405.01
Mouse 71 322 9756.60
Ram Memory 66 293 10533.35
Power Supply 81 269 11822.55
Modem 64 244 16470.00
Water Cooling 73 252 17010.00
HDD 113 413 22558.06
Video Card 65 209 27483.50
Desk 60 223 29012.30
CPU 66 266 37138.92
Motherboard 79 291 40268.58
Scanner 54 221 40885.00
Netbook 95 320 46400.00
Monitor 53 208 47840.00
Tablet 90 374 48620.00
Chair 56 207 69477.48
Smartphone 74 290 152250.00
Notebook 69 251 251000.00
Full Pc 70 253 538335.93