# Levanto los datos en 3 diferentes dataframes
# ARTÍCULOS
conn = sql3.connect('/work/data/articles.db')
sql_query = pd.read_sql_query('SELECT * FROM articles', conn)
df_articles = pd.DataFrame(sql_query, columns=['article_id','article_name','unit_price'])
print(df_articles)
article_id article_name unit_price
0 20015 Smartphone 525.00
1 20016 Full Pc 2127.81
2 20017 Monitor 230.00
3 20018 Tablet 130.00
4 20019 Desk 130.10
5 20020 Chair 335.64
6 20021 Modem 67.50
7 20022 Range Extender 20.45
8 20023 Notebook 1000.00
9 20024 Netbook 145.00
10 20025 HDD 54.62
11 20026 SDD 22.00
12 20027 Ram Memory 35.95
13 20028 Motherboard 138.38
14 20029 Mouse 30.30
15 20030 Fan Cooler 4.25
16 20031 Webcam 20.07
17 20032 Keyboard 22.60
18 20033 Headphones 23.30
19 20034 Scanner 185.00
20 20035 Case 37.90
21 20036 Video Card 131.50
22 20037 CPU 139.62
23 20038 Power Supply 43.95
24 20039 Water Cooling 67.50
25 20040 Heatsink 10.00
26 20041 Usb Cable 2.95
27 20042 Sata Cable 2.14
28 20043 Pci Express Port 10.12
29 20044 Wi-Fi Card 59.61
30 20045 Mesh Wi-Fi X 2 32.50
# VENDEDORES
df_sellers = pd.read_excel('/work/data/sellers.xlsx', index_col=0)
print(df_sellers)
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
6 Milly Christoffe
7 Kati Innot
8 Tobin Roselli
9 Onida Cosely
10 Cirilo Grandham
11 Vasily Danilyuk
12 Brockie Patience
13 Arnold Kilkenny
14 Janel O'Curran
15 Daisie Slograve
# ÓRDENES
df_orders = pd.read_csv('/work/data/orders.csv')
print(df_orders)
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
.. ... ... ... ... ... ...
995 16019 4 20021 1 7 Brazil
996 16020 4 20040 15 15 Brazil
997 16021 4 20040 2 11 Colombia
998 16022 4 20018 14 11 Brazil
999 16023 4 20026 12 9 Brazil
[1000 rows x 6 columns]
# Exploración del df de artículos
print('Muestra de datos')
print(df_articles.head())
print('\nFormato del dataframe')
print(df_articles.shape)
print('\nBúsqueda de valores nulos')
print(df_articles.isnull().sum())
print('\nFormato de los datos')
print(df_articles.dtypes)
Muestra de datos
article_id article_name unit_price
0 20015 Smartphone 525.00
1 20016 Full Pc 2127.81
2 20017 Monitor 230.00
3 20018 Tablet 130.00
4 20019 Desk 130.10
Formato del dataframe
(31, 3)
Búsqueda de valores nulos
article_id 0
article_name 0
unit_price 0
dtype: int64
Formato de los datos
article_id int64
article_name object
unit_price object
dtype: object
# Exploración del df de vendedores
print('Muestra de datos')
print(df_sellers.head())
print('\nFormato del dataframe')
print(df_sellers.shape)
print('\nBúsqueda de valores nulos')
print(df_sellers.isnull().sum())
print('\nFormato de los datos')
print(df_sellers.dtypes)
Muestra de datos
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
Formato del dataframe
(15, 1)
Búsqueda de valores nulos
seller_name 0
dtype: int64
Formato de los datos
seller_name object
dtype: object
# Exploración del df de órdenes
print('Muestra de datos')
print(df_orders.head())
print('\nFormato del dataframe')
print(df_orders.shape)
print('\nBúsqueda de valores nulos')
print(df_orders.isnull().sum())
print('\nFormato de los datos')
print(df_orders.dtypes)
Muestra de datos
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
Formato del dataframe
(1000, 6)
Búsqueda de valores nulos
order_id 0
week 0
article_id 0
quantity 0
seller_id 0
country_name 0
dtype: int64
Formato de los datos
order_id int64
week int64
article_id int64
quantity int64
seller_id int64
country_name object
dtype: object
df_articles['unit_price'] = df_articles['unit_price'].astype(float)
print(df_articles.dtypes)
article_id int64
article_name object
unit_price float64
dtype: object
# Creo una copia del df_orders
my_df = df_orders.copy()
# Cambio el índice del df_articles
df_articles.set_index('article_id', inplace=True)
print(df_articles.head())
article_name unit_price
article_id
20015 Smartphone 525.00
20016 Full Pc 2127.81
20017 Monitor 230.00
20018 Tablet 130.00
20019 Desk 130.10
print(my_df.head())
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
# agrego las columnas que me faltan
my_df = my_df.assign(article_name = my_df['article_id'])
my_df = my_df.assign(total_amount = my_df['article_id'])
my_df = my_df.assign(seller_name = my_df['seller_id'])
print(my_df.head())
order_id week article_id quantity seller_id country_name article_name \
0 15024 1 20039 10 10 Peru 20039
1 15025 1 20029 15 5 Peru 20029
2 15026 1 20024 5 14 Bolivia 20024
3 15027 1 20018 9 14 Brazil 20018
4 15028 1 20035 6 15 Mexico 20035
total_amount seller_name
0 20039 10
1 20029 5
2 20024 14
3 20018 14
4 20035 15
# reemplazar los datos ne las nuevas columnas
for i in range(len(my_df.index)):
# columna article_name
# cargo el nombre del artículo en una variable
article = df_articles.loc[my_df.loc[i]['article_name']]['article_name']
# se lo asigno a la columna y registro que corresponde
my_df.loc[i,'article_name']= article
# columna total_amount
my_df.loc[i,'total_amount'] = my_df.loc[i,'quantity'] * df_articles.loc[my_df.loc[i]['article_id']]['unit_price']
# columna de seller_name
my_df.loc[i,'seller_name'] = df_sellers.loc[my_df.loc[i]['seller_name']]['seller_name']
print(my_df.head())
order_id week article_id quantity seller_id country_name \
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
article_name total_amount seller_name
0 Water Cooling 675.0 Cirilo Grandham
1 Mouse 454.5 Ewell Peres
2 Netbook 725.0 Janel O'Curran
3 Tablet 1170.0 Janel O'Curran
4 Case 227.4 Daisie Slograve
# elimino las columnas que no necesito
my_df.drop(['order_id', 'article_id','seller_id'], axis='columns', inplace=True)
pd.options.display.float_format='$ {:,.2f}'.format
print(my_df.head())
week quantity country_name article_name total_amount seller_name
0 1 10 Peru Water Cooling $ 675.00 Cirilo Grandham
1 1 15 Peru Mouse $ 454.50 Ewell Peres
2 1 5 Bolivia Netbook $ 725.00 Janel O'Curran
3 1 9 Brazil Tablet $ 1,170.00 Janel O'Curran
4 1 6 Mexico Case $ 227.40 Daisie Slograve
# RESOLUCIÓN ANALÍTICA
my_df2 = my_df.groupby('article_name').sum()
por_cant = my_df2.sort_values('quantity', ascending=False)
print(por_cant['quantity'].head(1))
article_name
HDD 413
Name: quantity, dtype: int64
# RESOLUCIÓN GRÁFICA
sns.set(palette="Set2",style='ticks')
sns.displot(my_df, x='article_name')
plt.title('Artículo más vendido',fontweight='bold',fontsize=14)
plt.ylabel('Cantidad',fontstyle='italic')
plt.xlabel('Artículo',fontstyle='italic')
plt.xticks(rotation=90)
sns.despine()
plt.show()
# RESOLUCIÓN ANALÍTICA
my_df3 = (my_df.groupby('article_name').sum()).sort_values('total_amount', ascending=False).head(5)
print(my_df3['total_amount'])
article_name
Full Pc $ 538,335.93
Notebook $ 251,000.00
Smartphone $ 152,250.00
Chair $ 69,477.48
Tablet $ 48,620.00
Name: total_amount, dtype: float64
# RESOLUCIÓN GRÁFICA
plt.pie(x=my_df3['total_amount'],labels=my_df3.index, startangle=90)
centre_cicle = plt.Circle((0,0),0.4, fc='white')
fig=plt.gcf()
fig.gca().add_artist(centre_cicle)
plt.title('Artículos que proporcionaron mayores ingresos',fontweight='bold',fontsize=14)
plt.show()
# RESOLUCIÓN ANALÍTICA
df4 = (my_df.groupby('seller_name').sum()).sort_values('total_amount', ascending=False)
print(df4[['quantity']+['total_amount']])
quantity total_amount
seller_name
Janel O'Curran 703 $ 192,832.47
Brockie Patience 441 $ 142,709.88
Oliviero Charkham 555 $ 141,329.76
Vasily Danilyuk 521 $ 129,157.55
Daisie Slograve 554 $ 120,520.11
Aveline Swanwick 629 $ 118,874.33
Arnold Kilkenny 583 $ 94,552.04
Kati Innot 512 $ 83,704.62
Jase Doy 582 $ 80,628.31
Ewell Peres 496 $ 78,144.32
Onida Cosely 535 $ 77,373.37
Milly Christoffe 442 $ 61,733.69
Tobin Roselli 519 $ 56,984.42
Cornie Wynrehame 523 $ 52,253.57
Cirilo Grandham 470 $ 45,009.40
# RESOLUCIÓN GRÁFICA
plt.bar(df4.index, df4['total_amount'])
plt.xticks(rotation=80)
plt.title('Vendedor estrella del mes',fontweight='bold',fontsize=14)
plt.ylabel('Monto total',fontstyle='italic')
plt.xlabel('Nombre del vendedor',fontstyle='italic')
sns.despine()
plt.show()
# RESOLUCIÓN ANALÍTICA
df5=(my_df.groupby('week').sum()).sort_values('total_amount',ascending=False)
print(df5)
quantity total_amount
week
1 2449 $ 507,458.81
2 2444 $ 415,364.44
3 2114 $ 329,140.03
4 1058 $ 223,844.56
# RESOLUCIÓN GRÁFICA
plt.bar(df5.index,df5['total_amount'],alpha=0.5)
plt.plot(df5.index,df5['total_amount'],marker='*',linestyle='--',markersize=10,color='purple')
plt.title('Ventas mensuales',fontweight='bold',fontsize=14)
plt.ylabel('Monto total',fontstyle='italic')
plt.xlabel('Semana',fontstyle='italic')
sns.despine()
plt.show()
# RESOLUCIÓN ANALÍTICA
df6=(my_df.groupby('country_name').sum()).sort_values('total_amount',ascending=False).head(5)
print(df6[['quantity']+['total_amount']])
quantity total_amount
country_name
Brazil 2515 $ 441,271.85
Argentina 947 $ 205,832.78
Colombia 881 $ 177,514.29
Peru 1027 $ 161,421.12
Mexico 846 $ 138,619.99
# RESOLUCIÓN GRÁFICA
plt.bar(df6.index,df6['total_amount'])
plt.title('TOP 5 de países mejores compradores',fontweight='bold')
plt.ylabel('Monto total',fontstyle='italic')
plt.xlabel('País',fontstyle='italic')
sns.despine()
plt.xticks(rotation=60)
plt.show()
# RESOLUCIÓN ANALÍTICA
pc = my_df[(my_df['article_name']== 'Full Pc')]
pcs=(pc.groupby('country_name').sum()).sort_values('total_amount',ascending=False)
print(pcs)
week quantity total_amount
country_name
Brazil 17 63 $ 134,052.03
Argentina 10 45 $ 95,751.45
Colombia 11 34 $ 72,345.54
Peru 9 24 $ 51,067.44
Venezuela 5 21 $ 44,684.01
El Salvador 4 19 $ 40,428.39
Guatemala 4 13 $ 27,661.53
Mexico 1 13 $ 27,661.53
Costa Rica 2 11 $ 23,405.91
Bolivia 1 5 $ 10,639.05
Uruguay 2 4 $ 8,511.24
Chile 4 1 $ 2,127.81
#RESOLUCIÓN GRÁFICA
sns.barplot(x = pcs.index, y = "quantity", data = pcs)
plt.title('Compradores de Full PC',fontweight='bold',fontsize=14)
plt.ylabel('Cantidad',fontstyle='italic')
plt.xlabel('País',fontstyle='italic')
plt.xticks(rotation=90)
plt.show()
# RESOLUCIÓN ANALÍTICA
my_df7 = (my_df.groupby('article_name').sum()).sort_values('quantity').head(5)
print(my_df7[['quantity']+['total_amount']])
quantity total_amount
article_name
Wi-Fi Card 141 $ 8,405.01
Keyboard 165 $ 3,729.00
Fan Cooler 205 $ 871.25
Case 206 $ 7,807.40
Chair 207 $ 69,477.48
#RESOLUCIÓN GRÁFICA
plt.bar(my_df7.index,my_df7['quantity'])
plt.title('Artículos menos vendidos',fontweight='bold',fontsize=14)
plt.ylabel('Cantidad',fontstyle='italic')
plt.xlabel('Artículo',fontstyle='italic')
plt.xticks(rotation=90)
sns.despine()
plt.show()