# A veces necesitamos instalar nuevas librerías en nuestros proyectos
!pip install openpyxl==3.0.10
Requirement already satisfied: openpyxl==3.0.10 in /root/venv/lib/python3.9/site-packages (3.0.10)
Requirement already satisfied: et-xmlfile in /root/venv/lib/python3.9/site-packages (from openpyxl==3.0.10) (1.1.0)
WARNING: You are using pip version 22.0.4; however, version 22.1.2 is available.
You should consider upgrading via the '/root/venv/bin/python -m pip install --upgrade pip' command.
# imports
import numpy as np
import pandas as pd
import sqlite3 as sql3
import openpyxl # levantar excel
import matplotlib.pyplot as plt
import seaborn as sns
# Levanto los datos en 3 diferentes dataframes
# Artículos
conn = sql3.connect('/work/data/articles.db')
sql_query = pd.read_sql_query('SELECT * FROM articles', conn)
df_articles = pd.DataFrame(sql_query, columns=['article_id', 'article_name', 'unit_price'])
# Vendedores
df_sellers = pd.read_excel('/work/data/sellers.xlsx', index_col=0)
# Ordenes
df_orders = pd.read_csv('/work/data/orders.csv')
# Exploración del df de artículos
print('Muestra de datos')
#print(df_articles.head()) # head() 5 Filas por defecto
print(df_articles) # head() 5 Filas por defecto
print('\nFormato del dataframe')
print(df_articles.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_articles.isnull().sum())
print('\nFormato de los datos por columna')
print(df_articles.dtypes)
Muestra de datos
article_id article_name unit_price
0 20015 Smartphone 525.00
1 20016 Full Pc 2127.81
2 20017 Monitor 230.00
3 20018 Tablet 130.00
4 20019 Desk 130.10
5 20020 Chair 335.64
6 20021 Modem 67.50
7 20022 Range Extender 20.45
8 20023 Notebook 1000.00
9 20024 Netbook 145.00
10 20025 HDD 54.62
11 20026 SDD 22.00
12 20027 Ram Memory 35.95
13 20028 Motherboard 138.38
14 20029 Mouse 30.30
15 20030 Fan Cooler 4.25
16 20031 Webcam 20.07
17 20032 Keyboard 22.60
18 20033 Headphones 23.30
19 20034 Scanner 185.00
20 20035 Case 37.90
21 20036 Video Card 131.50
22 20037 CPU 139.62
23 20038 Power Supply 43.95
24 20039 Water Cooling 67.50
25 20040 Heatsink 10.00
26 20041 Usb Cable 2.95
27 20042 Sata Cable 2.14
28 20043 Pci Express Port 10.12
29 20044 Wi-Fi Card 59.61
30 20045 Mesh Wi-Fi X 2 32.50
Formato del dataframe
(31, 3)
Búsqueda de valores nulls por columna
article_id 0
article_name 0
unit_price 0
dtype: int64
Formato de los datos por columna
article_id int64
article_name object
unit_price object
dtype: object
# Exploración del df de vendedores
print('Muestra de datos')
print(df_sellers.head()) # head() 5 Filas por defecto
print(df_sellers) # head() 5 Filas por defecto
print('\nFormato del dataframe')
print(df_sellers.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_sellers.isnull().sum())
print('\nFormato de los datos por columna')
print(df_sellers.dtypes)
Muestra de datos
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
seller_name
seller_id
1 Aveline Swanwick
2 Jase Doy
3 Oliviero Charkham
4 Cornie Wynrehame
5 Ewell Peres
6 Milly Christoffe
7 Kati Innot
8 Tobin Roselli
9 Onida Cosely
10 Cirilo Grandham
11 Vasily Danilyuk
12 Brockie Patience
13 Arnold Kilkenny
14 Janel O'Curran
15 Daisie Slograve
Formato del dataframe
(15, 1)
Búsqueda de valores nulls por columna
seller_name 0
dtype: int64
Formato de los datos por columna
seller_name object
dtype: object
# Exploración del df de órdenes
print('Muestra de datos')
print(df_orders.head(10)) # head() 5 Filas por defecto
print('\nFormato del dataframe')
print(df_orders.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_orders.isnull().sum())
print('\nFormato de los datos por columna')
print(df_orders.dtypes)
Muestra de datos
order_id week article_id quantity seller_id country_name
0 15024 1 20039 10 10 Peru
1 15025 1 20029 15 5 Peru
2 15026 1 20024 5 14 Bolivia
3 15027 1 20018 9 14 Brazil
4 15028 1 20035 6 15 Mexico
5 15029 1 20024 6 11 Peru
6 15030 1 20025 13 13 Colombia
7 15031 1 20029 14 14 Peru
8 15032 1 20026 7 3 Mexico
9 15033 1 20027 4 13 Honduras
Formato del dataframe
(1000, 6)
Búsqueda de valores nulls por columna
order_id 0
week 0
article_id 0
quantity 0
seller_id 0
country_name 0
dtype: int64
Formato de los datos por columna
order_id int64
week int64
article_id int64
quantity int64
seller_id int64
country_name object
dtype: object
df_articles['unit_price'] = df_articles['unit_price'].astype(float)
print(df_articles.dtypes)
article_id int64
article_name object
unit_price float64
dtype: object
my_df = df_orders.copy() # shallow copy o copia superficial (otro Objeto alojado en la memoria)
# Cambio el índice del df de artículos
df_articles.set_index('article_id', inplace=True)
my_df = my_df.assign(article_name = my_df['article_id'])
my_df = my_df.assign(total_amount = my_df['article_id'])
my_df = my_df.assign(seller_name = my_df['seller_id'])
# df_articles
# print()
# my_df
for i in range(max(my_df.count())):
# print(i)
# SINTAXIS: df_articles.loc[indice][columna]
# [indice]: va a ser el dato que obtengo de [my_df.loc[i]['article_id']]
# o sea, tomo registro a registro el article_id y lo uso para extraer el nombre del artículo (article_name) de df_articles
# print(df_articles.loc[my_df.loc[i]['article_id']]['article_name'])
article = df_articles.loc[my_df.loc[i]['article_id']]['article_name']
# print(article)
# Asignar a cada valor id de la columna 'article_name' (my_df) el nombre del artículo
my_df.loc[i, 'article_name'] = article
# my_df
# hacemos lo mismo con total_amount
my_df.loc[i, 'total_amount'] = my_df.loc[i, 'quantity']*df_articles.loc[my_df.loc[i]['article_id']]['unit_price']
# Columna de seller name
my_df.loc[i, 'seller_name'] = df_sellers.loc[my_df.loc[i]['seller_id']]['seller_name']
# elimimo las columnas que no necesito del df
my_df.drop(['order_id', 'article_id', 'seller_id'], axis='columns', inplace=True)
print(my_df)
week quantity country_name article_name total_amount seller_name
0 1 10 Peru Water Cooling 675.0 Cirilo Grandham
1 1 15 Peru Mouse 454.5 Ewell Peres
2 1 5 Bolivia Netbook 725.0 Janel O'Curran
3 1 9 Brazil Tablet 1170.0 Janel O'Curran
4 1 6 Mexico Case 227.4 Daisie Slograve
.. ... ... ... ... ... ...
995 4 1 Brazil Modem 67.5 Kati Innot
996 4 15 Brazil Heatsink 150.0 Daisie Slograve
997 4 2 Colombia Heatsink 20.0 Vasily Danilyuk
998 4 14 Brazil Tablet 1820.0 Vasily Danilyuk
999 4 12 Brazil SDD 264.0 Onida Cosely
[1000 rows x 6 columns]
# RESOLUCIÓN ANALÍTICA
#print(my_df['article_name'].value_counts()) # cuenta valores únicos
#print()
print(my_df['article_name'].value_counts().head()) # primeros cinco + vendidos
my_5top = my_df['article_name'].value_counts().head()
print()
print(my_5top)
HDD 47
Netbook 45
SDD 45
Tablet 40
Usb Cable 39
Name: article_name, dtype: int64
HDD 47
Netbook 45
SDD 45
Tablet 40
Usb Cable 39
Name: article_name, dtype: int64
# RESOLUCIÓN GRÁFICA
print("TOP 5 ARTICULOS")
e=(0.25,0,0,0,0)
c=['gold', 'yellowgreen', 'darkorange', 'lightskyblue','seagreen']
plt.pie(my_5top,startangle=145,autopct='%1.2f%%', explode=e, colors=c,)
plt.show()
TOP 5 ARTICULOS
# RESOLUCIÓN ANALÍTICA
df2=(my_df.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(5)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df2['total_amount'])
# 'article_name' es el index
# print(df2) # agrupa (y suma) sólo datos numéricos
# SINTAXIS ALTERNATIVA
# pd.options.display.float_format= '$ {:,.2f}'.format
# print(my_df[['article_name'] + ['total_amount']].groupby(['article_name']).sum().sort_values('total_amount', ascending=False).head(5))
article_name
Full Pc $ 538,335.93
Notebook $ 251,000.00
Smartphone $ 152,250.00
Chair $ 69,477.48
Tablet $ 48,620.00
Name: total_amount, dtype: float64
# RESOLUCIÓN GRÁFICA
print("TOP 5 INGRESOS")
e=(0.5,0,0,0,0)
c=['gold', 'yellowgreen', 'darkorange', 'lightskyblue','seagreen']
plt.pie(x=df2['total_amount'], labels=df2.index,startangle=90,autopct='%1.2f%%', explode=e, colors=c,pctdistance=0.85)
centre_circle = plt.Circle((0,0), 0.5, fc="white")
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.title("TOP 5 INGRESOS")
plt.savefig('top5ingresos.png', dpi=300)
plt.show()
TOP 5 INGRESOS
# RESOLUCIÓN ANALÍTICA
df4 = (my_df.groupby(by='seller_name').sum()).sort_values('total_amount', ascending=False)
pd.options.display.float_format= '$ {:,.2f}'.format
# print(df4[['quantity'] + ['total_amount']])
print(df4[['quantity'] + ['total_amount']].head(5))
# 'seller_anme' es el index
quantity total_amount
seller_name
Janel O'Curran 703 $ 192,832.47
Brockie Patience 441 $ 142,709.88
Oliviero Charkham 555 $ 141,329.76
Vasily Danilyuk 521 $ 129,157.55
Daisie Slograve 554 $ 120,520.11
# RESOLUCIÓN GRÁFICA
#plt.bar(df4.index, df4['total_amount'])
#plt.xticks(rotation=60)
#plt.show()
# 'seller_name' es el index
print("VENDEDOR DEL MES")
df_5ing=df4[['quantity'] + ['total_amount']].head(5)
#print(df_5ing)
#Plot 1
plt.subplot(1, 2, 1)
plt.bar(df_5ing.index, df_5ing['total_amount'],color='yellowgreen')
plt.xticks(rotation=60,color="green")
#Plot 2
plt.subplot(1, 2, 2)
e=(0.5,0,0,0,0)
c=['gold', 'yellowgreen', 'darkorange', 'lightskyblue','seagreen']
plt.pie(df_5ing['quantity'],labels=df2.index,startangle=180,autopct='%1.2f%%', explode=e, colors=c,pctdistance=0.65)
plt.show()
VENDEDOR DEL MES
# RESOLUCIÓN ANALÍTICA
df5 = (my_df.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df5[['quantity'] + ['total_amount']])
# 'week' es el index
quantity total_amount
week
1 2449 $ 507,458.81
2 2444 $ 415,364.44
3 2114 $ 329,140.03
4 1058 $ 223,844.56
# RESOLUCIÓN GRÁFICA
#print("VENTAS SEMANALES")
plt.bar(df5.index, df5['total_amount'],color=['red','blue','yellow','yellowgreen'])
plt.title("VENTAS SEMANALES")
plt.xlabel("Semana")
plt.ylabel("Monto")
plt.xticks(np.arange(0,5,1))
plt.yticks(np.arange(0,550001,50000))
plt.grid()
plt.show()
# 'week' es el index
# RESOLUCIÓN ANALÍTICA
print("MONTO POR PAIS")
df_pais = my_df
#print(df_pais)
print(df_pais.groupby(['country_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False))
df_ranking=df_pais.groupby(['country_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False)
MONTO POR PAIS
total_amount
country_name
Brazil $ 441,271.85
Argentina $ 205,832.78
Colombia $ 177,514.29
Peru $ 161,421.12
Mexico $ 138,619.99
Venezuela $ 77,684.52
El Salvador $ 57,391.26
Guatemala $ 52,579.25
Honduras $ 36,763.56
Costa Rica $ 34,606.50
Chile $ 24,660.98
Bolivia $ 22,682.80
Uruguay $ 17,843.09
Ecuador $ 17,475.30
Paraguay $ 8,195.12
Puerto Rico $ 1,265.43
# RESOLUCIÓN GRÁFICA
print("MONTO POR PAIS")
sns.barplot(df_ranking.index, df_ranking['total_amount'],palette='inferno_r')
plt.xticks(rotation=60)
plt.title("MONTO POR PAIS")
plt.xlabel("País")
plt.ylabel("Monto")
plt.yticks(np.arange(0,450001,50000))
plt.show()
# 'country_name' es el index
MONTO POR PAIS
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
# RESOLUCIÓN ANALÍTICA
print("COMPARACION TOP 2 PAIS")
df_pais2 = (my_df.groupby(by= 'country_name').sum()).sort_values('total_amount', ascending=False)
pd.options.display.float_format= '$ {:,.2f}'.format
# print(df4[['quantity'] + ['total_amount']])
print(df_pais2[['quantity'] + ['total_amount']].head(2))
df_pais3=df_pais2[['quantity'] + ['total_amount']].head(2)
# 'country_name' es el index
COMPARACION TOP 2 PAIS
quantity total_amount
country_name
Brazil 2515 $ 441,271.85
Argentina 947 $ 205,832.78
# RESOLUCIÓN GRÁFICA
print("COMPARATIVA TOP PAISES")
sns.barplot(df_pais3.index, df_pais3['total_amount'],palette='Accent')
plt.xticks(rotation=60)
plt.title("COMPARATIVA TOP PAISES")
plt.xlabel("País")
plt.ylabel("Monto")
plt.yticks(np.arange(0,450001,50000))
plt.show()
# 'country_name' es el index
print("COMPARATIVA TOP PAISES")
sns.barplot(df_pais3.index, df_pais3['quantity'],palette='rainbow_r')
plt.xticks(rotation=60)
plt.title("COMPARATIVA TOP PAISES")
plt.xlabel("País")
plt.ylabel("Cantidad")
plt.yticks(np.arange(0,3001,500))
plt.show()
COMPARATIVA TOP PAISES
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
COMPARATIVA TOP PAISES
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
# RESOLUCIÓN ANALITICA
#dos países con mayores ventas
print("VENTAS POR VENDEDORES EN BRASIL")
df_brasil = my_df.loc[(my_df['country_name'] == 'Brazil')]
print(df_brasil.groupby(['seller_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False))
df_brasil2=df_brasil.groupby(['seller_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False)
print()
print("VENTAS POR VENDEDORES EN ARGENTINA")
df_argentina = my_df.loc[my_df['country_name'] == 'Argentina']
print(df_argentina.groupby(['seller_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False))
df_argentina2=df_argentina.groupby(['seller_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False)
VENTAS POR VENDEDORES EN BRASIL
total_amount
seller_name
Daisie Slograve $ 65,283.28
Aveline Swanwick $ 58,771.48
Arnold Kilkenny $ 47,243.54
Kati Innot $ 32,087.32
Tobin Roselli $ 31,997.93
Janel O'Curran $ 31,562.86
Onida Cosely $ 29,354.31
Jase Doy $ 28,493.25
Vasily Danilyuk $ 27,495.77
Milly Christoffe $ 21,247.38
Cirilo Grandham $ 21,061.74
Ewell Peres $ 15,253.22
Oliviero Charkham $ 14,951.72
Brockie Patience $ 10,756.96
Cornie Wynrehame $ 5,711.09
VENTAS POR VENDEDORES EN ARGENTINA
total_amount
seller_name
Janel O'Curran $ 34,971.47
Brockie Patience $ 32,553.20
Oliviero Charkham $ 28,985.95
Vasily Danilyuk $ 27,503.50
Onida Cosely $ 17,216.35
Tobin Roselli $ 12,813.08
Daisie Slograve $ 11,004.98
Arnold Kilkenny $ 9,827.01
Aveline Swanwick $ 7,514.88
Cornie Wynrehame $ 6,720.98
Kati Innot $ 5,672.06
Jase Doy $ 5,324.53
Ewell Peres $ 3,649.59
Cirilo Grandham $ 1,354.45
Milly Christoffe $ 720.75
# RESOLUCIÓN GRÁFICA
print("RANKING VENDEDORES BRASIL")
sns.pointplot(df_brasil2.index, df_brasil2['total_amount'],palette='icefire',markers='>')
plt.xticks(rotation=60)
plt.title("RANKING VENDEDORES BRASIL")
plt.xlabel("Vendedor")
plt.ylabel("Monto")
plt.yticks(np.arange(0,70001,5000))
plt.grid()
plt.show()
print("RANKING VENDEDORES ARGENTINA")
sns.pointplot(df_argentina2.index, df_argentina2['total_amount'],palette='icefire_r',markers='*')
plt.xticks(rotation=60)
plt.title("RANKING VENDEDORES ARGENTINA")
plt.xlabel("Vendedor")
plt.ylabel("Monto")
plt.yticks(np.arange(0,35001,5000))
plt.grid()
plt.show()
RANKING VENDEDORES BRASIL
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
RANKING VENDEDORES ARGENTINA
/shared-libs/python3.9/py/lib/python3.9/site-packages/seaborn/_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
warnings.warn(
# RESOLUCIÓN ANALITICA / GRÁFICA
#dos países con mayores ventas
print("VENTAS POR SEMANA EN BRASIL")
df_brasil = my_df.loc[(my_df['country_name'] == 'Brazil')]
print(df_brasil.groupby(['week']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False))
df_brasil2=df_brasil.groupby(['week']).agg({'total_amount':'sum'}).sort_values('week', ascending=True)
plt.plot(df_brasil2,color='green')
plt.title("VENTAS POR SEMANA EN BRASIL")
plt.xlabel("Semana")
plt.ylabel("Monto")
plt.yticks(np.arange(0,175001,10000))
plt.xticks(np.arange(1,5,1))
plt.grid()
plt.show()
print()
print("VENTAS POR SEMANA EN ARGENTINA")
df_argentina = my_df.loc[my_df['country_name'] == 'Argentina']
print(df_argentina.groupby(['week']).agg({'total_amount':'sum'}).sort_values('week', ascending=True))
df_argentina2=df_argentina.groupby(['week']).agg({'total_amount':'sum'}).sort_values('week', ascending=True)
plt.plot(df_argentina2,color='blue')
plt.title("VENTAS POR SEMANA EN ARGENTINA")
plt.xlabel("Semana")
plt.ylabel("Monto")
plt.yticks(np.arange(0,100001,10000))
plt.xticks(np.arange(1,5,1))
plt.grid()
plt.show()
VENTAS POR SEMANA EN BRASIL
total_amount
week
1 $ 165,289.90
2 $ 106,803.85
4 $ 89,836.60
3 $ 79,341.50
VENTAS POR SEMANA EN ARGENTINA
total_amount
week
1 $ 63,760.48
2 $ 96,789.13
3 $ 26,601.97
4 $ 18,681.20
# RESOLUCIÓN ANALITICA / GRÁFICA
#dos países con mayores ventas
print("CANTIDAD POR SEMANA EN BRASIL")
df_brasil = my_df.loc[(my_df['country_name'] == 'Brazil')]
print(df_brasil.groupby(['week']).agg({'quantity':'count'}).sort_values('week', ascending=True))
df_brasil2=df_brasil.groupby(['week']).agg({'quantity':'count'}).sort_values('week', ascending=True)
plt.plot(df_brasil2,color='green')
plt.title("VENTAS POR SEMANA EN BRASIL")
plt.xlabel("Semana")
plt.ylabel("Cantidad")
plt.yticks(np.arange(0,101,10))
plt.xticks(np.arange(1,5,1))
plt.grid()
plt.show()
print()
print("CANTIDAD POR SEMANA EN ARGENTINA")
df_argentina = my_df.loc[my_df['country_name'] == 'Argentina']
print(df_argentina.groupby(['week']).agg({'quantity':'count'}).sort_values('week', ascending=True))
df_argentina2=df_argentina.groupby(['week']).agg({'quantity':'count'}).sort_values('week', ascending=True)
plt.plot(df_argentina2,color='blue')
plt.title("CANTIDAD POR SEMANA EN ARGENTINA")
plt.xlabel("Semana")
plt.ylabel("Cantidad")
plt.yticks(np.arange(0,51,10))
plt.xticks(np.arange(1,5,1))
plt.grid()
plt.show()
CANTIDAD POR SEMANA EN BRASIL
quantity
week
1 94
2 89
3 83
4 49
CANTIDAD POR SEMANA EN ARGENTINA
quantity
week
1 32
2 48
3 19
4 14
# RESOLUCIÓN ANALÍTICA
print("TOTAL ARTICULOS POR PAIS")
df_artxpais = my_df
#print(df_artxpais)
print(df_artxpais.groupby(['country_name']).agg({'quantity':'count'}).sort_values('quantity', ascending=False))
df_ranking2=(df_pais.groupby(['country_name']).agg({'quantity':'count'}).sort_values('quantity', ascending=False)).head()
TOTAL ARTICULOS POR PAIS
quantity
country_name
Brazil 315
Peru 125
Argentina 113
Colombia 111
Mexico 101
Honduras 38
Venezuela 37
Guatemala 29
Chile 28
Bolivia 19
Costa Rica 19
Ecuador 18
Paraguay 17
El Salvador 16
Uruguay 12
Puerto Rico 2
# RESOLUCIÓN GRÁFICA
#print("TOP 5 CANTIDAD ARTICULOS POR PAIS")
#Plot 1
plt.subplot(1, 2, 1)
plt.xticks(rotation=60)
c=['navy', 'green', 'turquoise', 'lightskyblue','plum']
plt.bar(df_ranking2.index, df_ranking2['quantity'],color=c)
plt.title("TOP 5 CANTIDAD ARTICULOS POR PAIS")
plt.xlabel("País")
plt.ylabel("Cantidad")
plt.yticks(np.arange(0,351,50))
plt.grid()
#Plot 2
plt.subplot(1, 2, 2)
e=(0.5,0,0,0,0)
c=['gold', 'coral','yellowgreen', 'darkorange', 'lightsalmon']
plt.pie(df_ranking2['quantity'],labels=df_ranking2.index,startangle=180,autopct='%1.1f%%', explode=e, colors=c,pctdistance=0.65)
plt.show() ##sacar afuera brasil en el pie
plt.savefig('top5articulos.png', dpi=300)
# RESOLUCIÓN ANALÍTICA
print("MONTO POR ARTICULOS")
df_articulo = my_df
#print(df_articulo)
print(df_articulo.groupby(['article_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False))
top5M= (df_articulo.groupby(['article_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False).head(5))
print()
print('Monto del artículo más vendido:',df_articulo.groupby(['article_name']).agg({'total_amount':'sum'}).sort_values('total_amount', ascending=False).head(1))
MONTO POR ARTICULOS
total_amount
article_name
Full Pc $ 538,335.93
Notebook $ 251,000.00
Smartphone $ 152,250.00
Chair $ 69,477.48
Tablet $ 48,620.00
Monitor $ 47,840.00
Netbook $ 46,400.00
Scanner $ 40,885.00
Motherboard $ 40,268.58
CPU $ 37,138.92
Desk $ 29,012.30
Video Card $ 27,483.50
HDD $ 22,558.06
Water Cooling $ 17,010.00
Modem $ 16,470.00
Power Supply $ 11,822.55
Ram Memory $ 10,533.35
Mouse $ 9,756.60
Wi-Fi Card $ 8,405.01
SDD $ 8,184.00
Case $ 7,807.40
Mesh Wi-Fi X 2 $ 6,922.50
Range Extender $ 5,746.45
Headphones $ 5,568.70
Webcam $ 4,596.03
Keyboard $ 3,729.00
Pci Express Port $ 2,944.92
Heatsink $ 2,800.00
Fan Cooler $ 871.25
Usb Cable $ 805.35
Sata Cable $ 564.96
Monto del artículo más vendido: total_amount
article_name
Full Pc $ 538,335.93
# RESOLUCIÓN ANALÍTICA
print("CANTIDAD POR ARTICULOS")
df_articulo2 = my_df
#print(df_articulo)
print(df_articulo2.groupby(['article_name']).agg({'quantity':'count'}).sort_values('quantity', ascending=False))
top5= (df_articulo2.groupby(['article_name']).agg({'quantity':'count'}).sort_values('quantity', ascending=False).head())
print()
print('Cantidad del artículo más vendido:',df_articulo2.groupby(['article_name']).agg({'quantity':'count'}).sort_values('quantity', ascending=False).head(1))
CANTIDAD POR ARTICULOS
quantity
article_name
HDD 47
Netbook 45
SDD 45
Tablet 40
Usb Cable 39
Sata Cable 38
Pci Express Port 37
Range Extender 36
Smartphone 35
Full Pc 34
Headphones 34
Heatsink 34
Mouse 34
Power Supply 34
Motherboard 33
Water Cooling 32
Video Card 31
Ram Memory 31
Notebook 30
CPU 29
Modem 29
Mesh Wi-Fi X 2 28
Desk 28
Webcam 28
Case 26
Monitor 26
Fan Cooler 25
Scanner 24
Chair 24
Keyboard 22
Wi-Fi Card 22
Cantidad del artículo más vendido: quantity
article_name
HDD 47
# RESOLUCIÓN GRÁFICA
print("MONTOS POR ARTÍCULOS")
#print(top5M)
b=['goldenrod', 'olivedrab','sienna', 'skyblue', 'indigo']
plt.bar(top5M.index, top5M['total_amount'],color=b, alpha=0.75)
plt.xticks(rotation=60)
plt.title("MONTOS POR ARTÍCULOS")
plt.xlabel("Artículo")
plt.ylabel("Monto")
plt.yticks(np.arange(0,550001,50000))
plt.show()
print()
print("ARTÍCULOS MÁS VENDIDOS")
#print(top5)
e=(0.25,0,0,0,0)
a=['lime', 'coral','palegreen', 'darkorange', 'olive']
plt.pie(top5['quantity'],labels=top5.index,startangle=45,autopct='%1.2f%%', explode=e, colors=a,pctdistance=0.65)
centre_circle = plt.Circle((0,0), 0.4, fc="white")
fig = plt.gcf()
fig.gca().add_artist(centre_circle)
plt.show()
MONTOS POR ARTÍCULOS
ARTÍCULOS MÁS VENDIDOS
##para filtrar en un dataset & (es el and) | (es el or)
# RESOLUCIÓN ANALÍTICA
print("MONTO VENDEDOR / PAIS")
df_sample = my_df.set_index(['seller_name','country_name']).sort_index(ascending=[True,True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample.groupby(['seller_name','country_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False)
print(df_sample.groupby(['seller_name','country_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False).head())
MONTO VENDEDOR / PAIS
total_amount
seller_name country_name
Daisie Slograve Brazil $ 65,283.28
Aveline Swanwick Brazil $ 58,771.48
Arnold Kilkenny Brazil $ 47,243.54
Brockie Patience Peru $ 46,089.88
Janel O'Curran Argentina $ 34,971.47
# RESOLUCIÓN ANALÍTICA
print("MONTO VENDEDOR / ARTICULO")
df_sample1 = my_df.set_index(['seller_name']).sort_index(ascending=[True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample1.groupby(['seller_name','article_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False)
print(df_sample1.groupby(['seller_name','article_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False).head(5))
MONTO VENDEDOR / ARTICULO
total_amount
seller_name article_name
Janel O'Curran Full Pc $ 119,157.36
Brockie Patience Full Pc $ 97,879.26
Oliviero Charkham Full Pc $ 74,473.35
Vasily Danilyuk Full Pc $ 53,195.25
Aveline Swanwick Full Pc $ 44,684.01
# RESOLUCIÓN ANALÍTICA
print("MONTO VENDEDOR / PAIS / ARTICULO")
df_sample2 = my_df.set_index(['seller_name']).sort_index(ascending=[True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample2.groupby(['seller_name','country_name','article_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False)
print(df_sample2.groupby(['seller_name','country_name','article_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False).head(5))
df_test=(df_sample2.groupby(['seller_name','country_name','article_name'])['total_amount'].sum().to_frame().sort_values('total_amount', ascending=False).head(5))
df_test
MONTO VENDEDOR / PAIS / ARTICULO
total_amount
seller_name country_name article_name
Aveline Swanwick Brazil Full Pc $ 40,428.39
Brockie Patience Peru Full Pc $ 38,300.58
Argentina Full Pc $ 31,917.15
Janel O'Curran El Salvador Full Pc $ 31,917.15
Vasily Danilyuk Guatemala Full Pc $ 27,661.53
total_amountfloat64
('Aveline Swanwick', 'Brazil', 'Full Pc')
40428.39
('Brockie Patience', 'Peru', 'Full Pc')
38300.58
('Brockie Patience', 'Argentina', 'Full Pc')
31917.15
("Janel O'Curran", 'El Salvador', 'Full Pc')
31917.15
('Vasily Danilyuk', 'Guatemala', 'Full Pc')
27661.53
# RESOLUCIÓN ANALÍTICA
print("CANTIDAD VENDEDOR / PAIS")
df_sample3 = my_df.set_index(['seller_name']).sort_index(ascending=[True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample3.groupby(['seller_name','country_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False)
print(df_sample3.groupby(['seller_name','country_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False).head(10))
CANTIDAD VENDEDOR / PAIS
quantity
seller_name country_name
Aveline Swanwick Brazil 28
Daisie Slograve Brazil 25
Tobin Roselli Brazil 25
Jase Doy Brazil 25
Arnold Kilkenny Brazil 24
Oliviero Charkham Brazil 24
Onida Cosely Brazil 23
Janel O'Curran Brazil 22
Kati Innot Brazil 22
Cirilo Grandham Brazil 19
# RESOLUCIÓN ANALÍTICA
print("CANTIDAD VENDEDOR / ARTICULO")
df_sample4 = my_df.set_index(['seller_name']).sort_index(ascending=[True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample4.groupby(['seller_name','article_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False)
print(df_sample4.groupby(['seller_name','article_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False).head(5))
CANTIDAD VENDEDOR / ARTICULO
quantity
seller_name article_name
Tobin Roselli Power Supply 9
Aveline Swanwick Netbook 7
Daisie Slograve Sata Cable 7
Oliviero Charkham Headphones 7
Jase Doy Pci Express Port 6
# RESOLUCIÓN ANALÍTICA
print("CANTIDAD VENDEDOR / PAIS / ARTICULO")
df_sample5= my_df.set_index(['seller_name']).sort_index(ascending=[True])
#print(df_sample)
pd.options.display.float_format= '$ {:,.2f}'.format
#df = sns.load_dataset('df_sample')
df_sample5.groupby(['seller_name','country_name','article_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False)
print(df_sample5.groupby(['seller_name','country_name','article_name'])['quantity'].count().to_frame().sort_values('quantity', ascending=False).head(10))
CANTIDAD VENDEDOR / PAIS / ARTICULO
quantity
seller_name country_name article_name
Tobin Roselli Brazil Power Supply 5
Brockie Patience Brazil Motherboard 4
Oliviero Charkham Brazil Usb Cable 4
Cornie Wynrehame Brazil HDD 4
Oliviero Charkham Brazil Headphones 4
Cornie Wynrehame Argentina SDD 3
Kati Innot Brazil Modem 3
Tobin Roselli Brazil SDD 3
Aveline Swanwick Brazil Pci Express Port 3
Onida Cosely Brazil Chair 3