print('Muestra de datos')
print(df_articles.head()) # head() 5 Filas por defecto
print('\nFormato del dataframe')
print(df_articles.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_articles.isnull().sum())
print('\nFormato de los datos por columna')
print(df_articles.dtypes)
print('Muestra de datos')
print(df_sellers.head())
print('\nFormato del dataframe')
print(df_sellers.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_sellers.isnull().sum())
print('\nFormato de los datos por columna')
print(df_sellers.dtypes)
print('Muestra de datos')
print(df_orders.head())
print('\nFormato del dataframe')
print(df_orders.shape)
print('\nBúsqueda de valores nulls por columna')
print(df_orders.isnull().sum())
print('\nFormato de los datos por columna')
print(df_orders.dtypes)
my_df = df_orders.copy()
df_articles.set_index('article_id', inplace=True)
my_df = my_df.assign(article_name = my_df['article_id'])
my_df = my_df.assign(total_amount = my_df['article_id'])
my_df = my_df.assign(seller_name = my_df['seller_id'])
for i in range(max(my_df.count())):
article = df_articles.loc[my_df.loc[i]['article_id']]['article_name']
# print(article)
my_df.loc[i, 'article_name'] = article
my_df.loc[i, 'total_amount'] = my_df.loc[i, 'quantity']*df_articles.loc[my_df.loc[i]['article_id']]['unit_price']
my_df.loc[i, 'seller_name'] = df_sellers.loc[my_df.loc[i]['seller_id']]['seller_name']
my_df.drop(['order_id', 'article_id', 'seller_id'], axis='columns', inplace=True)
print(my_df)
print(my_df['article_name'].value_counts())
sns.displot(my_df, x='article_name', color = 'teal',kde=True)
plt.xticks(rotation=90, fontsize=9, fontweight='bold', color='black')
plt.xlabel("Artículo", size = 10)
plt.ylabel("Unidades vendidas", size = 10)
plt.title("Unidades vendidas por artículo", size = 14)
plt.show()
df2=(my_df.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(5)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df2['total_amount'])
c = ['darkturquoise','teal','lightseagreen','aquamarine','paleturquoise']
plt.pie(x=df2['total_amount'],radius=1.1, autopct='%1.2f%%', colors = c,labels=df2.index, startangle=90)
plt.title("Top 5 artículos más vendidos ", size = 11)
plt.show()
df4 = (my_df.groupby(by='seller_name').sum()).sort_values('total_amount', ascending=False)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df4[['quantity'] + ['total_amount']].head(5))
plt.bar(df4.index, df4['total_amount'],color=['darkslategrey','teal','lightseagreen','aquamarine','paleturquoise'])
plt.xticks(rotation=60)
plt.xlabel("Vendedor", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Top 5 mejores vendedores", size = 12)
plt.show()
df5 = (my_df.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df5[['quantity'] + ['total_amount']])
plt.bar(df5.index, df5['total_amount'],color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4], ['1', '2', '3', '4'], rotation=0, fontsize=10, fontweight='bold')
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas por semana", size = 12)
plt.show()
print("TOP 3 PAISES CON MAYORES VENTAS")
df6 = (my_df.groupby(by='country_name').sum()).sort_values('total_amount', ascending=False).head(3)
print(df6[['quantity'] + ['total_amount']])
df7 = my_df[(my_df['country_name'] == 'Brazil')]
df7 = (df7.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(3)
df8 = my_df[(my_df['country_name'] == 'Argentina')]
df8 = (df8.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(3)
df9 = my_df[(my_df['country_name'] == 'Colombia')]
df9 = (df9.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(3)
print()
print("ARTICULOS CON MAYORES VENTAS - BRASIL")
print(df7[['quantity'] + ['total_amount']])
print()
print("ARTICULOS CON MAYORES VENTAS - ARGENTINA")
print(df8[['quantity'] + ['total_amount']])
print()
print("ARTICULOS CON MAYORES VENTAS - COLOMBIA")
print(df9[['quantity'] + ['total_amount']])
df10 = (my_df[(my_df['country_name'] == 'Brazil')]).head(3)
df11 = (my_df[(my_df['country_name'] == 'Argentina')]).head(3)
df12 = (my_df[(my_df['country_name'] == 'Colombia')]).head(3)
plt.bar(df6.index, df6['total_amount'],color=['darkslategrey','teal','aquamarine'])
plt.xticks([0,1,2], ['Brasil', 'Argentina', 'Colombia'], fontsize=10)
plt.xlabel("País", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Top 3 países con mayores ventas", size = 12)
plt.show()
c = ['violet','pink','blueviolet']
print()
print("Top 3 artículos más vendidos por país")
plt.subplot(1, 3, 1)
plt.pie(x=df10['quantity'],radius=1.0, autopct='%1.2f%%', colors = c,labels=['Notebook','Smartphone','Full Pc'], startangle=90)
plt.title("Brasil", size = 12)
plt.subplot(1, 3, 2)
plt.pie(x=df11['quantity'],radius=1.0, autopct='%1.2f%%', colors = c,labels=['Notebook','CPU','Full Pc'], startangle=90)
plt.title("Argentina", size = 12)
plt.subplot(1, 3, 3)
plt.pie(x=df12['quantity'],radius=1.0, autopct='%1.2f%%', colors = c,labels=['Smartphone','Notebook','Full Pc'], startangle=90)
plt.title("Colombia", size = 12)
plt.show()
print("Correlación entre ingresos y cantidad de unidades vendidas - Full Pc")
df7_pc = my_df[(my_df['article_name'] == 'Full Pc')|(my_df['country_name'] == 'Brazil')]
df8_pc = my_df[(my_df['article_name'] == 'Full Pc')|(my_df['country_name'] == 'Argentina')]
df9_pc = my_df[(my_df['article_name'] == 'Full Pc')|(my_df['country_name'] == 'Colombia')]
plt.title("Full Pc - Ventas vs Cantidad - Brasil,Argentina,Colombia", size = 12)
sns.scatterplot(y= 'quantity', x='total_amount', data = df7_pc)
sns.scatterplot(y= 'quantity', x='total_amount', data = df8_pc)
sns.scatterplot(y= 'quantity', x='total_amount', data = df9_pc)
print("Correlación entre ingresos y cantidad de unidades vendidas - Notebook")
df7_nb = my_df[(my_df['article_name'] == 'Notebook')|(my_df['country_name'] == 'Brazil')]
df8_nb = my_df[(my_df['article_name'] == 'Notebook')|(my_df['country_name'] == 'Argentina')]
df9_nb = my_df[(my_df['article_name'] == 'Notebook')|(my_df['country_name'] == 'Colombia')]
plt.title("Notebook - Ventas vs Cantidad venidas - Brasil,Argentina,Colombia", size = 12)
sns.scatterplot(y= 'quantity', x='total_amount', data = df7_nb)
sns.scatterplot(y= 'quantity', x='total_amount', data = df8_nb)
sns.scatterplot(y= 'quantity', x='total_amount', data = df9_nb)
print("Correlación Brasil,Argentina, Colombia")
df_3p = my_df[(my_df['country_name'] == 'Brazil') | (my_df['country_name'] == 'Argentina')| (my_df['country_name'] == 'Colombia')]
sns.pairplot(df_3p, hue='country_name')
print("5 ARTICULOS MÁS VENDIDOS")
df13 = (my_df.groupby(by='article_name').sum()).sort_values('total_amount', ascending=False).head(5)
print(df13[['quantity'] + ['total_amount']])
print()
print("FULL PC")
df14 = my_df.loc[my_df['article_name'] == 'Full Pc']
df14 = (df14.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
print(df14)
print()
print("NOTEBOOK")
df15 = my_df.loc[my_df['article_name'] == 'Notebook']
df15 = (df15.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
print(df15)
print()
print("SMARTPHONE")
df16 = my_df.loc[my_df['article_name'] == 'Smartphone']
df16 = (df16.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
print(df16)
print()
print("CHAIR")
df17 = my_df.loc[my_df['article_name'] == 'Chair']
df17 = (df17.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
print(df17)
print()
print("TABLET")
df18 = my_df.loc[my_df['article_name'] == 'Tablet']
df18 = (df18.groupby(by='week').sum()).sort_values('total_amount', ascending=False)
print(df18)
print()
print("FULL PC - ARGENTINA")
df19 = my_df.loc[(my_df['article_name'] == 'Full Pc')&(my_df['country_name'] == 'Argentina')]
df19 = (df19.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df19)
print()
print("FULL PC - PERÚ")
df20 = my_df.loc[(my_df['article_name'] == 'Full Pc')&(my_df['country_name'] == 'Peru')]
df20 = (df20.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df20)
print()
print("NOTEBOOK - ARGENTINA")
df21 = my_df.loc[(my_df['article_name'] == 'Notebook')&(my_df['country_name'] == 'Argentina')]
df21 = (df21.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df21)
print()
print("NOTEBOOK - PERÚ")
df22 = my_df.loc[(my_df['article_name'] == 'Notebook')&(my_df['country_name'] == 'Peru')]
df22 = (df22.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df22)
print()
print("SMARTPHONE - ARGENTINA")
df23 = my_df.loc[(my_df['article_name'] == 'Smartphone')&(my_df['country_name'] == 'Argentina')]
df23 = (df23.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df23)
print()
print("SMARTPHONE - PERÚ")
df24 = my_df.loc[(my_df['article_name'] == 'Smartphone')&(my_df['country_name'] == 'Peru')]
df24 = (df24.groupby(by='week').sum()).sort_values('week', ascending=True)
print(df24)
plt.subplot(1, 3, 1)
plt.bar(df14.index, df14['total_amount'], color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4],['1', '2', '3', '4'], rotation=0, fontsize=10)
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas de Full Pc por semana", size = 12)
plt.subplot(1, 3, 3)
plt.bar(df15.index, df15['total_amount'], color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4], ['1', '2', '3', '4'], rotation=0, fontsize=10)
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas de Notebook por semana", size = 12)
plt.show()
plt.subplot(1, 3, 1)
plt.bar(df16.index, df16['total_amount'], color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4], ['1', '2', '3', '4'], rotation=0, fontsize=10)
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas de Smartphone por semana", size = 12)
plt.subplot(1, 3, 3)
plt.bar(df17.index, df17['total_amount'], color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4], ['1', '2', '3', '4'], rotation=0, fontsize=10)
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas de Chair por semana", size = 12)
plt.show()
plt.subplot(1, 3, 2)
plt.bar(df18.index, df18['total_amount'], color=['darkslategrey','teal','lightseagreen','aquamarine'])
plt.xticks([1,2,3,4], ['1', '2', '3', '4'], rotation=0, fontsize=10)
plt.xlabel("Semana", size = 10)
plt.ylabel("Monto de ventas($)", size = 10)
plt.title("Ventas de Tablet por semana", size = 12)
plt.show()
df_f1 = my_df.loc[(my_df['article_name'] == 'Full Pc')&(my_df['country_name'] == 'Brazil')]
df_f2 = my_df.loc[(my_df['article_name'] == 'Full Pc')&(my_df['country_name'] == 'Argentina')]
df_f3 = my_df.loc[(my_df['article_name'] == 'Notebook')&(my_df['country_name'] == 'Brazil')]
df_f4 = my_df.loc[(my_df['article_name'] == 'Notebook')&(my_df['country_name'] == 'Argentina')]
plt.title("Vendedor vs Semana - Full Pc y Notebook - Argentina y Brasil", size = 12)
sns.scatterplot(y= 'seller_name', x='week', data = df_f1)
sns.scatterplot(y= 'seller_name', x='week', data = df_f2)
sns.scatterplot(y= 'seller_name', x='week', data = df_f3)
sns.scatterplot(y= 'seller_name', x='week', data = df_f4)
print("5 Mejores vendedores")
df25 = (my_df.groupby(by='seller_name').sum()).sort_values('total_amount', ascending=False).head(5)
pd.options.display.float_format= '$ {:,.2f}'.format
print(df25[['quantity'] + ['total_amount']])
print()
print("TOP 3 MEJORES VENDEDORES BRASIL")
df26 = my_df.loc[(my_df['country_name'] == 'Brazil')]
df26 = (df26.groupby(by='seller_name').sum()).sort_values('total_amount', ascending=False).head(3)
print(df26[['quantity'] + ['total_amount']])
print()
print("TOP 3 MEJORES VENDEDORES ARGENTINA")
df27 = my_df.loc[(my_df['country_name'] == 'Argentina')]
df27 = (df27.groupby(by='seller_name').sum()).sort_values('total_amount', ascending=False).head(3)
print(df27[['quantity'] + ['total_amount']])
plt.title("TOP 5 MEJORES VENDEDORES", size = 11)
c = ['darkslategrey','teal','lightseagreen','aquamarine','paleturquoise']
e=(0.3,0,0,0,0)
plt.pie(x=df25['total_amount'],radius=1.0,explode = e, autopct='%1.2f%%', colors = c,labels=df25.index, startangle=90)
plt.show()
plt.subplot(1, 3, 1)
plt.title("Brasil", size = 12)
c = ['violet','pink','blueviolet']
e=(0.2,0,0)
plt.pie(x=df26['total_amount'],radius=1.2, explode = e, autopct='%1.2f%%', colors = c,labels=df26.index, startangle=90)
plt.subplot(1, 3, 3)
plt.title("Argentina", size = 12)
c = ['violet','pink','blueviolet']
e=(0.2,0,0)
plt.pie(x=df27['total_amount'],radius=1.2, explode = e, autopct='%1.2f%%', colors = c,labels=df27.index, startangle=90)
plt.show()
df_f5 = my_df.loc[(my_df['country_name'] == 'Chile')]
df_f6 = my_df.loc[(my_df['country_name'] == 'Mexico')]
plt.title("Vendedores por semana - Chile, México", size = 12)
sns.scatterplot(y= 'seller_name', x='week', data = df_f5)
sns.scatterplot(y= 'seller_name', x='week', data = df_f6)
sns.pairplot(df_f5)
sns.pairplot(df_f6)