TP Final Integrador - 22044 - Maria Laura Iacovo

# Exploratory overview of the three source DataFrames, in this order:
# articles, sellers, orders. For each one we print a sample of rows, the
# (rows, columns) shape, the number of null values per column and the dtype
# of every column.
for frame in (df_articles, df_sellers, df_orders):
    print('Muestra de datos')
    print(frame.head())  # head() shows the first 5 rows by default
    print('\nFormato del dataframe')
    print(frame.shape)
    print('\nBusqueda de valores nuls por columna')
    print(frame.isna().sum())
    print('\nFormato de los datos por columna')
    print(frame.dtypes)

# Build the working sales table `my_df` from the orders, enriched with the
# article name, the order's total amount and the seller name.
#
# DataFrame.copy() makes a deep copy by default, so the edits below never
# touch df_orders.
my_df = df_orders.copy()

# Index the articles by 'article_id' so ids can be used as lookup keys.
# The guard keeps this cell re-runnable: once the column has become the
# index, calling set_index('article_id') again would raise KeyError.
if 'article_id' in df_articles.columns:
    df_articles.set_index('article_id', inplace=True)

# Vectorized lookups replace the previous row-by-row loop. They do not depend
# on my_df having a default 0..n-1 index, and they create each new column
# with its final dtype instead of overwriting integer ids cell by cell.
# NOTE: an id with no match in the lookup table yields NaN here, whereas the
# loop raised KeyError.
my_df['article_name'] = my_df['article_id'].map(df_articles['article_name'])
my_df['total_amount'] = my_df['quantity'] * my_df['article_id'].map(df_articles['unit_price'])
# Assumes df_sellers is indexed by seller id, exactly as the original
# df_sellers.loc[seller_id] lookup did.
my_df['seller_name'] = my_df['seller_id'].map(df_sellers['seller_name'])

# Drop the id columns that are no longer needed for the analysis.
my_df = my_df.drop(columns=['order_id', 'article_id', 'seller_id'])
print(my_df)

# ANALYTIC ANSWER: units sold per article, most sold first.
# The units are the sum of 'quantity' per article. value_counts() on
# 'article_name' only counts how many order rows mention each article, which
# is not the number of units sold.
print("Los artículos y sus cantidades vendidas")
print(my_df.groupby('article_name')['quantity'].sum().sort_values(ascending=False))

# GRAPHIC ANSWER: bar chart of units sold per article.
# The chart is titled "units sold", so it plots (and is ordered by) the summed
# 'quantity' rather than 'total_amount'.
df0 = my_df.groupby(by='article_name').sum().sort_values('quantity', ascending=False)
plt.bar(df0.index, df0['quantity'], color='lightsalmon')
plt.title("UNIDADES VENDIDAS POR ARTÍCULO")
# Articles are on the x axis and units on the y axis (labels were swapped).
plt.xlabel("ARTÍCULO")
plt.ylabel("UNIDADES VENDIDAS")
plt.xticks(rotation=90)
plt.show()

# ANALYTIC ANSWER: the five articles with the highest total sales amount.
df2 = my_df.groupby('article_name').sum()
df2 = df2.sort_values(by='total_amount', ascending=False).iloc[:5]
# Global display option: from here on floats are printed as currency.
pd.set_option('display.float_format', '$ {:,.2f}'.format)
print(df2['total_amount'])

# GRAPHIC ANSWER: share of each of those five articles, as a pie chart.
# The slice labels come from the index of df2, i.e. the article names.
c = ['crimson', 'gold', 'chocolate', 'plum', 'mediumseagreen']
plt.pie(
    df2['total_amount'],
    labels=df2.index,
    colors=c,
    autopct='%1.1f%%',
)
plt.show()

# ANALYTIC ANSWER: totals per seller, highest total amount first; the five
# top sellers are printed with their quantity and total amount.
df4 = my_df.groupby('seller_name').sum()
df4 = df4.sort_values(by='total_amount', ascending=False)
pd.set_option('display.float_format', '$ {:,.2f}'.format)
print(df4[['quantity', 'total_amount']].head(5))

# GRAPHIC ANSWER: bar chart of the total amount for every seller.
plt.bar(df4.index, df4['total_amount'], color='darkcyan')
plt.title("VENTAS TOTALES POR VENDEDOR")
plt.xlabel("Nombre del vendedor")
plt.ylabel("Ventas totales")
plt.xticks(rotation=90)
plt.show()

# ANALYTIC ANSWER: totals per week, printed from the highest to the lowest
# total amount.
df5 = my_df.groupby('week').sum()
df5 = df5.sort_values(by='total_amount', ascending=False)
# Global display option: floats are printed as currency with one decimal.
pd.set_option('display.float_format', '$ {:,.1f}'.format)
print(df5)

# GRAPHIC ANSWER: bar chart of the total amount per week. Bars are placed at
# the week value taken from the index; ticks are drawn at 0, 1, 2, 3 and 4.
plt.bar(df5.index, df5['total_amount'], color='mediumpurple')
plt.title("EVOLUCIÓN DE VENTAS POR SEMANA")
plt.xlabel("Semanas")
plt.ylabel("Ventas totales")
plt.xticks(np.arange(5))
plt.show()

# Totals per country, highest total amount first; the first 20 countries are
# printed with their total amount and quantity.
print("Total de ventas por países y cantidad de productos vendidos")
df6 = my_df.groupby('country_name').sum()
df6 = df6.sort_values(by='total_amount', ascending=False)
print(df6[['total_amount', 'quantity']].head(20))

# Rows of my_df whose country is Brazil (the bare name displays the frame
# when run as a notebook cell).
BRA_df = my_df.loc[my_df['country_name'] == 'Brazil']
BRA_df

# Rows of my_df whose country is Puerto Rico.
PRI_df = my_df.loc[my_df['country_name'] == 'Puerto Rico']
PRI_df

# GRAPHIC ANSWER: bar plot of the total amount per country.
print("RESOLUCIÓN GRÁFICA")
# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, so the positional form fails there.
sns.barplot(x=df6.index, y=df6['total_amount'], palette='copper')
plt.title("VENTAS TOTALES POR PAÍS")
plt.xlabel("País")
plt.ylabel("Ventas totales")
plt.xticks(rotation=90)
plt.show()

# Per-article analysis for Brazil: best and worst article by total amount
# and by units, followed by two bar plots.

# The per-article totals are computed once and re-sorted for each question
# instead of repeating the same groupby four times.
bra_by_article = BRA_df.groupby(by='article_name').sum()

print("Artículo con más ingresos en ventas en Brasil:")
df7 = bra_by_article.sort_values('total_amount', ascending=False)
print(df7[['total_amount', 'quantity']].head(1))
print()
print("Artículo más elegido (en unidades):")
df7b = bra_by_article.sort_values('quantity', ascending=False)
print(df7b[['quantity', 'total_amount']].head(1))
print()
print("Artículo con menos ingresos en ventas en Brasil:")
df7c = bra_by_article.sort_values('total_amount', ascending=True)
print(df7c[['total_amount', 'quantity']].head(1))
print()
print("Artículo menos elegido (en unidades):")
df7d = bra_by_article.sort_values('quantity', ascending=True)
print(df7d[['quantity', 'total_amount']].head(1))

print("RESOLUCIÓN GRÁFICA")
# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, so the positional form fails there.
sns.barplot(x=df7.index, y=df7['total_amount'], palette='Spectral_r')
plt.xticks(rotation=90)
plt.title("VENTAS TOTALES POR ARTÍCULO EN BRASIL")
plt.xlabel("Artículo")
plt.ylabel("Ventas totales")
plt.show()

sns.barplot(x=df7b.index, y=df7b['quantity'], palette='afmhot')
plt.xticks(rotation=90)
plt.title("CANTIDADES VENDIDAS POR ARTÍCULO EN BRASIL")
plt.xlabel("Artículo")
plt.ylabel("Cantidades totales")
plt.show()

# Rows of my_df whose country is Argentina (the bare name displays the frame
# when run as a notebook cell).
ARG_df = my_df.loc[my_df['country_name'] == 'Argentina']
ARG_df

# Per-article analysis for Argentina: best and worst article by total amount
# and by units, followed by two bar plots.

# The per-article totals are computed once and re-sorted for each question
# instead of repeating the same groupby four times.
arg_by_article = ARG_df.groupby(by='article_name').sum()

print("Artículo con más ingresos en ventas en Argentina:")
df8 = arg_by_article.sort_values('total_amount', ascending=False)
print(df8[['total_amount', 'quantity']].head(1))
print()
print("Artículo más elegido (en unidades):")
df8b = arg_by_article.sort_values('quantity', ascending=False)
print(df8b[['quantity', 'total_amount']].head(1))
print()
print("Artículo con menos ingresos en ventas en Argentina:")
df8c = arg_by_article.sort_values('total_amount', ascending=True)
print(df8c[['total_amount', 'quantity']].head(1))
print()
print("Artículo menos elegido (en unidades):")
df8d = arg_by_article.sort_values('quantity', ascending=True)
print(df8d[['quantity', 'total_amount']].head(1))

print("RESOLUCIÓN GRÁFICA")
# x and y are passed by keyword: recent seaborn releases accept only `data`
# positionally, so the positional form fails there.
sns.barplot(x=df8.index, y=df8['total_amount'], palette='vlag')
plt.xticks(rotation=90)
plt.title("VENTAS TOTALES POR ARTÍCULO EN ARGENTINA")
plt.xlabel("Artículo")
plt.ylabel("Ventas totales")
plt.show()

sns.barplot(x=df8b.index, y=df8b['quantity'], palette='Wistia')
plt.xticks(rotation=90)
plt.title("CANTIDADES VENDIDAS POR ARTÍCULO EN ARGENTINA")
plt.xlabel("Artículo")
plt.ylabel("Cantidades totales")
plt.show()

# Weekly totals for selected articles in Brazil and Argentina. Each section
# filters one country's rows down to a single article, sums them per week
# and prints the resulting table.
# NOTE(review): the Argentina "Full Pc" table is ordered by week, while the
# other three are ordered by total amount (highest first).
print("Evolución de las ventas del artículo -Full Pc- por semana en Brasil")
df9a = BRA_df.loc[BRA_df['article_name'] == 'Full Pc']
df9b = df9a.groupby('week').sum().sort_values(by='total_amount', ascending=False)
print(df9b)
print()

print("Evolución de las ventas del artículo -Full Pc- por semana en Argentina")
df9c = ARG_df.loc[ARG_df['article_name'] == 'Full Pc']
df9d = df9c.groupby('week').sum().sort_values(by='week')
print(df9d)
print()

print("Evolución de las ventas del artículo -Sata Cable- por semana en Brasil")
df9e = BRA_df.loc[BRA_df['article_name'] == 'Sata Cable']
df9f = df9e.groupby('week').sum().sort_values(by='total_amount', ascending=False)
print(df9f)
print()

print("Evolución de las ventas del artículo -Pci Express Port- por semana en Argentina")
df9g = ARG_df.loc[ARG_df['article_name'] == 'Pci Express Port']
df9h = df9g.groupby('week').sum().sort_values(by='total_amount', ascending=False)
print(df9h)

# GRAPHIC ANSWER: one categorical plot per filtered article frame, with the
# article name on x, the total amount on y and one hue per week.
print("RESOLUCIÓN GRÁFICA")
# Brazil: Full Pc and Sata Cable, as box plots.
sns.catplot(data=df9a, x="article_name", y="total_amount", hue="week",
            kind="box", palette='nipy_spectral')
sns.catplot(data=df9e, x="article_name", y="total_amount", hue="week",
            kind="box", palette='gnuplot2')

# Argentina: Full Pc as a box plot, Pci Express Port as a bar plot.
sns.catplot(data=df9c, x="article_name", y="total_amount", hue="week",
            kind="box", palette='CMRmap')
sns.catplot(data=df9g, x="article_name", y="total_amount", hue="week",
            kind="bar", palette='bone')