# 6. TP Final Integrador (final integrative practical assignment)

# Sometimes we need to install extra libraries for our projects.
# The command below is a notebook shell escape (IPython `!` syntax), so it is
# kept as a comment here; run it from a notebook cell or a terminal.
# NOTE(review): presumably required by the pd.read_excel call that loads
# sellers.xlsx - confirm before removing the pin.
# !pip install openpyxl==3.0.10

# Load the source data into three separate DataFrames:
#   df_articles <- SQLite table `articles`
#   df_sellers  <- Excel sheet (first column used as the index)
#   df_orders   <- CSV file
con = sq3.connect('/work/data/articles.db')
try:
    sql_query = pd.read_sql_query('SELECT * FROM articles', con)
finally:
    # sqlite3 connections are not closed automatically; release the file
    # handle even when the query raises.
    con.close()

# Keep only the three columns used by the analysis, in this order.
df_articles = pd.DataFrame(
    sql_query,
    columns=['article_id', 'article_name', 'unit_price'],
)
df_sellers = pd.read_excel('/work/data/sellers.xlsx', index_col=0)
df_orders = pd.read_csv('/work/data/orders.csv')
print(df_orders)

# Re-index the articles table by article_id so that later cells can look
# articles up by their id with .loc.
df_articles = df_articles.set_index('article_id')
print(df_articles.head())

# Work on a copy of df_orders and append the three columns the analysis needs.
# They start out as placeholders holding the ids they will be derived from.
my_df = df_orders.copy()
my_df = my_df.assign(
    article_name=my_df['article_id'],
    total_amount=my_df['article_id'],
    seller_name=my_df['seller_id'],
)
print(my_df.head())

# Replace the placeholder columns with the real values.
#
# The lookups are done for the whole column at once instead of row by row:
# this avoids the chained `.loc[i][col]` accesses and avoids writing strings
# and floats cell by cell into integer placeholder columns.

# One article row per order, in order sequence. Selecting by a list of labels
# raises KeyError if any article_id is unknown, just like a per-row lookup.
order_articles = df_articles.loc[my_df['article_id']]

# .to_numpy() drops the looked-up index so values are assigned by position.
# Article name, resolved from the article id stored in my_df.
my_df['article_name'] = order_articles['article_name'].to_numpy()

# Seller name, resolved from the seller id stored in my_df.
my_df['seller_name'] = df_sellers.loc[my_df['seller_id'], 'seller_name'].to_numpy()

# Unit price multiplied by the number of units sold.
my_df['total_amount'] = (
    order_articles['unit_price'].to_numpy() * my_df['quantity'].to_numpy()
)

# Drop the id columns that are no longer needed.
my_df.drop(['article_id', 'seller_id', 'order_id'], axis='columns', inplace=True)
print(my_df.head())

# ANALYTICAL SOLUTION
# Total the orders per article, rank the articles by units sold and show the
# top one (article name together with its quantity).
my_df2 = my_df.groupby('article_name').sum()
por_cantidad = my_df2.sort_values('quantity', ascending=False)
print(por_cantidad['quantity'].head(1))

# Alternative: print(my_df2['quantity'].max()) returns only the number
# (413 according to the original note), without the article name.

# GRAPHICAL SOLUTION
# Bar chart of units sold per article, bars ordered from most to least sold.
articles_by_quantity = my_df2.sort_values('quantity', ascending=False).index
sns.barplot(
    x=my_df2.index,
    y=my_df2['quantity'],
    data=my_df2,
    order=articles_by_quantity,
)
plt.xticks(rotation=90)
plt.show()

# ANOTHER GRAPHICAL SOLUTION
# Horizontal bar chart of units sold per article, using the ranking stored in
# por_cantidad.
fig, ax = plt.subplots(figsize=(5, 8))
ax.barh(por_cantidad.index, por_cantidad['quantity'], color='green', alpha=0.8)
ax.set_xlabel('Cantidad vendida')
ax.set_ylabel('Producto')
ax.set_title('Unidades vendidas por producto')
plt.show()

# ANALYTICAL SOLUTION
# Total the orders per article and keep the five articles with the highest
# total_amount.
my_df2 = my_df.groupby('article_name').sum()
articles_by_amount = my_df2.sort_values(by='total_amount', ascending=False)
por_precio = articles_by_amount.head()
# por_precio already holds five rows at most, so it is printed as is.
print(por_precio['total_amount'])

# GRAPHICAL SOLUTION
# NOTE: only the top five products are plotted. If per-slice percentages were
# added to the chart they would be relative to these five products, not to
# the real overall total.
top_amounts = por_precio['total_amount']
plt.pie(x=top_amounts, labels=por_precio.index)
plt.show()

# ANALYTICAL SOLUTION
# Totals per seller, ranked by total_amount; keep the first five sellers.
seller_totals = my_df.groupby('seller_name').sum()
df3 = seller_totals.sort_values(by='total_amount', ascending=False).head()
print(df3[['quantity', 'total_amount']])

# GRAPHICAL SOLUTION
# Recompute the five sellers with the highest total_amount and draw them as
# a bar chart.
seller_totals = my_df.groupby('seller_name').sum()
df3 = seller_totals.sort_values(by='total_amount', ascending=False).head()
plt.bar(x=df3.index, height=df3['total_amount'])
plt.xticks(rotation=25)
plt.show()

# ANALYTICAL SOLUTION
# Totals per week.
# NOTE(review): .sum() is applied to every remaining column of my_df, not
# only the numeric ones - confirm the printed table looks as intended with
# the pandas version in use.
orders_by_week = my_df.groupby(by='week')
df4 = orders_by_week.sum()
print(df4)

# GRAPHICAL SOLUTION
# Bar chart of total_amount per week.
weekly_amount = df4['total_amount']
plt.bar(x=df4.index, height=weekly_amount)
plt.show()

# SOLUTION
# Totals per country; keep the single country with the most units sold.
df5 = my_df.groupby('country_name').sum()
por_cantidad = df5.sort_values('quantity', ascending=False).head(1)
# Print explicitly, like the other cells do: a bare expression is only echoed
# by a notebook and is silently discarded when run as a script.
print(por_cantidad['quantity'])

# Rows whose country_name is 'Brazil', totalled per week and seller.
is_brazil = my_df['country_name'] == 'Brazil'
df6 = my_df[is_brazil]
df11 = df6.groupby(['week', 'seller_name'], as_index=False).sum()

# Table of weekly totals per seller, highest total_amount first.
weekly_by_seller = df11[['week', 'seller_name', 'total_amount']]
print(weekly_by_seller.sort_values('total_amount', ascending=False))

# Grouped bar chart: one group per week, one bar per seller.
sns.set(rc={'figure.figsize': (15, 8)})
chart = sns.barplot(x='week', y='total_amount', data=df11, hue='seller_name')
# NOTE(review): the title mentions units dispatched while the bars show
# total_amount - confirm which of the two is intended.
chart.set(title='Cantidad de unidades despachadas por vendedor')
plt.legend(bbox_to_anchor=(1.2, 1))
plt.show()

# SOLUTION
# Average total_amount per seller, rounded to two decimals, in a column named
# 'promedio'.
# The aggregation is given by name: passing the np.mean callable to
# groupby.agg is deprecated in recent pandas (FutureWarning) and the string
# 'mean' is the documented way to keep the same result.
df7 = my_df.groupby('seller_name').agg(promedio=('total_amount', 'mean')).round(2)
# To rank the sellers by their average, append
# .sort_values('promedio', ascending=False) to the expression above.
print(df7)