# A veces necesitamos instalar nuevas librerías en nuestros proyectos
!pip install openpyxl==3.0.10
# imports
import numpy as np
import pandas as pd
import sqlite3 as sql3
import matplotlib.pyplot as plt
import seaborn as sns
# Levanto los datos en 3 diferentes dataframes
#Articulos
conn=sql3.connect("/work/data/articles.db")
sql_query = pd.read_sql_query("SELECT * FROM articles",conn)
df_articles = pd.DataFrame(sql_query,columns=["article_id","article_name","unit_price"])
print(df_articles)
#Vendedores
df_sellers =pd.read_excel("/work/data/sellers.xlsx",index_col=0)
print(df_sellers)
#Órdenes
df_orders=pd.read_csv("/work/data/orders.csv")
print(df_orders)
# Exploración del df de artículos
print("Muestra de datos")
print(df_articles.head())
print("\nFormato del dataframe")
print(df_articles.shape)
print("\nBúqueda de valores nulos")
print(df_articles.isnull().sum())
print("\nFormato de los datos")
print(df_articles.dtypes)
# Exploración del df de vendedores
print("Muestra de datos")
print(df_sellers.head())
print("\nFormato del dataframe")
print(df_sellers.shape)
print("\nBúqueda de valores nulos")
print(df_sellers.isnull().sum())
print("\nFormato de los datos")
print(df_sellers.dtypes)
# Exploración del df de órdenes
print("Muestra de datos")
print(df_orders.head())
print("\nFormato del dataframe")
print(df_orders.shape)
print("\nBúqueda de valores nulos")
print(df_orders.isnull().sum())
print("\nFormato de los datos")
print(df_orders.dtypes)
df_articles["unit_price"]= df_articles["unit_price"].astype(float)
print(df_articles.dtypes)
#Creo una copia del df_orders
my_df=df_orders.copy()
#Cambio el índice del df_articles
df_articles.set_index("article_id", inplace=True)
print(df_articles.loc[20017]["article_name"])
print(df_articles.head())
print(my_df.head())
#Agrego las columnas que me faltan
my_df= my_df.assign(article_name=my_df["article_id"])
my_df= my_df.assign(total_amount=my_df["article_id"])
my_df= my_df.assign(seller_name=my_df["seller_id"])
print(my_df.head())
#Reemplazar los datos en las nuevas columnas
#Lista de los índices
#print(my_df.index)
#print(len(my_df.index))
for i in range(len(my_df.index)):
#columna article_name
#Cargo el nombre del artículo en una variable
article=df_articles.loc[my_df.loc[i]["article_name"]]["article_name"]
#se lo asigno a la columna y registro que corresponde
my_df.loc[i,"article_name"]=article
#columna total_amount
my_df.loc[i,"total_amount"]=my_df.loc[i,"quantity"]*df_articles.loc[my_df.loc[i]["total_amount"]]["unit_price"]
#columna de seller_name
my_df.loc[i,"seller_name"]=df_sellers.loc[my_df.loc[i]["seller_name"]]["seller_name"]
print(my_df.head())
#Elimino las coumnas que no necesito
my_df.drop(["order_id","article_id","seller_id"],axis="columns",inplace=True)
print(my_df.head())
#Método merge
d1=pd.DataFrame({"mes":["ene","feb","mar","abr"],"ventas":[10,20,30,15]})
d2=pd.DataFrame({"mes":["ene","feb","mar","abr"],"costos":[7,16,25,12]})
print(pd.merge(d1,d2))
# RESOLUCIÓN ANALÍTICA
my_df2=my_df.groupby("article_name").sum()
por_cant=my_df2.sort_values("quantity",ascending=False)
print(por_cant["quantity"].head(1))
# RESOLUCIÓN GRÁFICA
sns.displot(my_df, x="article_name")
plt.xticks(rotation=90)
plt.show
# RESOLUCIÓN ANALÍTICA
my_df3=(my_df.groupby("article_name").sum()).sort_values("total_amount",ascending=False).head(5)
print(my_df3["total_amount"])
# RESOLUCIÓN GRÁFICA
plt.pie(x=my_df3["total_amount"],autopct="%1.2f%%",labels=my_df3.index)
plt.show()
# RESOLUCIÓN ANALÍTICA
df4=(my_df.groupby("seller_name").sum()).sort_values("total_amount",ascending=False)
print(df4[["quantity"]+["total_amount"]])
# RESOLUCIÓN GRÁFICA
plt.bar(df4.index,df4["total_amount"])
plt.xticks(rotation=60)
plt.show()
# RESOLUCIÓN ANALÍTICA
df5=(my_df.groupby("week").sum()).sort_values("total_amount",ascending=False)
print(df5)
# RESOLUCIÓN GRÁFICA
plt.bar(df5.index,df5["total_amount"])
plt.show()
# RESOLUCIÓN ANALÍTICA
df6=(my_df.groupby("country_name").sum()).sort_values("total_amount",ascending=False).head(5)
print(df6[["quantity"]+["total_amount"]])
#RESOLUCION GRAFICA
plt.barh(df6.index,df6["total_amount"])
plt.xticks(rotation=0)
plt.show()
# RESOLUCIÓN ANALÍTICA 1
df7=(my_df.groupby("country_name").sum()).sort_values("total_amount",ascending=False).tail(6)
print(df7[["quantity"]+["total_amount"]])
#RESOLUCION GRAFICA
plt.barh(df7.index,df7["total_amount"])
plt.xticks(rotation=0)
plt.show()
# RESOLUCIÓN ANALÍTICA 2
df8=(my_df.groupby("article_name").sum()).sort_values("total_amount",ascending=False).tail(6)
print(df8[["quantity"]+["total_amount"]])
#RESOLUCION GRAFICA
plt.barh(df8.index,df8["total_amount"])
plt.xticks(rotation=45)
plt.show()
ff=my_df[((my_df["article_name"]=="Keyboard")|(my_df["article_name"]=="Pci Express Port")|(my_df["article_name"]=="Heatsink")|(my_df["article_name"]=="Fan Cooler")|(my_df["article_name"]=="Usb Cable")|(my_df["article_name"]=="Sata Cable"))&((my_df["country_name"]=="Brazil")|(my_df["country_name"]=="Argentina")|(my_df["country_name"]=="Colombia")|(my_df["country_name"]=="Peru")|(my_df["country_name"]=="Mexico"))]
sns.barplot(x="country_name",y="total_amount",hue="week",data=ff)
sns.relplot(x="total_amount",y="country_name",col="week",data=ff)
bb=my_df[((my_df["article_name"]=="Keyboard")|(my_df["article_name"]=="Pci Express Port")|(my_df["article_name"]=="Heatsink")|(my_df["article_name"]=="Fan Cooler")|(my_df["article_name"]=="Usb Cable")|(my_df["article_name"]=="Sata Cable"))&((my_df["country_name"]=="Chile")|(my_df["country_name"]=="Bolivia")|(my_df["country_name"]=="Uruguay")|(my_df["country_name"]=="Ecuador")|(my_df["country_name"]=="Paraguay")|(my_df["country_name"]=="Puerto Rico"))]
sns.barplot(x="country_name",y="total_amount",hue="week",data=bb)
sns.relplot(x="total_amount",y="country_name",col="week",data=bb)