TP Final Integrador - Paula Farias

# Do an upgrade of pip last_pip_version = '22.1.2' # get pip version installed_pip_version = !pip --version installed_pip_version = installed_pip_version.fields(1) installed_pip_version if(last_pip_version != installed_pip_version[0]): print(f'Installing version {last_pip_version}...') !python3 -m pip install --upgrade pip else: print(f'Version {last_pip_version} is already installed')

# A veces necesitamos instalar nuevas librerías en nuestros proyectos #list all installed packages installed_pkgs = !pip list #save installed packages names in a list installed_pkgs_list = installed_pkgs.fields(0) #delete installed_pkgs_arr[0] and [1] because they are not packages names del installed_pkgs_list[0:2] print(installed_pkgs_list) #initialize variables of packages we require for this project req_pkgs = ['openpyxl','pandas','numpy','matplotlib','seaborn','geopandas','geopy'] installed = [] #for each package in the list for pkg in installed_pkgs_list: #if pkg is already installed if(pkg in req_pkgs): print(f'{pkg} pkg is already installed') installed.append(pkg) #if a required pkg is not installed for pkg in req_pkgs: if(pkg not in installed): print(f'Installing {pkg} pkg...') !pip install {pkg}

# required pkgs imports import pandas as pd import sqlite3 import matplotlib.pyplot as plt import matplotlib as mpl import numpy as np import openpyxl import seaborn as sns import geopandas as gpd from geopandas import GeoDataFrame import geopy

# Levanto los datos en 3 diferentes dataframes #--- DB --- #sql connection articles_data = sqlite3.connect('/work/data/articles.db') #sql query articles_query= "SELECT * from articles" #create dataframe df_articles_original = pd.read_sql(articles_query, articles_data) #close connection articles_data.close() #--- XLSX --- #file path sellers_data = '/work/data/sellers.xlsx' #create dataframe df_sellers_original = pd.read_excel(sellers_data) #--- CSV --- #file path orders_data = '/work/data/orders.csv' #create dataframe df_orders_original = pd.read_csv(orders_data)

#----- Exploración del df de artículos ----- df_articles = df_articles_original.copy() print(df_articles.head())

#check df columns dtypes print(df_articles.dtypes)

#check df info to get entries, nulls and dtypes print(df_articles.info())

#----- Exploración del df de vendedores ----- df_sellers = df_sellers_original.copy() print(df_sellers.head())

#check df columns dtypes print(df_sellers.dtypes)

#check df info to get entries, nulls and dtypes print(df_sellers.info())

#----- Exploración del df de órdenes ----- df_orders = df_orders_original.copy() print(df_orders.head())

#check df columns dtypes print(df_orders.dtypes)

print(df_orders.info())

#set index to article_id and rename axis df_articles.set_index('article_id', inplace=True) df_articles.index.name = None df_articles.rename_axis('article_id', axis="columns", inplace=True) #convert unit_price column to float64 df_articles = df_articles.astype({'unit_price':'float64'}, copy=True, errors='raise') print(df_articles.dtypes)

#set index to seller_id and rename axis df_sellers.set_index('seller_id', inplace=True) df_sellers.index.name = None df_sellers.rename_axis('seller_id', axis="columns", inplace=True) print(df_articles.dtypes)

#set index to order_id and rename axis df_orders.set_index('order_id', inplace=True) df_orders.index.name = None df_orders.rename_axis('order_id', axis="columns", inplace=True) print(df_orders.dtypes)

#merge df_orders with df_sellers on seller_id my_df = pd.merge(df_orders, df_sellers, left_on='seller_id',right_on=df_sellers.index) #merge my_df with df_articles on article_id my_df = pd.merge(my_df, df_articles, left_on='article_id', right_on=df_articles.index) #add total_amount column to my_df my_df['total_amount'] = my_df['quantity'] * my_df['unit_price'] #set index to order_id and rename axis my_df.set_index('article_id', inplace=True) my_df.index.name = None #my_df.rename_axis('article_id', axis="columns", inplace=True) my_df

# --- SEABORN PALETTE FOR PLOTS --- palette='pastel' color_blue = sns.color_palette(palette)[0] color_orange = sns.color_palette(palette)[1] color_green = sns.color_palette(palette)[2] color_red = sns.color_palette(palette)[3] color_purple = sns.color_palette(palette)[4] color_brown = sns.color_palette(palette)[5] color_pink = sns.color_palette(palette)[6] color_grey = sns.color_palette(palette)[7] color_yellow = sns.color_palette(palette)[8] color_turquoise = sns.color_palette(palette)[9] palette='dark' color_blue_dark = sns.color_palette(palette)[0] color_orange_dark = sns.color_palette(palette)[1] color_green_dark = sns.color_palette(palette)[2] color_red_dark = sns.color_palette(palette)[3] color_purple_dark = sns.color_palette(palette)[4] color_brown_dark = sns.color_palette(palette)[5] color_pink_dark = sns.color_palette(palette)[6] color_grey_dark = sns.color_palette(palette)[7] color_yellow_dark = sns.color_palette(palette)[8] color_turquoise_dark = sns.color_palette(palette)[9]

# RESOLUCIÓN ANALÍTICA #groupby article_name on the column quantity and sum them selled_articles = my_df.groupby(['article_name'])[['article_name','quantity']].sum(['quantity']) selled_articles.index.name = 'article_name' #get the most selled article name most_selled = selled_articles.idxmax() #print(most_selled['quantity']) #get the most selled article quantity most_selled_qty = selled_articles.max() #print(most_selled_qty['quantity']) print(f'(ES) El artículo más vendido fue "{most_selled["quantity"]}" con {most_selled_qty["quantity"]} unidades vendidas.') print(f'(EN) The most selled article was "{most_selled["quantity"]}" with {most_selled_qty["quantity"]} units selled.')

# get top 5 articles top_5_articles = selled_articles.sort_values(by="quantity", ascending=False).head() # creating a np array for top_5_articles top_5_articles_arr = np.array([top_5_articles.index, top_5_articles["quantity"]]) print(top_5_articles_arr)

# RESOLUCIÓN GRÁFICA # ----- CREATE PLOT FIGURE ----- plt.figure(figsize=(10,6)) # ----- CREATE PLOT BAR ----- top_5_articles_plot = plt.bar( top_5_articles_arr[0], #x axis top_5_articles_arr[1], #y axis bottom=0, #y axis starts from width=0.7, #bars width align='center', #bars align color=color_orange, #'orange', #bars color edgecolor='black', #bars edgecolor (border) linewidth=0.5, #bars edge line width ) # --- PLOT LABEL --- plt.bar_label( top_5_articles_plot, padding=-80, color='white', fontweight='bold' ) # --- PLOT GRID --- plt.grid(visible=False) # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Top 5 selled articles' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_orange_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # ----- AXIS X ----- # X LABEL TEXT xlabel_text= 'Articles' # X LABEL LOCATION xlabel_loc = 'center' # X LABEL PADDING xlabel_pad = 20 # X LABEL FONT xlabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE X LABEL plt.xlabel( xlabel=xlabel_text, loc=xlabel_loc, labelpad=xlabel_pad, fontdict=xlabel_font ) # CREATE X TICKS plt.xticks( top_5_articles_arr[0], rotation=0 ) # ----- AXIS Y ----- # Y LABEL TEXT ylabel_text= 'Qty' # Y LABEL LOCATION ylabel_loc = 'center' # Y LABEL LOCATION ylabel_pad = 20 # Y LABEL FONT ylabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE Y LABEL plt.ylabel( ylabel=ylabel_text, loc=ylabel_loc, labelpad=ylabel_pad, fontdict=ylabel_font, ) # --- removing the top and right axes spines sns.despine() # --- SAVE FIG --- plt.savefig('img/top_5_selled_articles.png') # --- SHOW PLOT --- plt.show()

# RESOLUCIÓN ANALÍTICA #groupby article_name on the column total_amount and sum them articles_income = my_df.groupby(['article_name'])[['article_name','total_amount']].sum(['total_amount']) articles_income.index.name = 'article_name' #get the most selled article name most_selled = articles_income.idxmax() #get the most selled article total amount most_selled_total_amount = articles_income.max().astype(int) print(f'(ES) El artículo más vendido fue "{most_selled["total_amount"]}" con un total de ${most_selled_total_amount["total_amount"]}.') print(f'(EN) The most selled article was "{most_selled["total_amount"]}" with a total of ${most_selled_total_amount["total_amount"]}.')

# get top 5 most income articles top_5_income_articles = articles_income.sort_values(by="total_amount", ascending=False).head() # creating a np array for top_5_income_articles top_5_income_articles_arr = np.array([top_5_income_articles.index, top_5_income_articles["total_amount"].astype(int)]) print(top_5_income_articles_arr)

# RESOLUCIÓN GRÁFICA # ----- CREATE PLOT FIGURE ----- plt.figure(figsize=(10,6)) # ----- CREATE PLOT BAR HORIZONTAL ----- top_5_income_articles_plot = plt.barh( top_5_income_articles_arr[0], #x axis top_5_income_articles_arr[1], #y axis #bottom=0, #y axis starts from #width=0.7, #bars width align='center', #bars align color=color_blue, #bars color edgecolor='black', #bars edgecolor (border) linewidth=0.5, #bars edge line width ) # --- PLOT LABEL --- plt.bar_label( top_5_income_articles_plot, padding=10, color=color_blue, fontweight='bold' ) # --- PLOT GRID --- plt.grid(visible=False) # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Top 5 articles income' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_blue_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # ----- AXIS X ----- # X LABEL TEXT xlabel_text= 'Total amount ($)' # X LABEL LOCATION xlabel_loc = 'center' # X LABEL PADDING xlabel_pad = 20 # X LABEL FONT xlabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE X LABEL plt.xlabel( xlabel=xlabel_text, loc=xlabel_loc, labelpad=xlabel_pad, fontdict=xlabel_font ) # ----- AXIS Y ----- # Y LABEL TEXT ylabel_text= 'Articles' # Y LABEL LOCATION ylabel_loc = 'center' # Y LABEL LOCATION ylabel_pad = 20 # Y LABEL FONT ylabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE Y LABEL plt.ylabel( ylabel=ylabel_text, loc=ylabel_loc, labelpad=ylabel_pad, fontdict=ylabel_font, ) # --- removing the top and right axes spines sns.despine() # --- SAVE FIG --- plt.savefig('img/top_5_articles_income.png') # --- SHOW PLOT --- plt.show()

# RESOLUCIÓN ANALÍTICA #groupby seller_name on the column total_amount and sum them top_month_sellers = my_df.groupby(['seller_name'])[['seller_name','total_amount']].sum(['total_amount']) top_month_sellers.index.name = 'seller_name' #get the top month seller name top_month_seller = top_month_sellers.idxmax() #get the top month seller total amount top_month_seller_total_amount = top_month_sellers.max().astype(int) print(f'(ES) Se debería otorgar el bono al vendedor {top_month_seller["total_amount"]}. Su total de ingresos fue de ${top_month_seller_total_amount["total_amount"]}.') print(f'(EN) The bonus should be awarded to the seller {top_month_seller["total_amount"]}. Their total revenue was ${top_month_seller_total_amount["total_amount"]}.')

# get top 5 month sellers top_5_month_sellers = top_month_sellers.sort_values(by="total_amount", ascending=False).head() # creating a np array for top_5_month_sellers top_5_month_sellers_arr = np.array([top_5_month_sellers.index, top_5_month_sellers["total_amount"].astype(int)]) top_5_sellers_names = top_5_month_sellers_arr[0] top_5_sellers_amount = top_5_month_sellers_arr[1] print(top_5_month_sellers_arr)

# RESOLUCIÓN GRÁFICA # ----- CREATE PLOT FIGURE ----- plt.figure(figsize=(10,6)) # ----- FUNCTION FOR AUTOPCT PARAMETER ----- def create_autopct(values): def my_autopct(pct): total = sum(values) val = int(round(pct*total/100.0)) return '{p:.2f}% \n\n(${v:d})'.format(p=pct,v=val) return my_autopct # ----- CREATE PLOT PIE ----- top_5_month_sellers_plot = plt.pie( top_5_sellers_amount, #wedge sizes labels=top_5_sellers_names, #labels of each wedge colors= sns.color_palette('pastel'), #wedges colors autopct=create_autopct(top_5_sellers_amount), #percentage shadow=True, #shadow of the plot explode=(0.1,0,0,0,0) # ) plt.axis("equal") # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Top 5 sellers of the month' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_purple_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # --- SAVE FIG --- plt.savefig('img/top_5_sellers_of_the_month.png') # --- SHOW PLOT --- plt.show()

# RESOLUCIÓN ANALÍTICA #groupby week on the column total_amount and sum them weeks_total_amount = my_df.groupby(['week'])[['week','total_amount']].sum(['total_amount']) weeks_total_amount.index.name = 'week' #get the top month seller name week_most_income = weeks_total_amount.idxmax() #get the top month seller total amount week_most_income_total_amount = weeks_total_amount.max().astype(int) print(f'(ES) La semana con mayor cantidad de ventas fue la {week_most_income["total_amount"]}°. El total de ingresos fue de ${week_most_income_total_amount["total_amount"]}.') print(f'(EN) The week with the most sales was the {week_most_income["total_amount"]}° one. Total income was ${week_most_income_total_amount["total_amount"]}.')

# RESOLUCIÓN GRÁFICA # RESOLUCIÓN GRÁFICA # ----- CREATE PLOT FIGURE ----- plt.figure(figsize=(10,6)) # ----- CREATE FIGURE ----- ordered_weeks_by_income_plot = plt.plot( ordered_week_names, #x axis ordered_week_amount, #y axis color=color_turquoise, #line color linewidth=5, # edge line width ) # --- PLOT GRID --- plt.grid(visible=False) # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Weeks vs income' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_turquoise_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # ----- AXIS X ----- # X LABEL TEXT xlabel_text= 'Weeks' # X LABEL LOCATION xlabel_loc = 'center' # X LABEL PADDING xlabel_pad = 20 # X LABEL FONT xlabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE X LABEL plt.xlabel( xlabel=xlabel_text, loc=xlabel_loc, labelpad=xlabel_pad, fontdict=xlabel_font ) # CREATE X TICKS plt.xticks( ordered_week_names, rotation=0 ) # ----- AXIS Y ----- # Y LABEL TEXT ylabel_text= 'Total amount ($)' # Y LABEL LOCATION ylabel_loc = 'center' # Y LABEL LOCATION ylabel_pad = 20 # Y LABEL FONT ylabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE Y LABEL plt.ylabel( ylabel=ylabel_text, loc=ylabel_loc, labelpad=ylabel_pad, fontdict=ylabel_font, ) # --- removing the top and right axes spines sns.despine() # --- SAVE FIG --- plt.savefig('img/week_income.png') # --- SHOW PLOT --- plt.show()

# RESOLUCIÓN ANALÍTICA #groupby country on the column total_amount and sum them countries_total_amount = my_df.groupby(['country_name'])[['country_name','total_amount']].sum(['total_amount']) countries_total_amount.index.name = 'country_name' #get the top country name country_most_income = countries_total_amount.idxmax() #get the top country total amount country_most_income_total_amount = countries_total_amount.max().astype(int) print(f'(ES) El país con mayor cantidad de ventas fue {country_most_income["total_amount"]}. El total de ingresos fue de ${country_most_income_total_amount["total_amount"]}.') print(f'(EN) The country with the most sales was {country_most_income["total_amount"]}. Total income was ${country_most_income_total_amount["total_amount"]}.')

# get world map from geopandas world_map = gpd.read_file(gpd.datasets.get_path("naturalearth_lowres")) #print(world_map.groupby(['continent']).sum()) # get america countries for map america=['South America','North America'] america_map = world_map.query("continent in @america") # create one figure and one subplot fig, axes = plt.subplots(figsize = (12,8),facecolor = plt.cm.Blues(.2)) axes.set_facecolor(plt.cm.Blues(.2)) #create a countries column to use with the geopandas countries_total_amount['countries'] = countries_total_amount.index #create a geopandas dataframe from the countries_total_amount df countries_total_amount = gpd.GeoDataFrame(countries_total_amount) #add a geometry column to the countries_total_amount df # this is required for using the geopandas countries_total_amount['geometry'] = gpd.tools.geocode(countries_total_amount['countries']).geometry #create a GeoDataFrame plot for the world map with the countries america_map.plot(color="white",ax=axes) # create a geopandas plot countries_total_amount.plot( column = 'total_amount', cmap = 'Greens', ax = axes, legend = True, markersize= 300, legend_kwds = { 'label': "Total amount ($)" }, missing_kwds = { 'facecolor':'Gray' } ) # create figure title fig.suptitle( 'Total amount of sales by country', y=1, fontsize = 'xx-large', fontweight = 'bold' ) # --- SAVE FIG --- plt.savefig('img/sales_per_country_map.png') # --- SHOW PLOT --- plt.show()

# get countries ordered by total_amount top_5_countries = countries_total_amount.sort_values(by="total_amount", ascending=False).head() # creating a np array for top_5_countries top_5_countries_arr = np.array([top_5_countries.index, top_5_countries["total_amount"].astype(int)]) top_5_countries_names = top_5_countries_arr[0] top_5_countries_amount = top_5_countries_arr[1] print(top_5_countries_arr)

# RESOLUCIÓN GRÁFICA # ----- CREATE PLOT FIGURE ----- plt.figure(figsize=(10,6)) # ----- CREATE PLOT BAR ----- top_5_countries_plot = plt.bar( top_5_countries_names, #x axis top_5_countries_amount, #y axis bottom=0, #y axis starts from width=0.7, #bars width align='center', #bars align color=color_green, #bars color edgecolor='black', #bars edgecolor (border) linewidth=0.5, #bars edge line width ) # --- PLOT LABEL --- plt.bar_label( top_5_countries_plot, padding=-80, color='white', fontweight='bold' ) # --- PLOT GRID --- plt.grid(visible=False) # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Top 5 countries' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_green_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # ----- AXIS X ----- # X LABEL TEXT xlabel_text= 'Countries' # X LABEL LOCATION xlabel_loc = 'center' # X LABEL PADDING xlabel_pad = 20 # X LABEL FONT xlabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE X LABEL plt.xlabel( xlabel=xlabel_text, loc=xlabel_loc, labelpad=xlabel_pad, fontdict=xlabel_font ) # CREATE X TICKS plt.xticks( top_5_countries_arr[0], rotation=0 ) # ----- AXIS Y ----- # Y LABEL TEXT ylabel_text= 'Total amount ($)' # Y LABEL LOCATION ylabel_loc = 'center' # Y LABEL LOCATION ylabel_pad = 20 # Y LABEL FONT ylabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE Y LABEL plt.ylabel( ylabel=ylabel_text, loc=ylabel_loc, labelpad=ylabel_pad, fontdict=ylabel_font, ) # --- removing the top and right axes spines sns.despine() # --- SAVE FIG --- plt.savefig('img/top_5_countries.png') # --- SHOW PLOT --- plt.show()

# RESOLUCIÓN ANALÍTICA #groupby country_name and seller_name on the column total_amount, then sum them best_seller_by_country = my_df.groupby(['country_name','seller_name']).sum(['total_amount']) #get countries from my_df countries = my_df.groupby('country_name').sum().index.values #create a new df df_best_seller_by_country = pd.DataFrame(columns=['seller_name', 'total_amount'], index=countries) for country in countries: #search for country in df loc_country = best_seller_by_country.loc[country] #get max total_amount for that country total_amount = loc_country.max()['total_amount'] #get max total_amount seller for that country seller = loc_country[(loc_country['total_amount'] == total_amount)].index[0] #add seller and total amount to final df df_best_seller_by_country.loc[country] = {'seller_name': seller, 'total_amount': total_amount} df_best_seller_by_country = df_best_seller_by_country.astype({'total_amount':'int'}) print(df_best_seller_by_country)

# RESOLUCIÓN ANALÍTICA #groupby country_name and article_name on the column total_amount, then sum them most_selled_article_by_country = my_df.groupby(['country_name','article_name']).sum(['total_amount']) #get countries from my_df countries = my_df.groupby('country_name').sum().index.values #create a new df df_most_selled_article_by_country = pd.DataFrame(columns=['article_name', 'total_amount'], index=countries) for country in countries: #search for country in df loc_country = most_selled_article_by_country.loc[country] #get max total_amount for that country total_amount = loc_country.max()['total_amount'] #get max total_amount article for that country article = loc_country[(loc_country['total_amount'] == total_amount)].index[0] #add article and total amount to final df df_most_selled_article_by_country.loc[country] = {'article_name': article, 'total_amount': total_amount} df_most_selled_article_by_country = df_most_selled_article_by_country.astype({'total_amount':'int'}) print(df_most_selled_article_by_country)

#groupby country_name and article_name on the column total_amount, then sum them most_selled_article_by_country = my_df.groupby(['country_name','article_name']).sum(['total_amount']).reset_index() most_selled_article_by_country = most_selled_article_by_country.sort_values(by='total_amount', ascending=False) most_selled_article_by_country = most_selled_article_by_country.head(20) #print(most_selled_article_by_country) countries = most_selled_article_by_country['country_name'] articles = most_selled_article_by_country['article_name'] total_amount = most_selled_article_by_country['total_amount'] # ----- CREATE PLOT FIGURE ----- # create one figure and one subplot fig, axes = plt.subplots(figsize = (12,8)) most_selled_article_by_country_plot = sns.barplot( x=countries, y=total_amount, hue=articles, palette="pastel", edgecolor=".6", data=most_selled_article_by_country, dodge=False, #ax=axes ) # --- PLOT TITLE --- # PLOT TITLE TEXT plot_title = 'Most selled articles by country' # PLOT TITLE LOCATION plot_title_loc='center' # PLOT TITLE FONT plot_title_font={ 'fontsize': 16, 'fontweight': 'bold', 'fontname':'monospace', 'color': color_brown_dark, 'verticalalignment': 'center', 'horizontalalignment': 'center' } # PLOT TITLE PADDING plot_title_pad = 30 # CREATE PLOT TITLE plt.title( label=plot_title, loc=plot_title_loc, fontdict=plot_title_font, pad=plot_title_pad ) # ----- AXIS X ----- # X LABEL TEXT xlabel_text= 'Countries' # X LABEL LOCATION xlabel_loc = 'center' # X LABEL PADDING xlabel_pad = 20 # X LABEL FONT xlabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE X LABEL plt.xlabel( xlabel=xlabel_text, loc=xlabel_loc, labelpad=xlabel_pad, fontdict=xlabel_font ) # ----- AXIS Y ----- # Y LABEL TEXT ylabel_text= 'Total amount ($)' # Y LABEL LOCATION ylabel_loc = 'center' # Y LABEL LOCATION ylabel_pad = 20 # Y LABEL FONT ylabel_font={ 'fontsize': 12, 'fontweight': 'normal', 'color': 'black', 'verticalalignment': 'center', 'horizontalalignment': 'center' } # CREATE Y LABEL plt.ylabel( ylabel=ylabel_text, loc=ylabel_loc, labelpad=ylabel_pad, fontdict=ylabel_font, ) plt.legend(title='Articles', loc='upper right') # --- removing the top and right axes spines sns.despine() # --- SAVE FIG --- plt.savefig('img/most_selled_articles_by_country.png') # --- SHOW PLOT --- plt.show()