Coffee Production Analysis
#Importing key data analysis libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Load the raw coffee production dataset from the workspace CSV
# (first row is the header, fields are comma-separated).
coffee_production = pd.read_csv("/work/Coffee_production.csv", header=0, delimiter=",")
# Bare expression: renders the frame when it ends a notebook cell.
coffee_production
# Summarize columns, non-null counts and dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit an extra "None" line.
coffee_production.info()
# Author's observation from the original run: two columns are dtype object and
# the rest are float64 -- re-check if the CSV changes.
# Rename "Type" to the more descriptive "Coffee_type"; the SQL queries further
# down filter on this column name.
coffee_production = coffee_production.rename(columns={"Type": "Coffee_type"})
# Bare expression: renders the renamed frame when it ends a notebook cell.
coffee_production
Top 5 Robusta Production Countries
-- Select the five Robusta rows with the highest Total_production.
-- NOTE(review): `coffee_production` is presumably the pandas DataFrame loaded
-- above, queried through the notebook's SQL cell -- confirm.
-- NOTE(review): the Python cell that follows reads `Coffee_Robusta`,
-- presumably the variable this query's result is stored in -- confirm in the
-- cell settings.
SELECT *
FROM coffee_production
WHERE Coffee_type = 'Robusta'
ORDER BY Total_production DESC
LIMIT 5
# Pie chart: how the rows in Coffee_Robusta split their combined production.
# NOTE(review): Coffee_Robusta is presumably the top-5 Robusta query result
# from the SQL cell above -- confirm.

# Palette and wedge labels first; neither depends on the percentage column.
color_palete = sns.color_palette('dark')
Countries = Coffee_Robusta['Country']

# Combined production of all rows in the frame, used as the denominator.
all_total_production = Coffee_Robusta['Total_production'].sum()

# Store each row's share of that total (in percent) as a new column.
Coffee_Robusta['Production_Percentage'] = (
    Coffee_Robusta['Total_production'].div(all_total_production).mul(100)
)
production_percentage = Coffee_Robusta['Production_Percentage']

# Draw the chart; autopct labels every wedge with a whole-number percentage.
plt.pie(
    production_percentage,
    labels=Countries,
    colors=color_palete,
    autopct='%.0f%%',
)
plt.title("Top 5 Robusta Production Countries")
plt.show()
Top 5 Arabica Production Countries
-- Select the five Arabica rows with the highest Total_production.
-- NOTE(review): `coffee_production` is presumably the pandas DataFrame loaded
-- above, queried through the notebook's SQL cell -- confirm.
-- NOTE(review): the Python cell that follows reads `df_coffee_arabica`,
-- presumably the variable this query's result is stored in -- confirm in the
-- cell settings.
SELECT *
FROM coffee_production
WHERE Coffee_type ='Arabica'
ORDER BY Total_production DESC
LIMIT 5
# Pie chart: how the rows in df_coffee_arabica split their combined production.
# NOTE(review): df_coffee_arabica is presumably the top-5 Arabica query result
# from the SQL cell above -- confirm.

# Palette and wedge labels first; neither depends on the percentage column.
color_palete = sns.color_palette('dark')
Countries_arabica = df_coffee_arabica['Country']

# Combined production of all rows in the frame, used as the denominator.
all_total_arabica_production = df_coffee_arabica['Total_production'].sum()

# Store each row's share of that total (in percent) as a new column.
df_coffee_arabica['Production_Percentage'] = (
    df_coffee_arabica['Total_production']
    .div(all_total_arabica_production)
    .mul(100)
)
production_percentage_arabica = df_coffee_arabica['Production_Percentage']

# Draw the chart; autopct labels every wedge with a whole-number percentage.
plt.pie(
    production_percentage_arabica,
    labels=Countries_arabica,
    colors=color_palete,
    autopct='%.0f%%',
)
plt.title("Top 5 Arabica Production Countries")
plt.show()
Top 5 Robusta/Arabica Production Countries
-- Select the five 'Robusta/Arabica' rows with the highest Total_production.
-- NOTE(review): `coffee_production` is presumably the pandas DataFrame loaded
-- above, queried through the notebook's SQL cell -- confirm.
-- NOTE(review): the Python cell that follows reads `df_robusta_arabica_coffee`,
-- presumably the variable this query's result is stored in -- confirm in the
-- cell settings.
SELECT *
FROM coffee_production
WHERE Coffee_type ='Robusta/Arabica'
ORDER BY Total_production DESC
LIMIT 5
# Pie chart: how the rows in df_robusta_arabica_coffee split their combined
# production.
# NOTE(review): df_robusta_arabica_coffee is presumably the top-5
# 'Robusta/Arabica' query result from the SQL cell above -- confirm.

# Palette and wedge labels first; neither depends on the percentage column.
color_palete = sns.color_palette('dark')
Countries_robusta_arabica = df_robusta_arabica_coffee['Country']

# Combined production of all rows in the frame, used as the denominator.
all_total_robusta_arabica_production = (
    df_robusta_arabica_coffee['Total_production'].sum()
)

# Store each row's share of that total (in percent) as a new column.
df_robusta_arabica_coffee['Production_Percentage'] = (
    df_robusta_arabica_coffee['Total_production']
    .div(all_total_robusta_arabica_production)
    .mul(100)
)
production_percentage_robusta_arabica = (
    df_robusta_arabica_coffee['Production_Percentage']
)

# Draw the chart; autopct labels every wedge with a whole-number percentage.
plt.pie(
    production_percentage_robusta_arabica,
    labels=Countries_robusta_arabica,
    colors=color_palete,
    autopct='%.0f%%',
)
plt.title("Top 5 Robusta/Arabica Production Countries")
plt.show()
Top 5 Arabica/Robusta Production Countries
-- Select the five 'Arabica/Robusta' rows with the highest Total_production.
-- NOTE(review): `coffee_production` is presumably the pandas DataFrame loaded
-- above, queried through the notebook's SQL cell -- confirm.
-- NOTE(review): the Python cell that follows reads `df_arabica_robusta`,
-- presumably the variable this query's result is stored in -- confirm in the
-- cell settings.
SELECT *
FROM coffee_production
WHERE Coffee_type ='Arabica/Robusta'
ORDER BY Total_production DESC
LIMIT 5
# Pie chart: how the rows in df_arabica_robusta split their combined
# production.
# NOTE(review): df_arabica_robusta is presumably the top-5 'Arabica/Robusta'
# query result from the SQL cell above -- confirm.

# Palette and wedge labels first; neither depends on the percentage column.
color_palete = sns.color_palette('dark')
Countries_arabica_robusta = df_arabica_robusta['Country']

# Combined production of all rows in the frame, used as the denominator.
all_total_arabica_robusta_production = (
    df_arabica_robusta['Total_production'].sum()
)

# Store each row's share of that total (in percent) as a new column.
df_arabica_robusta['Production_Percentage'] = (
    df_arabica_robusta['Total_production']
    .div(all_total_arabica_robusta_production)
    .mul(100)
)
production_percentage_arabica_robusta = (
    df_arabica_robusta['Production_Percentage']
)

# Draw the chart; autopct labels every wedge with a whole-number percentage.
plt.pie(
    production_percentage_arabica_robusta,
    labels=Countries_arabica_robusta,
    colors=color_palete,
    autopct='%.0f%%',
)
plt.title("Top 5 Arabica/Robusta Production Countries")
plt.show()