import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
# Load the raw dataset from the working directory; every later cell reads
# from this frame.
world_population = pd.read_csv(filepath_or_buffer="./world_population.csv")
world_population  # bare expression: rendered only when it ends a notebook cell
# Show the column dtypes and the (rows, columns) shape.
# `display` is provided by IPython/Jupyter, not by plain Python.
display(
    world_population.dtypes,
    world_population.shape,
)
# Ten rows with the largest "Population (2020)", keeping only the first two
# columns of the frame (relabelled "Country" and "Population" below).
# .copy() detaches the selection from the sorted frame so that the column
# assignment that follows writes to an independent object instead of a slice
# (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
top_populated = (
    world_population.sort_values(by="Population (2020)", ascending=False)
    .iloc[:10, :2]
    .copy()
)
# Express the population in millions, rounded to two decimals (vectorised
# instead of a per-element lambda).
top_populated["Population (2020)"] = (
    top_populated["Population (2020)"] / 1_000_000
).round(2)
top_populated.columns = ["Country", "Population"]
# Renumber rows 0..9 so the index reflects the ranking.
top_populated.reset_index(drop=True, inplace=True)
top_populated  # bare expression: rendered only when it ends a notebook cell
# Vertical bar chart of `top_populated`: one coloured bar per country, each
# annotated with its population value (already expressed in millions).
# The axes rectangle is given in figure fractions as [left, bottom, width, height].
plt.figure(dpi=100, facecolor="#FBFEB7").add_axes(
    rect=[0, 0, 1.9, 1.4],
    facecolor="#FDFFC9",
)
colors = [
    "#910000", '#C88A0F', "#549431", "#532B72", "#B14BB9",
    "#822805", "#02FFE5", "#C8CA2B", "#6928BF", "#12B40D",
]
bars = plt.bar(
    x=top_populated.Country,
    height=top_populated.Population,
    color=colors,
    width=0.7,
)
plt.title(
    "Most populated countries in the world",
    fontdict={"size": 16, "weight": "bold"},
    pad=11,
)
plt.xlabel("Country", fontdict={"size": 14})
plt.ylabel("Population [M]", fontdict={"size": 14})
# Only the lower y-limit is set; the upper limit keeps its current value.
plt.ylim(bottom=0.001)
plt.tick_params(size=7, width=1.2, labelsize=12)
# Write each bar's value centred just above the bar.
for bar in bars:
    value = bar.get_height()
    x_center = bar.get_x() + bar.get_width() / 2
    plt.text(
        x_center,
        value,
        s=f"{value}",
        ha="center",
        va="bottom",
        fontdict={"size": 12},
    )
plt.show()
# Ten rows with the highest "Fert. Rate", reduced to two columns that are
# renamed to `country` / `fertility_rate`; the index is renumbered from 0
# before the top ten are taken.
newborns = (
    world_population[["Country/Other", "Fert. Rate"]]
    .sort_values(by="Fert. Rate", ascending=False)
    .reset_index(drop=True)
    .rename(columns={"Country/Other": "country", "Fert. Rate": "fertility_rate"})
    .head(10)
)
# Vertical bar chart of `newborns`: fertility rate per country, each bar
# annotated with its value.
# NOTE: `colors` is deliberately kept as a module-level name; the scatter
# plots that follow read `colors[7]` from this list.
colors = [
    "#6928BF", "#75B199", "#12B40D", "#FB443A", "#0E3C3C",
    "#604904", "#B14BB9", "#822805", "#02FFE5", "#C8CA2B",
]
fig, ax = plt.subplots(figsize=(14, 8))
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")
bars = ax.bar(
    x=newborns.country,
    height=newborns.fertility_rate,
    width=0.5,
    color=colors,
)
# Write each bar's value centred just above the bar.
for bar in bars:
    rate = bar.get_height()
    x_center = bar.get_x() + bar.get_width() / 2
    ax.text(x_center, rate, s=f"{rate}", ha="center", va="bottom", fontdict={"size": 12})
ax.set_title("Countries with most newborns", fontdict={"size": 16, "weight": "bold"}, pad=11)
ax.set_xlabel("Country", fontdict={"size": 15})
ax.set_ylabel("Fertility rate", fontdict={"size": 15})
# Only the lower y-limit is set; the upper limit keeps its current value.
ax.set_ylim(0.001)
ax.tick_params(size=7, width=1.2, labelsize=14)
plt.show()
# Scatter plot over all rows of `world_population`:
# "Migrants (net)" on the x-axis against "Net Change" on the y-axis.
x = world_population["Migrants (net)"]
y = world_population["Net Change"]
fig, ax = plt.subplots(figsize=(10, 7))
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")
# Marker colour is taken from the palette defined for the previous chart.
ax.scatter(x=x, y=y, c=colors[7], label="Original data")
ax.set_title(
    "Amount of migrants Vs Change in population",
    fontdict={"size": 16, "weight": "bold"},
    pad=10,
)
ax.set_xlabel("Migrants", fontdict={"size": 14})
ax.set_ylabel("Net change", fontdict={"size": 14})
ax.tick_params(size=7, width=1.2, labelsize=12)
plt.show()
# Same scatter as above ("Migrants (net)" vs "Net Change"), zoomed to a fixed
# window, with an ordinary least-squares line fitted over ALL rows (the axis
# limits only crop the view, they do not filter the data used for the fit).
fig, ax = plt.subplots(figsize=(10, 7))
y = world_population["Net Change"]
x = world_population["Migrants (net)"]
ax.scatter(x=x, y=y, c=colors[7], label="Original data")
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")
plt.title("Amount of migrants Vs Change in population", fontdict={"size": 16, "weight": "bold"}, pad=10)
plt.ylabel("Net change", fontdict={"size": 14})
plt.xlabel("Migrants", fontdict={"size": 14})
# Fixed viewing window for both axes.
plt.xlim((-200_000, 200_000))
plt.ylim((-2_000_000, 0.8 * 10_000_000))
plt.tick_params(size=7, width=1.2, labelsize=12)
# Regression line
res = stats.linregress(x, y)
plt.plot(x, res.slope * x + res.intercept, label='Fitted line', c='#549431')
plt.legend(facecolor="#FBFEB7", edgecolor="grey", fontsize=12)
plt.show()
# Compute the Pearson correlation once and reuse it for both printed values
# (previously the same call was evaluated twice inside the f-string).
pearson = stats.pearsonr(x, y)
print(f" R-squared: {res.rvalue**2}, correlation coefficient:{pearson[0]}, p-value: {pearson[1]}")
# Fifteen rows with the largest density, ordered ascending for the horizontal
# bar chart that follows.
# Columns are picked by position (0 and 4) and relabelled "Country"/"Density";
# NOTE(review): this assumes column 4 of the CSV is the density column — confirm.
density = world_population.iloc[:, [0, 4]].copy()
density.columns = ["Country", "Density"]
# Descending sort + renumbered index + first 15 rows selects the top 15; the
# final ascending sort returns a new frame, so nothing is sorted in place on a
# slice (the previous inplace sort on `.iloc[:15]` triggered pandas'
# SettingWithCopyWarning).
density = (
    density.sort_values(by="Density", ascending=False)
    .reset_index(drop=True)
    .iloc[:15]
    .sort_values(by="Density")
)
density  # bare expression: rendered only when it ends a notebook cell
# Horizontal bar chart of `density`: one coloured bar per country, each
# annotated with its density value to the right of the bar. The x tick marks
# are removed and no y-axis label is set.
plt.figure(dpi=70, facecolor="#FBFEB7").add_axes(
    rect=[0, 0, 1.2, 1.2],
    facecolor="#FDFFC9",
)
colors = [
    "#910000", '#C88A0F', "#549431", "#532B72", "#B14BB9",
    "#822805", "#02FFE5", "#C8CA2B", "#6928BF", "#12B40D",
    "#FB443A", "#0E3C3C", "#604904", "#B14BB9", "#822805",
]
bars = plt.barh(
    y=density.Country,
    width=density.Density,
    height=0.8,
    color=colors,
)
for bar in bars:
    # Vertical centre of the bar; the value from the LAST bar is reused
    # below to set the upper y-limit.
    label_y_position = bar.get_y() + bar.get_height() / 2
    value = bar.get_width()
    plt.text(
        value,
        label_y_position,
        s=f"{value}",
        ha="left",
        va="center",
        fontdict={"size": 10},
    )
plt.title(
    "Most densely populated countries in the world",
    fontdict={"size": 16, "style": "italic"},
    pad=11,
)
plt.xlabel("Density (P/Km²)", fontdict={"size": 14}, labelpad=10)
plt.tick_params(size=7, width=1.2, labelsize=12)
plt.xticks([])
# Fixed x-range; y-range runs from just below the first bar to just above the
# centre of the last bar drawn.
plt.xlim(0.001, 23_600)
plt.ylim(-0.8, label_y_position + 0.8)
plt.show()