world demographic trends

import pandas as pd import numpy as np import matplotlib.pyplot as plt from scipy import stats

# Load the population table from the CSV file located next to this
# notebook/script; every later cell reads from `world_population`.
world_population = pd.read_csv(filepath_or_buffer="./world_population.csv")

# Bare expression: rendered as the cell output when run in a notebook.
world_population

# Structural overview of the loaded table: first the dtype of every column,
# then the (rows, columns) shape. Each object is rendered in turn.
for summary in (world_population.dtypes, world_population.shape):
    display(summary)

# Ten rows with the largest "Population (2020)" values, restricted to the
# first two columns of the table.
# NOTE(review): the positional `:2` presumably selects the country name and
# the 2020 population -- confirm against the CSV column order.
# `.copy()` detaches the result from the sorted frame so the column
# assignment below operates on an independent object instead of a slice
# (assigning into a slice can raise pandas' SettingWithCopyWarning).
top_populated = (
    world_population.sort_values(by="Population (2020)", ascending=False)
    .iloc[:10, :2]
    .copy()
)
top_populated.columns = ["Country", "Population"]

# Express the population in millions, rounded to two decimals (vectorised
# instead of a per-element lambda).
top_populated["Population"] = (top_populated["Population"] / 1_000_000).round(2)

# Replace the original row labels with a clean 0..9 index.
top_populated = top_populated.reset_index(drop=True)

# Bare expression: rendered as the cell output when run in a notebook.
top_populated

# Vertical bar chart of `top_populated` (one bar per country, height taken
# from the "Population" column), with the value printed above each bar.
canvas = plt.figure(dpi=100, facecolor="#FBFEB7")
# The axes rectangle is given in figure-relative units; adding the axes also
# makes it the current axes for the pyplot calls below.
canvas.add_axes(rect=[0, 0, 1.9, 1.4], facecolor="#FDFFC9")

# One colour per bar.
colors = [
    "#910000",
    "#C88A0F",
    "#549431",
    "#532B72",
    "#B14BB9",
    "#822805",
    "#02FFE5",
    "#C8CA2B",
    "#6928BF",
    "#12B40D",
]
bars = plt.bar(
    x=top_populated["Country"],
    height=top_populated["Population"],
    color=colors,
    width=0.7,
)

# Titles, axis labels and tick styling.
plt.title(
    "Most populated countries in the world",
    fontdict={"size": 16, "weight": "bold"},
    pad=11,
)
plt.xlabel("Country", fontdict={"size": 14})
plt.ylabel("Population [M]", fontdict={"size": 14})
# Only the lower y-limit is set; the upper limit keeps its current value.
plt.ylim(bottom=0.001)
plt.tick_params(size=7, width=1.2, labelsize=12)

# Write each bar's value centred horizontally on the bar, just above its top.
for patch in bars:
    value = patch.get_height()
    centre_x = patch.get_x() + patch.get_width() / 2
    plt.text(
        centre_x,
        value,
        s=f"{value}",
        ha="center",
        va="bottom",
        fontdict={"size": 12},
    )

plt.show()

# Ten rows with the highest "Fert. Rate", keeping only the country and the
# fertility-rate columns under shorter names.
newborns = (
    world_population.sort_values(by="Fert. Rate", ascending=False)
    .loc[:, ["Country/Other", "Fert. Rate"]]
    .reset_index(drop=True)
    .rename(columns={"Country/Other": "country", "Fert. Rate": "fertility_rate"})
    .head(10)
)

# One colour per bar. The name `colors` is also read by later cells.
colors = [
    "#6928BF",
    "#75B199",
    "#12B40D",
    "#FB443A",
    "#0E3C3C",
    "#604904",
    "#B14BB9",
    "#822805",
    "#02FFE5",
    "#C8CA2B",
]

fig, ax = plt.subplots(figsize=(14, 8))
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")

bars = ax.bar(
    x=newborns["country"],
    height=newborns["fertility_rate"],
    width=0.5,
    color=colors,
)
# Disabled in the original: would remove the y-axis ticks.
# ax.axes.get_yaxis().set_ticks([])

# Write each bar's value centred horizontally on the bar, just above its top.
for patch in bars:
    value = patch.get_height()
    centre_x = patch.get_x() + patch.get_width() / 2
    ax.text(
        centre_x,
        value,
        s=f"{value}",
        ha="center",
        va="bottom",
        fontdict={"size": 12},
    )

# `ax` is the current axes, so the axes methods below target the same
# object the pyplot-level helpers would.
ax.set_title(
    "Countries with most newborns",
    fontdict={"size": 16, "weight": "bold"},
    pad=11,
)
ax.set_xlabel("Country", fontdict={"size": 15})
ax.set_ylabel("Fertility rate", fontdict={"size": 15})
# Only the lower y-limit is set; the upper limit keeps its current value.
ax.set_ylim(0.001)
ax.tick_params(size=7, width=1.2, labelsize=14)

plt.show()

# Scatter plot of "Migrants (net)" (x) against "Net Change" (y) for every
# row of the table, with automatic axis limits.
fig, ax = plt.subplots(figsize=(10, 7))
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")

x = world_population["Migrants (net)"]
y = world_population["Net Change"]

# The marker colour is taken from whichever `colors` list is currently
# defined by an earlier cell.
ax.scatter(x=x, y=y, c=colors[7], label="Original data")

# `ax` is the current axes, so the axes methods below target the same
# object the pyplot-level helpers would.
ax.set_title(
    "Amount of migrants Vs Change in population",
    fontdict={"size": 16, "weight": "bold"},
    pad=10,
)
ax.set_xlabel("Migrants", fontdict={"size": 14})
ax.set_ylabel("Net change", fontdict={"size": 14})
ax.tick_params(size=7, width=1.2, labelsize=12)

plt.show()

# Scatter plot of "Migrants (net)" (x) against "Net Change" (y), restricted
# to a fixed window, with a least-squares regression line fitted on all rows
# and a printed summary of the fit.
fig, ax = plt.subplots(figsize=(10, 7))
y = world_population["Net Change"]
x = world_population["Migrants (net)"]

# The marker colour is taken from whichever `colors` list is currently
# defined by an earlier cell.
ax.scatter(x=x, y=y, c=colors[7], label="Original data")
fig.set_facecolor("#FBFEB7")
ax.set_facecolor("#FDFFC9")

plt.title(
    "Amount of migrants Vs Change in population",
    fontdict={"size": 16, "weight": "bold"},
    pad=10,
)
plt.ylabel("Net change", fontdict={"size": 14})
plt.xlabel("Migrants", fontdict={"size": 14})

# Fixed viewing window; points outside it are not shown but still take part
# in the regression below.
plt.xlim((-200_000, 200_000))
plt.ylim((-2_000_000, 0.8 * 10_000_000))
plt.tick_params(size=7, width=1.2, labelsize=12)

# Regression line
res = stats.linregress(x, y)
plt.plot(x, res.slope * x + res.intercept, label="Fitted line", c="#549431")
plt.legend(facecolor="#FBFEB7", edgecolor="grey", fontsize=12)
plt.show()

# Compute the Pearson statistics once and reuse both values, rather than
# calling `pearsonr` separately for the coefficient and the p-value.
corr, p_value = stats.pearsonr(x, y)
print(
    f" R-squared: {res.rvalue**2}, correlation coefficient:{corr}, p-value: {p_value}"
)

# Two-column table taken by position from `world_population`.
# NOTE(review): columns 0 and 4 are presumably the country name and the
# population density -- confirm against the CSV column order.
# `.copy()` makes the frame independent of `world_population` before it is
# relabelled.
density = world_population.iloc[:, [0, 4]].copy()
density.columns = ["Country", "Density"]

# Keep the 15 rows with the largest density, then order them ascending.
# Done as one non-inplace chain: sorting a slice with `inplace=True` can
# raise pandas' SettingWithCopyWarning. The index is reset after the
# descending sort only, so the final frame keeps the rank labels
# (0 = densest) exactly as before.
density = (
    density.sort_values(by="Density", ascending=False)
    .reset_index(drop=True)
    .iloc[:15]
    .sort_values(by="Density")
)

# Bare expression: rendered as the cell output when run in a notebook.
density

# Horizontal bar chart of `density` (one bar per country, length taken from
# the "Density" column), with the value printed at the end of each bar.
canvas = plt.figure(dpi=70, facecolor="#FBFEB7")
# The axes rectangle is given in figure-relative units; adding the axes also
# makes it the current axes for the pyplot calls below.
canvas.add_axes(rect=[0, 0, 1.2, 1.2], facecolor="#FDFFC9")

# One colour per bar. The name `colors` is also read by other cells.
colors = [
    "#910000",
    "#C88A0F",
    "#549431",
    "#532B72",
    "#B14BB9",
    "#822805",
    "#02FFE5",
    "#C8CA2B",
    "#6928BF",
    "#12B40D",
    "#FB443A",
    "#0E3C3C",
    "#604904",
    "#B14BB9",
    "#822805",
]
bars = plt.barh(
    y=density["Country"],
    width=density["Density"],
    height=0.8,
    color=colors,
)

# Write each bar's value just past its right end, vertically centred.
# `label_y_position` is intentionally left holding the centre of the last
# bar: it is reused below to set the upper y-limit.
for patch in bars:
    label_y_position = patch.get_y() + patch.get_height() / 2
    bar_length = patch.get_width()
    plt.text(
        bar_length,
        label_y_position,
        s=f"{bar_length}",
        ha="left",
        va="center",
        fontdict={"size": 10},
    )

plt.title(
    "Most densely populated countries in the world",
    fontdict={"size": 16, "style": "italic"},
    pad=11,
)
plt.xlabel("Density (P/Km²)", fontdict={"size": 14}, labelpad=10)
# Disabled in the original: y-axis label.
# plt.ylabel("Country", fontdict={"size":14})
plt.tick_params(size=7, width=1.2, labelsize=12)

# No x ticks are drawn; the values are printed next to the bars instead.
plt.xticks([])
plt.xlim(left=0.001, right=23_600)
# Upper limit: 0.8 above the centre of the last bar drawn.
plt.ylim(-0.8, label_y_position + 0.8)

plt.show()