Assignment 1

# Set your working directory (where you store the csv files) here to run the code.
# Figures are loaded with markdown to make sure they are visible.
# Markdown does NOT use your workdir.
workdir = '/work/'

# Import modules needed for the program
import os

import matplotlib
import numpy
import pandas

# Show every row and column when a DataFrame is displayed
pandas.set_option("display.max_rows", None, "display.max_columns", None)

# Read CSV files with data into variables
expectancy = pandas.read_csv(workdir + 'life-expectancy.csv')
gdp = pandas.read_csv(workdir + 'gdp-per-capita-worldbank.csv')

# Select data for 2019 and drop rows with empty fields, as rows with empty
# fields represent continents and not countries
expectancy2019 = expectancy[expectancy['Year'] == 2019].dropna()
gdp2019 = gdp[gdp['Year'] == 2019].dropna()

# Keep only the entities (countries) that exist in both DataFrames.
# gdpMatch is filtered against expectancyMatch, so both end up restricted to
# the entities the two 2019 datasets have in common.
expectancyMatch = expectancy2019[expectancy2019['Entity'].isin(gdp2019['Entity'])]
gdpMatch = gdp2019[gdp2019['Entity'].isin(expectancyMatch['Entity'])]

# Check that both DFs contain exactly the same entities: the printed counts
# should contain only True
dotheymatch = expectancyMatch['Entity'].isin(gdpMatch['Entity']).value_counts()
print(dotheymatch)

# Sort by entity and reset the indexes so that row i refers to the same
# entity in both DataFrames
sortedExpectancy = expectancyMatch.sort_values(by=['Entity'], ascending=True).reset_index(drop=True)
sortedgdp = gdpMatch.sort_values(by=['Entity'], ascending=True).reset_index(drop=True)

# Merge data into one dataframe. concat(axis=1) places the frames side by side
# and pairs rows by index, which is why the indexes were reset above. Columns
# present in both inputs (e.g. 'Entity', 'Year') appear twice in the result.
gdpAndExpectancy = pandas.concat([sortedgdp, sortedExpectancy], axis=1)

# Scatter plot life expectancy vs GDP per capita.
# Points are coloured by GDP per capita using the reversed viridis colormap.
expectancy_gdp_plot = gdpAndExpectancy.plot.scatter(
    x="GDP per capita, PPP (constant 2017 international $)",
    y="Life expectancy",
    c='GDP per capita, PPP (constant 2017 international $)',
    colormap='viridis_r',
    # NOTE(review): presumably keeps the x-axis label visible when a colorbar
    # is drawn -- confirm against the pandas version in use
    sharex=False,
    figsize=(15, 10),
    title='Life expectancy vs GDP per capita',
)
# Saved relative to the current directory, not to workdir
expectancy_gdp_plot.figure.savefig('plot1.png')

1d: Which countries have a life expectancy higher than one standard deviation above the mean?

# Calculate the mean and standard deviation of life expectancy, and the
# life expectancy lying one standard deviation above the mean
expectancystd = gdpAndExpectancy['Life expectancy'].std()
expectancymean = gdpAndExpectancy['Life expectancy'].mean()
expectancyoneovermean = expectancymean + expectancystd

# Filter the dataset for rows whose life expectancy is strictly above that threshold
expectancyoneovermeancountries = gdpAndExpectancy[
    gdpAndExpectancy['Life expectancy'] > expectancyoneovermean
]

# Show the countries with a life expectancy more than one standard deviation
# above the mean (bare expression: rendered as the notebook cell output)
expectancyoneovermeancountries

1e: Which countries have high life expectancy but have low GDP (per capita)?

# The countries identified in 1d as having high life expectancy are sorted with
# the lowest GDP per capita appearing first. The top 5 countries are presented.
# drop=True discards the old row labels instead of adding them as an extra
# 'index' column, matching the other reset_index calls in this file.
expectancyovermean_sorted = expectancyoneovermeancountries.sort_values(
    by=['GDP per capita, PPP (constant 2017 international $)'],
    ascending=True,
).reset_index(drop=True)

# Bare expression: rendered as the notebook cell output
expectancyovermean_sorted.head(5)

# Import total GDP data
gdpTotal = pandas.read_csv(workdir + 'gross-domestic-product.csv')
# Filter GDP data for 2019 and drop rows with empty fields
gdpTotalfiltered = gdpTotal[gdpTotal['Year'] == 2019].dropna()

# Sort values, reset index, and merge with the 2019 life expectancy data.
# pandas.merge on 'Entity' keeps only entities present in both DataFrames.
gdpSorted = gdpTotalfiltered.sort_values(by=['Entity'], ascending=True).reset_index(drop=True)
mergedgdpTotalandLife = pandas.merge(expectancy2019, gdpSorted, on=['Entity'])

# Scatter plot life expectancy vs total GDP, coloured by GDP
totalAndlifeplot = mergedgdpTotalandLife.plot.scatter(
    x="GDP (constant 2010 US$)",
    y="Life expectancy",
    c='GDP (constant 2010 US$)',
    colormap='viridis_r',
    sharex=False,
    figsize=(15, 10),
    title='Life expectancy vs GDP',
)
# Saved relative to the current directory, not to workdir
totalAndlifeplot.figure.savefig('plot2.png')

# Add a vertical reference line at x=20000 to the life expectancy vs GDP per
# capita scatter plot (this modifies the existing axes in place)
expectancywline = expectancy_gdp_plot.axvline(x=20000)

# Bare expression: re-renders the updated figure as the notebook cell output
expectancy_gdp_plot.figure

# Save the annotated figure under a new name
expectancy_gdp_plot.figure.savefig('plot3.png')

Part 2

While working on Part 2 we found some more appropriate methods. We have nevertheless kept the cruder approaches in Part 1 to show how our knowledge of DataFrames developed.

Sources: Income inequality: https://ourworldindata.org/income-inequality, Corruption statistics: https://ourworldindata.org/grapher/share-of-people-paying-bribes-vs-corruption-perception, Happiness: https://ourworldindata.org/grapher/happiness-wvs-vs-gallup

def read_csv(file):
    """Read a CSV file from the working directory into a DataFrame.

    Args:
        file: File name relative to the module-level ``workdir``.

    Returns:
        The DataFrame produced by ``pandas.read_csv``.
    """
    # os.path.join works whether or not workdir ends with a path separator
    return pandas.read_csv(os.path.join(workdir, file))


def df_filter(df, label, subset):
    """Keep rows that have a value in ``label``, one row per ``subset`` key.

    Args:
        df: DataFrame to filter.
        label: Column that must be non-null for a row to be kept.
        subset: Column (or columns) identifying duplicates.

    Returns:
        A new DataFrame holding, for each distinct ``subset`` value, the last
        row (in the DataFrame's row order) whose ``label`` is non-null.
    """
    # NOTE(review): "last" means last in row order; this is the latest entry
    # only if the input is ordered chronologically -- confirm for each dataset
    with_value = df[df[label].notnull()]
    return with_value.drop_duplicates(subset=subset, keep='last')


def df_merge(df1, df2, label, how='inner'):
    """Merge two DataFrames on a shared column.

    Args:
        df1: Left DataFrame.
        df2: Right DataFrame.
        label: Name of the column to join on; must exist in both inputs.
        how: Join type passed to ``pandas.merge``. Defaults to ``'inner'``,
            which keeps only keys present in both inputs.

    Returns:
        The merged DataFrame.
    """
    return pandas.merge(df1, df2, on=[label], how=how)

# Read bribes data
bribes = read_csv('share-of-people-paying-bribes-vs-corruption-perception.csv')
# Only keep countries that have bribe rate data
bribesFilter = df_filter(bribes, 'Bribery Rate (%)', 'Entity')

# Read happiness data
happiness = read_csv('Happiness-WVS-vs-Gallup.csv')
# Only keep countries that have happiness data
happinessFilter = df_filter(
    happiness,
    'Share of people who are happy (World Value Survey 2014)',
    'Entity',
)

# Merge happiness and bribes with an inner join on the country name
happinessAndBribes = df_merge(bribesFilter, happinessFilter, 'Entity')

# Read economic inequality data
inequality = read_csv('economic-inequality-gini-index.csv')
# Only keep the last data entry of countries with inequality data
inequalityFilter = df_filter(inequality, 'Gini index', 'Entity')

# Merge all three datasets into one DataFrame
merged = df_merge(happinessAndBribes, inequalityFilter, 'Entity')

# Scatter plot bribery rate vs happiness, coloured by the corruption
# perception index
plot4 = merged.plot.scatter(
    x="Share of people who are happy (World Value Survey 2014)",
    y="Bribery Rate (%)",
    c='Corruption Perception Index - Transparency International (2018)',
    colormap='viridis_r',
    sharex=False,
    figsize=(15, 10),
    title='Bribery Rate vs. Happiness',
)
# Saved relative to the current directory, not to workdir
plot4.figure.savefig('plot4.png')

# Scatter plot bribery rate vs economic inequality, coloured by the
# corruption perception index
plot5 = merged.plot.scatter(
    x="Gini index",
    y="Bribery Rate (%)",
    c='Corruption Perception Index - Transparency International (2018)',
    colormap='viridis_r',
    sharex=False,
    figsize=(15, 10),
    title='Bribery Rate vs Economic inequality',
)
# Replace the raw column name with a more descriptive axis label
plot5.set_xlabel('Income inequality (Gini Index, lower = more equal)')
# Saved relative to the current directory, not to workdir
plot5.figure.savefig('plot5.png')

# Scatter plot happiness vs economic inequality (all points green)
happyVSgini = merged.plot.scatter(
    x="Gini index",
    y="Share of people who are happy (World Value Survey 2014)",
    sharex=False,
    figsize=(15, 10),
    title='Happiness vs Economic inequality',
    c='g',
)

# Thin black lines at the mean of each variable split the plot into quadrants
happyVSgini.axvline(x=merged['Gini index'].mean(), c='k', lw=0.5)
happyVSgini.axhline(
    y=merged['Share of people who are happy (World Value Survey 2014)'].mean(),
    c='k',
    lw=0.5,
)

# Replace the raw column name with a more descriptive axis label
happyVSgini.set_xlabel('Income inequality (Gini Index, lower = more equal)')
# Saved relative to the current directory, not to workdir
happyVSgini.figure.savefig('plot6.png')

1d: Which countries have a life expectancy higher than one standard deviation above the mean?

1e: Which countries have high life expectancy but have low GDP (per capita)?

Part 2

1d: Which countries have a life expectancy higher than one standard deviation above the mean?