Samuel Ivarsson and Martin Andersson

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

################# GDP vs life expectancy #################


def _countries_in_year(df, year):
    """Return the rows of *df* for *year* that describe individual countries.

    Rows whose ``Code`` is missing are dropped (per the original author's
    note these are areas rather than specific countries), as is the
    ``OWID_WRL`` world row.  A new frame is returned at every step instead
    of calling ``dropna(inplace=True)`` on a filtered slice, which is what
    triggers pandas' SettingWithCopyWarning.
    """
    rows = df.loc[df['Year'] == year].dropna(subset=['Code'])
    return rows[rows['Code'] != 'OWID_WRL']


gdpDF = pd.read_csv('/work/gdp-per-capita-worldbank.csv')
lifeDF = pd.read_csv('/work/life-expectancy-at-birth-total-years.csv')
gdp2DF = pd.read_csv('/work/gdp-world-regions-stacked-area.csv')

# Names of the value columns in the three data sets.
gdpString = 'GDP per capita, PPP (constant 2017 international $)'
lifeString = 'Life expectancy at birth, total (years)'
gdp2String = 'GDP'

# Country-level rows for 2017 from both data sets.
gdp2017 = _countries_in_year(gdpDF, 2017)
life2017 = _countries_in_year(lifeDF, 2017)

# Combine the data based on the code, entity and year.
result = pd.merge(gdp2017, life2017, on=['Code', 'Entity', 'Year'])

# GDP per capita on the x-axis, life expectancy on the y-axis.
x = result[gdpString]
y = result[lifeString]

plt.scatter(x, y)
plt.title('GDP per capita (PPP) vs. Life expectancy')
plt.xlabel(gdpString)
plt.ylabel(lifeString)
plt.show()

# Standard deviation and mean of life expectancy.
# np.std defaults to ddof=0, i.e. the population standard deviation.
lifeSTD = np.std(result[lifeString])
lifeMean = np.mean(result[lifeString])

# Countries whose life expectancy is more than one standard deviation
# above the mean.
var1 = result.loc[result[lifeString] > lifeMean + lifeSTD]

# High life expectancy (above the mean) but low GDP per capita.
# NOTE(review): 23937 is a hard-coded threshold whose origin is not shown
# here -- confirm how it was chosen.
lowGDP = 23937
var2 = result.loc[result[gdpString] < lowGDP]
var2 = var2.loc[var2[lifeString] > lifeMean]

# Does every strong economy (normally indicated by GDP) have high life
# expectancy?
gdp2_2017 = _countries_in_year(gdp2DF, 2017)
result2 = pd.merge(gdp2_2017, life2017, on=['Code', 'Entity', 'Year'])

# Life expectancy of the ten countries with the highest GDP.
result2Sorted = result2.sort_values(by=[gdp2String], ascending=False)
x = result2Sorted['Code'].head(10)
y = result2Sorted[lifeString].head(10)

# plt.subplots() leaves room for tick labels and titles; an axes created
# with add_axes([0, 0, 1, 1]) fills the whole canvas and pushes them
# outside the figure.
fig, ax = plt.subplots()
ax.bar(x, y)
ax.set_title('Life expectancy of the 10 countries with the highest GDP')
ax.set_xlabel('Country code')
ax.set_ylabel(lifeString)
plt.show()

# Life expectancy of the ten countries with the highest GDP per capita.
resultSorted = result.sort_values(by=[gdpString], ascending=False)
x = resultSorted['Code'].head(10)
y = resultSorted[lifeString].head(10)

fig, ax = plt.subplots()
ax.bar(x, y)
ax.set_title('Life expectancy of the 10 countries with the highest GDP per capita')
ax.set_xlabel('Country code')
ax.set_ylabel(lifeString)
plt.show()

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

################# Deaths by cancer then vs now #################


def _cancer_countries_in_year(frame, year):
    """Return the rows of *frame* for *year* that describe individual countries.

    Rows whose ``Code`` is missing are dropped (per the original author's
    note these are areas rather than specific countries), as is the
    ``OWID_WRL`` world row.  A new frame is returned at every step instead
    of calling ``dropna(inplace=True)`` on a filtered slice, which is what
    triggers pandas' SettingWithCopyWarning.
    """
    rows = frame.loc[frame['Year'] == year].dropna(subset=['Code'])
    return rows[rows['Code'] != 'OWID_WRL']


df = pd.read_csv('/work/cancerDeathsThenVsNow/cancer-death-rates-by-type.csv')
colString = 'Deaths - Breast cancer - Sex: Both - Age: Age-standardized (Rate)'

# Country-level rows for the "now" (2019) and "then" (1990) snapshots.
xByYear = _cancer_countries_in_year(df, 2019)
yByYear = _cancer_countries_in_year(df, 1990)

# Combine the data based on the code and entity.  Columns present in both
# frames get pandas' default suffixes: '_x' for 2019 and '_y' for 1990.
result = pd.merge(xByYear, yByYear, on=['Code', 'Entity'])

# Sort by the breast cancer death rate in 1990, highest first.
resultSorted = result.sort_values(by=[colString + '_y'], ascending=False)

# Bar plot of the five countries with the highest rate in 1990.
res = resultSorted.head(5)
ax = res.plot.bar(
    x='Code',
    y=[colString + '_y', colString + '_x'],
    xlabel='Country code',
    ylabel='Deaths per 100 000',
)
ax.set_title('Deaths - Breast cancer')
ax.legend(['1990', '2019'])

# Select the same countries as in the bar plot, but across all years.
# The codes are taken from `res` rather than hard-coded so the two plots
# cannot drift apart.  Rows with a missing code never match, so no extra
# dropna is needed.
allYears = df[df['Code'].isin(res['Code'])]

# One column per country, one row per year.
allYears = allYears.pivot(index='Year', columns='Code', values=colString)
ax = allYears.plot(xlabel='Year', ylabel='Deaths per 100 000')
ax.set_title('Deaths - Breast cancer')

# Display both figures when run outside a notebook.
plt.show()

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

################# Happiness vs Vacation #################
xDF = pd.read_csv('/work/Happiness vs vacation/days-of-vacation-and-holidays.csv')
yDF = pd.read_csv('/work/Happiness vs vacation/share-of-people-who-say-they-are-happy.csv')

# Names of the value columns in the two data sets.
xString = 'Days of vacation and holidays for full-time production workers in non-agricultural activities (Huberman & Minns 2007)'
yString = 'Share of people who are happy (World Value Survey 2014)'

# Vacation data from 2000, keeping only entries that have a code (per the
# original author's note, entries without one are areas, not countries).
# dropna returns a new frame here; calling it with inplace=True on a
# filtered slice triggers pandas' SettingWithCopyWarning.
xByYear = xDF.loc[xDF['Year'] == 2000].dropna(subset=['Code'])

# Happiness data from 1998.
# NOTE(review): the two data sets are sampled in different years (2000 vs
# 1998); presumably these are the closest years available in both --
# confirm.
yByYear = yDF.loc[yDF['Year'] == 1998]

# Combine the data based on the code and entity.
result = pd.merge(xByYear, yByYear, on=['Code', 'Entity'])

# np.polyfit cannot handle missing values, so keep only rows where both
# measurements are present.
result = result.dropna(subset=[xString, yString])

# Days of vacation on the x-axis, happiness percentage on the y-axis.
x = result[xString]
y = result[yString]

# Plot the data as a scatter plot.
plt.scatter(x, y)
plt.title('Happiness vs Vacation')
plt.xlabel("days")
plt.ylabel("percent")

# Fit a first-degree polynomial (least squares) and draw it as the
# regression line on top of the scatter plot.
m, b = np.polyfit(x, y, 1)
plt.plot(x, m*x + b)

# Show the figure only after the regression line has been added.
plt.show()

Samuel Ivarsson and Martin Andersson

Samuel Ivarsson and Martin Andersson