# Loading in required libraries
# ... YOUR CODE FOR TASK 1 ...
import pandas as pd
import seaborn as sns
import numpy as np
# Load the Nobel Prize laureate records from the bundled CSV file
# into a DataFrame; every later step works off this table.
nobel = pd.read_csv(filepath_or_buffer="datasets/nobel.csv")
# Preview the first six rows to get a feel for the columns.
nobel.head(n=6)
yearint64
categoryobject
0
1901
Chemistry
1
1901
Literature
2
1901
Medicine
3
1901
Peace
4
1901
Peace
5
1901
Physics
# Total number of (possibly shared) Nobel Prizes handed out between
# 1901 and 2016: one table row per awarded laureate, so count the rows.
num_prizes = nobel.shape[0]
display(num_prizes)
# How many prizes went to each value of the `sex` column
# (value_counts skips missing entries by default).
sex_counts = nobel.loc[:, 'sex'].value_counts()
display(sex_counts)
# The ten most frequent birth countries; value_counts already sorts
# by descending frequency, so the first ten entries are the top ten.
top_countries = nobel.loc[:, 'birth_country'].value_counts().iloc[:10]
top_countries
# Flag laureates whose birth country is exactly the USA label used in the data.
nobel['usa_born_winner'] = nobel['birth_country'].eq('United States of America')
# Bucket each award year into its decade (e.g. 1987 -> 1980).
nobel['decade'] = np.floor(nobel['year'].div(10)).astype(int).mul(10)
# The mean of a boolean column is the share of True values, i.e. the
# proportion of USA-born laureates within each decade.
prop_usa_winners = (
    nobel
    .groupby('decade', as_index=False)['usa_born_winner']
    .mean()
)
# Show the per-decade proportions.
prop_usa_winners.loc[:, ['decade', 'usa_born_winner']]
decadeint64
1900 - 2010
usa_born_winnerfloat64
0.017543859649122806 - 0.42276422764227645
0
1900
0.017543859649122806
1
1910
0.075
2
1920
0.07407407407407407
3
1930
0.25
4
1940
0.3023255813953488
5
1950
0.2916666666666667
6
1960
0.26582278481012656
7
1970
0.3173076923076923
8
1980
0.31958762886597936
9
1990
0.40384615384615385
# Plotting helpers used by this cell.
import matplotlib.pyplot as plt
from matplotlib.ticker import PercentFormatter

# Apply seaborn's default theme ...
sns.set()
# ... and give every subsequent figure the same 11x7 size.
plt.rc('figure', figsize=[11, 7])
# Line plot of the share of USA-born laureates per decade.
ax = sns.lineplot(x='decade', y='usa_born_winner', data=prop_usa_winners)
# The y values are fractions in [0, 1]; xmax=1.0 renders 1.0 as "100%".
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
# Flag female laureates, then compute their share within every
# (decade, category) combination: the mean of a boolean is a proportion.
nobel['female_winner'] = nobel['sex'].eq('Female')
prop_female_winners = (
    nobel
    .groupby(['decade', 'category'], as_index=False)['female_winner']
    .mean()
)
# Plot the share of female laureates per decade, one line per prize
# category, with the y-axis formatted as percentages.
ax = sns.lineplot(x='decade', y='female_winner', hue='category', data=prop_female_winners)
ax.yaxis.set_major_formatter(PercentFormatter(xmax=1.0))
# Keep only the female laureates, then take the row with the earliest
# award year to find the first woman to win a Nobel Prize.
nobel_female = nobel.loc[nobel['sex'].eq('Female')]
nobel_female.nsmallest(n=1, columns='year')
yearint64
categoryobject
19
1903
Physics
# Laureates who appear more than once: group the rows by full name and
# keep only the groups that contain at least two prizes.
nobel.groupby('full_name').filter(lambda prizes: prizes.shape[0] > 1)
yearint64
1903 - 1981
categoryobject
Peace46.2%
Chemistry30.8%
Physics23.1%
19
1903
Physics
62
1911
Chemistry
89
1917
Peace
215
1944
Peace
278
1954
Chemistry
283
1954
Peace
298
1956
Physics
306
1958
Chemistry
340
1962
Peace
348
1963
Peace
# Parse the birth_date strings into datetimes; values that cannot be
# parsed become NaT (errors='coerce') instead of raising.
nobel['birth_date'] = pd.to_datetime(nobel['birth_date'], errors='coerce')
# Approximate age at award time: award year minus birth year
# (missing wherever the birth date is NaT).
nobel['age'] = nobel['year'].sub(nobel['birth_date'].dt.year)
# Age of laureates at award time against award year, with a LOWESS
# (locally weighted) trend line so non-linear changes over time stay
# visible. aspect=2 makes the figure twice as wide as it is tall, and the
# black line keeps the trend readable on top of the scatter points.
# NOTE: seaborn's lowess=True option relies on statsmodels being installed.
sns.lmplot(data=nobel, x='year', y='age', lowess=True, aspect=2, line_kws={'color': 'black'})
# Same plot as above, but one row of the facet grid per prize category.
# The smoothing and styling options are repeated here on purpose: without
# lowess=True seaborn would draw a straight regression line with a
# confidence band, which is not comparable with the overall plot.
sns.lmplot(data=nobel, x='year', y='age', row='category', lowess=True, aspect=2, line_kws={'color': 'black'})
# Oldest laureate as of 2016: the single row with the largest age.
display(nobel.nlargest(n=1, columns='age'))
# Youngest laureate as of 2016: the single row with the smallest age.
nobel.nsmallest(n=1, columns='age')
yearint64
categoryobject
885
2014
Peace
# First name of the youngest laureate as of 2016: take the full_name of
# the row with the smallest age and keep its first whitespace-separated word.
youngest_winner = (
    nobel.nsmallest(n=1, columns='age')['full_name']
    .iloc[0]
    .split(maxsplit=1)[0]
)
youngest_winner