# Notebook setup: run this cell first, before any other cell.
# Numerical arrays (np) and data frames (pd).
import numpy as np
import pandas as pd
# Safe settings for Pandas: make chained assignment raise an error
# rather than only warn, so accidental writes to a copy are not missed.
pd.set_option('mode.chained_assignment', 'raise')
# IPython magic (not plain Python): draw plots inline in the notebook.
%matplotlib inline
import matplotlib.pyplot as plt
# Use the 'fivethirtyeight' style sheet for all later plots.
plt.style.use('fivethirtyeight')
# Load the OKpy test library and the tests configured in 'titanic.ok'.
# The resulting `ok` object is used by every `ok.grade(...)` call below.
from client.api.notebook import Notebook
ok = Notebook('titanic.ok')
# Read the Titanic table from its CSV file and preview the first rows.
titanic = pd.read_csv('titanic_stlearn.csv')
titanic.head()
# Test you are on the right track.
_ = ok.grade('q_01_titanic')

# Counts table: one row per 'gender' value, one column per 'survived' value.
gender_by_survived = pd.crosstab(
    index=titanic['gender'],
    columns=titanic['survived'],
)
# Show the counts table in the notebook.
gender_by_survived
# Check you are on the right track.
_ = ok.grade('q_02_gender_by_survived')

# Same layout, but normalize='index' divides each row by its row total,
# giving the proportion in each 'survived' category within each gender.
gender_by_survived_p = pd.crosstab(
    index=titanic['gender'],
    columns=titanic['survived'],
    normalize='index',
)
# Show the proportions table in the notebook.
gender_by_survived_p
# Check you are on the right track.
_ = ok.grade('q_03_gender_by_survived_p')
# Run this cell.
# Work on a copy of the gender labels so `titanic` itself is not modified.
mwc = titanic['gender'].copy()
mwc.head()

# Relabel every row with age below 15 as "child"; all other rows keep
# their original gender label.
is_child = titanic['age'] < 15
mwc[is_child] = "child"
# Show the unique values and counts for the "mwc" Series.
mwc.value_counts()
_ = ok.grade('q_04_mwc')

# Proportion in each 'survived' category within each "mwc" label
# (normalize='index' divides each row by its row total).
mwc_by_survived_p = pd.crosstab(
    index=mwc,
    columns=titanic['survived'],
    normalize='index',
)
mwc_by_survived_p
_ = ok.grade('q_05_mwc_p')

# Run this cell.
# Count how many rows carry each value of the 'class' column.
titanic['class'].value_counts()
# Run this cell to create an example row-classification function.
def classify_mf_child(row):
    """Label one table row as 'adult', 'female child' or 'male child'.

    Parameters
    ----------
    row : pandas.Series
        A single row; it must provide the labels 'age' and 'gender'.

    Returns
    -------
    str
        'adult' when the age is 15 or more.  Otherwise 'female child' when
        the gender is exactly 'female', and 'male child' for any other
        gender value.

    Notes
    -----
    A missing age (NaN) fails the ``>= 15`` comparison, so such a row is
    given one of the two child labels.
    """
    is_adult = row.loc['age'] >= 15
    if not is_adult:
        # Only non-adults need the gender lookup.
        gender = row.loc['gender']
        return 'female child' if gender == 'female' else 'male child'
    return 'adult'
# Try the example function on the first two rows of the table.
classify_mf_child(titanic.iloc[0])
classify_mf_child(titanic.iloc[1])
# Apply the function to every row: axis='columns' hands the function one
# row at a time, and the results are collected into a new Series.
mf_child = titanic.apply(classify_mf_child, axis='columns')
mf_child.head()
# Hint: `in` between two strings tests whether the left string occurs
# anywhere inside the right one.
a = 'Bah humbug'
'humbug' in a
# Hint: pd.isna tests for a missing value; np.nan counts as missing.
pd.isna(np.nan)
# Keywords looked for in the 'class' value, with the role each one implies.
# Checked in this order; the first keyword found decides the role.
_CLASS_KEYWORD_ROLES = (
    ("victualling", "catering"),
    ("restaurant", "catering"),
    ("engineering", "engineering"),
    ("deck", "deck"),
)

# Name fragments that mark a row as a member of the "guarantee" group.
_GUARANTEE_NAME_MARKERS = (
    "Andrews, Mr. Thomas",
    "Campbell, Mr. William Henry",
    "Chisholm, Mr.",
    "Cunningham, Mr.",
    "Frost, Mr.",
    "Knight, Mr.",
    "Parkes, Mr.",
    "Parr, Mr.",
    "Watson, Mr.",
)

# Name fragments that mark a row as one of the musicians.
# "Krins, Mr." was previously misspelled "Kins, Mr.", which could not match
# that name, so the row fell through to a passenger class.
# NOTE(review): spelling taken from the known band roster - confirm against
# the 'name' column of the CSV.
_MUSICIAN_NAME_MARKERS = (
    "Brailey, Mr.",
    "Bricoux, Mr.",
    "Clarke, Mr.",
    "Hartley, Mr.",
    "Hume, Mr.",
    "Krins, Mr.",
    "Taylor, Mr. Percy",
    "Woodward, Mr.",
)


def classify_role(row):
    """Return the role label for one row of the Titanic table.

    Parameters
    ----------
    row : pandas.Series
        A single row; it must provide string values under the labels
        'class' and 'name'.

    Returns
    -------
    str
        One of 'catering', 'engineering', 'deck', 'guarantee', 'musician',
        '3rd', '2nd' or '1st'.

    Notes
    -----
    Checks run in a fixed order and the first match wins:

    1. crew keywords in 'class' ('victualling' and 'restaurant' both give
       'catering'; then 'engineering'; then 'deck');
    2. guarantee-group name markers, then musician name markers;
    3. the digit '3' or '2' in 'class'; anything left is '1st'.

    Name markers are matched as substrings of 'name', so any other person
    whose name contains a marker (for example 'Clarke, Mr.') receives the
    same label.
    """
    travel_class = row.loc['class']
    for keyword, role in _CLASS_KEYWORD_ROLES:
        if keyword in travel_class:
            return role

    # The name is only consulted for rows that are not crew by class.
    name = row.loc["name"]
    if any(marker in name for marker in _GUARANTEE_NAME_MARKERS):
        return "guarantee"
    if any(marker in name for marker in _MUSICIAN_NAME_MARKERS):
        return "musician"

    if "3" in travel_class:
        return "3rd"
    if "2" in travel_class:
        return "2nd"
    return "1st"
# Spot-check classify_role on rows picked by position (negative positions
# count back from the end of the table).
# These should show, in order:
# '3rd', '2nd', 'catering', 'engineering', 'catering', 'deck'.
for position in (0, 6, -1, -3, -4, -5):
    print(classify_role(titanic.iloc[position]))

# First row whose name starts with 'Brailey'.
is_brailey = titanic['name'].str.startswith('Brailey')
first_brailey = titanic.loc[is_brailey].iloc[0]
print(classify_role(first_brailey))  # Should show 'musician'

# First row whose name is exactly 'Andrews, Mr. Thomas'.
is_andrews = titanic['name'] == 'Andrews, Mr. Thomas'
first_andrews = titanic.loc[is_andrews].iloc[0]
print(classify_role(first_andrews))  # Should show 'guarantee'

# This test runs the tests above, and some extra besides.
_ = ok.grade('q_06_classify_role')
# Survival column, and a Boolean mask selecting the rows with gender "male".
survived = titanic['survived']
male_p = titanic['gender'] == "male"
# Role label for every row of the table.
role_by_survived = titanic.apply(classify_role, axis='columns')
# Proportion in each 'survived' category within each role.
# NOTE(review): the roles cover every row but `survived[male_p]` covers male
# rows only; crosstab aligns the two Series on their shared index labels, so
# the table appears to describe males only - confirm this is intended.
role_by_survived_p = pd.crosstab(
    index=role_by_survived,
    columns=survived[male_p],
    normalize='index',
)
role_by_survived_p
# For your convenience, you can run this cell to run all the tests at once!
import os
# Grade every test file in "tests" whose name starts with 'q'.
# Only '.py' files are considered, so stray entries (editor backups,
# compiled files) are not passed to ok.grade under a mangled name, and the
# listing is sorted so the tests always run in the same order.
_ = [ok.grade(os.path.splitext(q)[0])
     for q in sorted(os.listdir("tests"))
     if q.startswith('q') and q.endswith('.py')]