# Import libraries needed for this assignment
import altair as alt
import pandas as pd
import numpy as np
# Use Altair's "default" data transformer with max_rows=None, i.e. no row limit.
alt.data_transformers.enable("default", max_rows=None)

# Load the power-lifting CSV and print a summary of its columns.
raw_csv_path = 'data/power_lifting.csv'
pl_raw = pd.read_csv(raw_csv_path)
pl_raw.info()
#data wrangling
# Drop columns that no later step in this file references, in two stages:
#   1. the *4Kg attempt columns plus Age and MeetID       -> pl
#   2. BodyweightKg and Division, applied on top of pl    -> pl_clean
# pl_clean must be derived from pl, not from pl_raw; otherwise the first drop
# is silently discarded and those columns are carried into every data frame
# that is later handed to a chart.
pl = pl_raw.drop(columns=['Squat4Kg', 'Bench4Kg', 'Deadlift4Kg', 'Age', 'MeetID'])
pl_clean = pl.drop(columns=['BodyweightKg', 'Division'])
# Keep only rows whose Equipment is 'Raw', split them by Sex, and discard rows
# that lack a Wilks score or a weight class.
raw_only = pl_clean['Equipment'] == 'Raw'
required_cols = ['Wilks', 'WeightClassKg']
pl_women = pl_clean[raw_only & (pl_clean['Sex'] == 'F')].dropna(subset=required_cols)
pl_men = pl_clean[raw_only & (pl_clean['Sex'] == 'M')].dropna(subset=required_cols)
# Women: mean of each best lift within a weight class, repeated on every row
# of that class (groupby + transform keeps the original row index).
women_by_class = pl_women.groupby('WeightClassKg')
women_bb = women_by_class['BestBenchKg'].transform('mean').sort_values()
women_bd = women_by_class['BestDeadliftKg'].transform('mean').sort_values()
women_sq = women_by_class['BestSquatKg'].transform('mean').sort_values()

# assign() aligns each Series on the row index, so the sorted order above does
# not change which value lands on which row.
pl_women_avs = pl_women.assign(
    best_dead_av=women_bd,
    best_bench_av=women_bb,
    best_squat_av=women_sq,
)
# Men: mean of each best lift within a weight class, repeated on every row
# of that class (groupby + transform keeps the original row index).
men_by_class = pl_men.groupby('WeightClassKg')
men_bb = men_by_class['BestBenchKg'].transform('mean').sort_values()
men_bd = men_by_class['BestDeadliftKg'].transform('mean').sort_values()
men_sq = men_by_class['BestSquatKg'].transform('mean').sort_values()

# assign() aligns each Series on the row index, so the sorted order above does
# not change which value lands on which row.
pl_men_avs = pl_men.assign(
    best_dead_av=men_bd,
    best_bench_av=men_bb,
    best_squat_av=men_sq,
)
# Print the column / non-null count / dtype summary of the women's subset.
pl_women.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 14493 entries, 3 to 105237
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 MeetID 14493 non-null int64
1 Name 14493 non-null object
2 Sex 14493 non-null object
3 Equipment 14493 non-null object
4 Age 4248 non-null float64
5 WeightClassKg 14493 non-null object
6 Squat4Kg 42 non-null float64
7 BestSquatKg 11453 non-null float64
8 Bench4Kg 142 non-null float64
9 BestBenchKg 14027 non-null float64
10 Deadlift4Kg 191 non-null float64
11 BestDeadliftKg 12758 non-null float64
12 TotalKg 14493 non-null float64
13 Place 14492 non-null object
14 Wilks 14493 non-null float64
dtypes: float64(9), int64(1), object(5)
memory usage: 1.8+ MB
# Print the column / non-null count / dtype summary of the men's subset.
pl_men.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 32459 entries, 23 to 105235
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 MeetID 32459 non-null int64
1 Name 32459 non-null object
2 Sex 32459 non-null object
3 Equipment 32459 non-null object
4 Age 9370 non-null float64
5 WeightClassKg 32459 non-null object
6 Squat4Kg 59 non-null float64
7 BestSquatKg 21700 non-null float64
8 Bench4Kg 346 non-null float64
9 BestBenchKg 31042 non-null float64
10 Deadlift4Kg 275 non-null float64
11 BestDeadliftKg 25325 non-null float64
12 TotalKg 32459 non-null float64
13 Place 32446 non-null object
14 Wilks 32459 non-null float64
dtypes: float64(9), int64(1), object(5)
memory usage: 4.0+ MB
# Women: per-row sum of the three weight-class lift means, then a bar chart
# counting rows at each mean_total value.
pl_women_avs["mean_total"] = (
    pl_women_avs["best_squat_av"]
    + pl_women_avs["best_bench_av"]
    + pl_women_avs["best_dead_av"]
)
womens_dist = (
    alt.Chart(pl_women_avs)
    .mark_bar(color='purple', size=20, strokeWidth=0.8, stroke="white", opacity=0.7)
    .encode(
        x=alt.X(
            'mean_total:Q',
            title='Mean of Total Lifts in Kg',
            scale=alt.Scale(domain=[100, 700]),
        ),
        y=alt.Y('count()', title="Count"),
    )
    .properties(title='Distribution of Womens\' Mean Lifts', width=500)
)

# Men: same construction on the men's frame, drawn in green on the same x domain.
pl_men_avs["mean_total"] = (
    pl_men_avs["best_squat_av"]
    + pl_men_avs["best_bench_av"]
    + pl_men_avs["best_dead_av"]
)
mens_dist = (
    alt.Chart(pl_men_avs)
    .mark_bar(color='green', size=20, strokeWidth=0.8, stroke="white", opacity=0.7)
    .encode(
        x=alt.X(
            'mean_total:Q',
            title='Mean of Total Lifts in Kg',
            scale=alt.Scale(domain=[100, 700]),
        ),
        y=alt.Y('count()', title="Count"),
    )
    .properties(title="Distribution of Mens\' Mean Lifts", width=500)
)

# Stack the two distributions vertically (women on top).
womens_dist & mens_dist
# Women: mean Wilks score per weight class, with a dropdown that highlights
# one class at a time.
pl_women = pl_women.sort_values(by='WeightClassKg')
wilks_wc = (
    alt.Chart(pl_women)
    .mark_bar()
    .encode(
        x=alt.X('WeightClassKg', title="Weight Class in Kg"),
        y=alt.Y('mean(Wilks)', title="Mean of Wilks Score"),
        color='count()',
    )
    .properties(title='Womens Wilks Scores by Weight Class', width=550)
)

# Dropdown bound to a single-value selection on WeightClassKg, initialised to '74'.
weightclass_f = sorted(pl_women['WeightClassKg'].unique())
dropdown_wc = alt.binding_select(
    name='Weight Class Kg Women',
    options=weightclass_f,
)
select_wc_f = alt.selection_single(
    fields=['WeightClassKg'],
    bind=dropdown_wc,
    init={'WeightClassKg': '74'},
)

# Replace the count-based colour with a fixed purple and fade unselected bars.
wilks_wc_drop_f = wilks_wc.add_selection(select_wc_f).encode(
    color=alt.value('purple'),
    opacity=alt.condition(select_wc_f, alt.value(0.8), alt.value(0.08)),
)
# Men: mean Wilks score per weight class, with a dropdown that highlights
# one class at a time. Note that wilks_wc is rebound to the men's chart here.
pl_men = pl_men.sort_values(by='WeightClassKg')
wilks_wc = (
    alt.Chart(pl_men)
    .mark_bar()
    .encode(
        x=alt.X('WeightClassKg', title='Weight Class in Kg'),
        y=alt.Y('mean(Wilks)', title='Mean of Wilks Score'),
        color='count()',
    )
    .properties(title='Mens Wilks Scores by Weight Class', width=550)
)

# Dropdown bound to a single-value selection on WeightClassKg, initialised to '74'.
weightclass_m = sorted(pl_men['WeightClassKg'].unique())
dropdown_wc_m = alt.binding_select(
    name='Weight Class Kg Men',
    options=weightclass_m,
)
select_wc_m = alt.selection_single(
    fields=['WeightClassKg'],
    bind=dropdown_wc_m,
    init={'WeightClassKg': '74'},
)

# Replace the count-based colour with a fixed green and fade unselected bars.
wilks_wc_drop_m = wilks_wc.add_selection(select_wc_m).encode(
    color=alt.value('green'),
    opacity=alt.condition(select_wc_m, alt.value(0.8), alt.value(0.08)),
)

# Women's chart above the men's chart.
wilks_wc_drop_f & wilks_wc_drop_m
# Women: three side-by-side point charts (squat, bench, deadlift) that share
# one click selection on WeightClassKg.
click = alt.selection_multi(on='click', empty='all', fields=['WeightClassKg'])
title_f = alt.TitleParams(
    "Mean of Total in Kg for the Three Lifts for Women",
    anchor='middle',
    fontSize=20,
    color='Purple',
)

# Shared base: women's frame with the click selection attached.
base = (
    alt.Chart(pl_women_avs)
    .properties(width=550, height=300)
    .add_selection(click)
)


def _women_lift_panel(y_field, panel_title):
    """Return one purple point panel plotting mean(TotalKg) against y_field."""
    return (
        base.mark_point(size=55, color='purple')
        .encode(
            alt.X('mean(TotalKg):Q', scale=alt.Scale(zero=False), title=" "),
            alt.Y(y_field, title=" "),
            tooltip=['WeightClassKg'],
            # Points outside the clicked weight classes are faded.
            opacity=alt.condition(click, alt.value(0.9), alt.value(0.2)),
        )
        .add_selection(click)
        .properties(width=300, height=300, title=panel_title)
    )


squat_w = _women_lift_panel('best_squat_av', "Best Squat Average")
bench_w = _women_lift_panel('best_bench_av', "Best Bench Average")
dead_w = _women_lift_panel('best_dead_av', "Best Deadlift Average")

# Concatenate the panels horizontally under the shared title and display them.
totals_f = (squat_w | bench_w | dead_w).properties(title=title_f)
totals_f
# Men: three side-by-side point charts (squat, bench, deadlift) that share one
# click selection on WeightClassKg.
#define click
click = alt.selection_multi(on='click', empty='all', fields=['WeightClassKg'])
#title
title_m = alt.TitleParams(
    "Mean of Total in Kg for the Three Lifts for Men",
    anchor='middle', fontSize=20, color='Green')
#base men
base_m = alt.Chart(pl_men_avs).properties(
    width=550,
    height=300
).add_selection(click)
# Every panel below is built from base_m (the men's frame). Building them from
# `base` would plot the women's data under the men's title.
#squat
squat_m = base_m.mark_point(size=55, color='green').encode(
    alt.X('mean(TotalKg):Q', scale=alt.Scale(zero=False), title=" "),
    alt.Y('best_squat_av', title=" "),
    tooltip=['WeightClassKg'],
    # Points outside the clicked weight classes are faded.
    opacity=alt.condition(click, alt.value(0.9), alt.value(0.2)),
).add_selection(click).properties(width=300, height=300, title="Best Squat Average")
#bench
bench_m = base_m.mark_point(size=55, color='green').encode(
    alt.X('mean(TotalKg):Q', scale=alt.Scale(zero=False), title=" "),
    alt.Y('best_bench_av', title=" "),
    tooltip=['WeightClassKg'],
    opacity=alt.condition(click, alt.value(0.9), alt.value(0.2)),
).add_selection(click).properties(width=300, height=300, title="Best Bench Average")
#dead
dead_m = base_m.mark_point(size=55, color='green').encode(
    alt.X('mean(TotalKg):Q', scale=alt.Scale(zero=False), title=" "),
    alt.Y('best_dead_av', title=" "),
    tooltip=['WeightClassKg'],
    opacity=alt.condition(click, alt.value(0.9), alt.value(0.2)),
).add_selection(click).properties(width=300, height=300, title="Best Deadlift Average")
#put them together
totals_m = (squat_m | bench_m | dead_m).properties(title=title_m)
totals_m
# Women: TotalKg bars and Wilks ticks per placement, next to a count chart
# that drives the selection.
#define interaction
# Despite its name, this is a single-value selection projected onto the x
# encoding (Place), not an interval brush.
interval = alt.selection_single(encodings=['x'])
#rank
placements_f = alt.Chart(pl_women).mark_bar(color='black').encode(
    alt.X('Place', sort='descending',
          scale=alt.Scale(domain=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)),
          title="Top 10 Placements for Female Athletes"),
    alt.Y('TotalKg', title="Total in Kg"),
    # Bars for unselected placements are almost fully transparent.
    opacity=alt.condition(interval, alt.value(0.8), alt.value(0.01)),
)
#wilks women
# Same encodings as the bars, but drawn as purple ticks with Wilks on y.
wilks_point_f = placements_f.mark_tick(color='purple').encode(
    alt.Y('Wilks')
).properties(title="Wilks Scores in Relation to Female Athlete Placements")
wilks_place_f = (placements_f + wilks_point_f)
#bar women
# Re-encode the layered chart with a count on y and attach the selection here,
# so clicking a placement in this chart updates the opacity condition above.
bar_slider_f = wilks_place_f.encode(
    alt.Y('count()', title="Count"),
    color=alt.condition(interval, alt.value('purple'), alt.value('lightgray'))
).add_selection(interval).properties(
    height=100, width=600, title='Click a Placement for More Details')
#combine
placements_combo_f = (wilks_place_f | bar_slider_f)
placements_combo_f
# Men: TotalKg bars and Wilks ticks per placement, next to a count chart that
# drives the selection.
#define interaction
# Despite its name, this is a single-value selection projected onto the x
# encoding (Place), not an interval brush.
interval = alt.selection_single(encodings=['x'])
#ranks
placements_m = alt.Chart(pl_men).mark_bar(color='black').encode(
    alt.X('Place', sort='descending',
          scale=alt.Scale(domain=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)),
          title="Top 10 Placements for Male Athletes"),
    alt.Y('TotalKg', title="Total in Kg"),
    # Bars for unselected placements are almost fully transparent.
    opacity=alt.condition(interval, alt.value(0.8), alt.value(0.01)),
)
#wilks men
# Same encodings as the bars, but drawn as green ticks with Wilks on y.
wilks_point_m = placements_m.mark_tick(color='green').encode(
    alt.Y('Wilks')
).properties(title='Wilks Scores in Relation to Male Athlete Placements')
# NOTE: the men's layered chart is named wilks_place (no _m suffix); the name
# is kept because later code refers to it.
wilks_place = (placements_m + wilks_point_m)
#bar men
# Re-encode the layered chart with a count on y and attach the selection here,
# so clicking a placement in this chart updates the opacity condition above.
bar_slider_m = wilks_place.encode(
    alt.Y('count()', title="Count"),
    color=alt.condition(interval, alt.value('green'), alt.value('lightgray'))
).add_selection(interval).properties(
    height=100, width=600, title='Click a Placement for More Details')
#combine
placements_combo_m = (wilks_place | bar_slider_m)
placements_combo_m
%%shell
jupyter nbconvert --to html /content/reportfinal.ipynb
[NbConvertApp] Converting notebook /content/reportfinal.ipynb to html
[NbConvertApp] Writing 59666658 bytes to /content/reportfinal.html
# Assemble the final dashboard from the charts built above.
# Each dashboard_N is one row; `|` places charts side by side, `&` stacks them.
#   dashboard_0: women's and men's mean-total distributions
#   dashboard_1: women's placement layers | both count charts stacked | men's placement layers
#   dashboard_2: women's and men's Wilks-by-weight-class charts with dropdowns
#   dashboard_3: women's three-lift panels above the men's
dashboard_0 = (womens_dist | mens_dist)
dashboard_1= (wilks_place_f | (bar_slider_f & bar_slider_m) | wilks_place)
dashboard_2 =(wilks_wc_drop_f | wilks_wc_drop_m)
dashboard_3 =(totals_f & totals_m)
# Stack the rows top to bottom in the order 0, 3, 1, 2 and display the result.
dashboard= dashboard_0 & dashboard_3 & dashboard_1 & dashboard_2
dashboard