import pandas as pd
# import seaborn as sns
# import numpy as np
from scipy.stats import pearsonr, spearmanr
import plotly_express as px
import plotly
import plotly.graph_objects as go
import re
# import statsmodels
# import math
df = pd.read_csv('rawdata.csv')
PGY_LEVEL = 'pgy_level'
OPERATION = 'operation'
SCORE = 'score'
PGY_DICT = {
'PGY1' : 1,
'PGY2' : 2,
'PGY3' : 3,
'Research Year Resident' : 4,
'PGY4' : 5,
'PGY5' : 6
}
PGY_REVERSE_DICT = {
1: 'PGY1',
2: 'PGY2',
3: 'PGY3',
4: 'Research',
5: 'PGY4',
6: 'PGY5'
}
PGY_DOUBLE_REVERESE_DICT = {
'PGY1' : 1,
'PGY2' : 2,
'PGY3' : 3,
'Research' : 4,
'PGY4' : 5,
'PGY5' : 6
}
PORT_SCORE_DICT = {
'Observation Only' : 1,
'Direct Supervision' : 2,
'Indirect Supervision' : 3,
'Unsupervised Practice' : 4,
'Supervising Others' : 5
}
THRESHOLD = 4
df.replace({PGY_LEVEL: PGY_DICT}, inplace=True)
proc_cols = list(new_cols.values())[2:]
for col in proc_cols:
df.replace({col : PORT_SCORE_DICT}, inplace=True)
df
## Set a Color
R1 = '69'
B1 = '145'
G1 = '226'
## Set a Color
R2 = '15'
B2 = '149'
G2 = '215'
## Set a Color
R3 = '174'
B3 = '214'
G3 = '234'
## Set a Color
R4 = '243'
B4 = '243'
G4 = '243'
## Set a Color
R5 = '190'
B5 = '190'
G5 = '190'
darkblue = 'rgba(' + R2 + ', ' + B2 + ', ' + G2 + ', 1.0)'
lightblue = 'rgba(' + R3 + ', ' + B3 + ', ' + G3 + ', 1.0)'
lightgray = 'rgba(' + R4 + ', ' + B4 + ', ' + G4 + ', 1.0)'
darkgray = 'rgba(' + R5 + ', ' + B5 + ', ' + G5 + ', 1.0)'
# solid_color = 'rgba(' + R + ', ' + B + ', ' + G + ', 1.0)'
# bg_color = 'rgba(' + R + ', ' + B + ', ' + G + ', 0.2)'
no_color = 'rgba(' + R1 + ', ' + B1 + ', ' + G1 + ', 0.0)'
df_response = df.groupby(PGY_LEVEL).agg(
responses = ('Timestamp', 'count')
)
df_response.reset_index(inplace=True)
df_response.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_response['total'] = [7, 7, 8, 11, 6, 7]
df_response['percent'] = ((df_response['responses']/df_response['total']).round(2)*100).astype(int).astype(str) + '%'
df_response
df_melt = pd.melt(df, id_vars=PGY_LEVEL, value_vars=df.columns[2:], var_name='operation', value_name='score')
df_melt.sort_values(by=[OPERATION, PGY_LEVEL], inplace=True)
# print(df)
df_melt = df_melt.dropna()
df_melt = df_melt.reset_index(drop=True)
df_melt
# df.to_csv('rawscores.csv')
dict = {OPERATION:[],'correlation':[],'pvalue':[]}
for name, group in df_melt.groupby('operation'):
corr, pvalue = spearmanr(group[PGY_LEVEL], group[SCORE])
dict['operation'].append(name)
dict['correlation'].append(corr)
dict['pvalue'].append(pvalue)
corr_df = pd.DataFrame(dict)
corr_df['correlation'] = corr_df['correlation'].round(2)
corr_df['p-value'] = corr_df.apply(lambda x: '{:.2e}'.format(x['pvalue']), axis=1)
# corr_df['p-value'] = corr_df.apply(lambda x: round(x['pvalue'], 3), axis=1)
# corr_df.drop('pvalue', inplace=True, axis=1)
corr_df
df_count_per_score = df_melt.groupby([OPERATION, PGY_LEVEL, SCORE]).agg(
count_per_score = ('score', 'count')
)
df_count_per_score.reset_index(inplace=True)
df_count_per_score
df_scatter = pd.merge(df_melt, df_count_per_score, on=[OPERATION, PGY_LEVEL, SCORE])
df_scatter
fig = px.scatter(df_scatter, x=PGY_LEVEL, y=SCORE, trendline='ols', size='count_per_score',
facet_col=OPERATION,
facet_col_wrap=4,
facet_row_spacing=0.04,
facet_col_spacing=0.02,
color_discrete_sequence=[darkblue],
opacity=1
)
fig.update_layout(title =
'Correlation between PGY Level and Operative Comfort',
xaxis_title = 'PGY Level', yaxis_title = 'score',
width = 1200, height = 1600,
font_family='Inter',
plot_bgcolor=lightgray)
fig.update_traces(marker_line_width=0)
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(tickvals=[1,2,3,4,5,6], ticktext=['1', '2', '3', 'R', '4', '5']),
fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
fig.show()
fig.write_image('fig_scatter.svg')
df_scatter
df_stats = df_melt.groupby([OPERATION, PGY_LEVEL]).agg(
num_residents = ('score', 'count'),
min = ('score', 'min'),
mean = ('score', 'mean'),
median = ('score', 'median'),
max = ('score', 'max'),
stdev = ('score', 'std')
)
df_stats.reset_index(inplace=True)
df_stats['mean'] = df_stats['mean'].round(2)
df_stats['stdev'] = df_stats['stdev'].round(2)
df_stats['min'] = df_stats['min'].astype(int)
df_stats['max'] = df_stats['max'].astype(int)
df_stats['range'] = df_stats['max']-df_stats['min']
df_stats.sort_values(by=['operation', 'pgy_level'], ascending=False, inplace=True)
df_stats.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_stats
df_stats = df_melt.groupby([OPERATION, PGY_LEVEL]).agg(
num_residents = ('score', 'count'),
min = ('score', 'min'),
# mean = ('score', 'mean'),
median = ('score', 'median'),
max = ('score', 'max'),
# stdev = ('score', 'std')
)
df_stats.reset_index(inplace=True)
# df_stats['mean'] = df_stats['mean'].round(2)
# df_stats['stdev'] = df_stats['stdev'].round(2)
# df_stats['min'] = df_stats['min'].astype(int)
# df_stats['max'] = df_stats['max'].astype(int)
df_stats['range'] = df_stats['max']-df_stats['min']
df_stats.sort_values(by=['operation', 'pgy_level'], ascending=True, inplace=True)
df_stats.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_stats
# fig = px.line(df_stats, x=PGY_LEVEL, y='min', facet_col=OPERATION, facet_col_wrap=4,
# facet_row_spacing=0.04, # default is 0.07 when facet_col_wrap is used
# facet_col_spacing=0.02, # default is 0.03`
# )
# fig.update_traces(line_color=no_color)
# for t in fig.data:
# row = t['xaxis']
# col = t['yaxis']
# proc = t['hovertemplate'].strip('operation=')
# proc = re.sub('(<br>.+)', '', proc)
# dff = df_stats.loc[df_stats['operation'] == proc]
# fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
# y=dff['max'],
# xaxis=row,
# yaxis=col,
# fill='tonexty',
# fillcolor=lightblue,
# line_color=no_color))
# fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
# y=dff['min'],
# xaxis=row,
# yaxis=col,
# fill='tonexty',
# fillcolor=lightblue,
# line_color=no_color))
# fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
# y=dff['mean'],
# xaxis=row,
# yaxis=col,
# line_color=darkblue,
# line_width=4))
# fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
# fig.update_xaxes(tickvals=[1,2,3,4,5,6], ticktext=['1', '2', '3', 'R', '4', '5']),
# fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
# fig.add_hline(y=4, line_dash="dot")
# fig.update_layout(title =
# 'Operative Comfort By PGY Level',
# xaxis_title = 'PGY Level', yaxis_title = 'score',
# width = 1200, height = 1600,
# font_family='Inter',
# showlegend=False,
# plot_bgcolor=lightgray)
# fig.write_image("op_readiness_curves.svg")
# fig.show()
fig = px.line(df_stats, x=PGY_LEVEL, y='min', facet_col=OPERATION, facet_col_wrap=4,
facet_row_spacing=0.04, # default is 0.07 when facet_col_wrap is used
facet_col_spacing=0.02, # default is 0.03`
)
fig.update_traces(line_color=no_color)
for t in fig.data:
row = t['xaxis']
col = t['yaxis']
proc = t['hovertemplate'].strip('operation=')
proc = re.sub('(<br>.+)', '', proc)
dff = df_stats.loc[df_stats['operation'] == proc]
fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
y=dff['max'],
xaxis=row,
yaxis=col,
fill='tonexty',
fillcolor=lightblue,
line_color=no_color))
fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
y=dff['min'],
xaxis=row,
yaxis=col,
fill='tonexty',
fillcolor=lightblue,
line_color=no_color))
fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
y=dff['median'],
xaxis=row,
yaxis=col,
line_color=darkblue,
line_width=4))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(tickvals=['PGY1', 'PGY2', 'PGY3', 'Research', 'PGY4','PGY5'], ticktext=['1', '2', '3', 'R', '4', '5']),
fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
fig.add_hline(y=4, line_dash="dot")
fig.update_layout(title =
'Operative Comfort By PGY Level',
xaxis_title = 'PGY Level', yaxis_title = 'score',
width = 1200, height = 1600,
font_family='Inter',
showlegend=False,
plot_bgcolor=lightgray)
fig.write_image("op_readiness_curves.svg")
fig.show()
# THRESHOLD = 4
df_stats_reverse = df_stats
df_stats_reverse
df_stats_reverse.replace({PGY_LEVEL: PGY_DOUBLE_REVERESE_DICT}, inplace=True)
df_stats_reverse
df_threshold = {OPERATION : [], 'threshold_by': []}
for name, group in df_stats_reverse.groupby(OPERATION):
for index, row in group.iterrows():
if row['median'] >= THRESHOLD:
if row[PGY_LEVEL] == 4:
level = row[PGY_LEVEL] + 1
else:
level = row[PGY_LEVEL]
break
else:
level = 7
df_threshold[OPERATION].append(name)
df_threshold['threshold_by'].append(level)
df_threshold = pd.DataFrame(df_threshold)
df_threshold.sort_values(by=['threshold_by', 'operation'], ascending=False, inplace=True)
df_threshold
fig = px.bar(df_threshold, x='threshold_by', y=OPERATION, orientation='h', text_auto=True)
fig.update_layout(title =
'Achievement of Unsupervised Practice by Operation',
xaxis_title = PGY_LEVEL, yaxis_title = SCORE,
width = 1200, height = 800,
font_family='Inter',
showlegend=False)
fig.update_xaxes(tickvals=[1,2,3,4,5,6,7], ticktext=['PGY1', 'PGY2', 'PGY3', 'Research', 'PGY4', 'PGY5', 'Not Reached'])
fig.write_image('threshold.svg')
fig.update_layout(title =
'PGY Level at Which Unsupervised Practice Threshold is Reached',
xaxis_title = 'PGY Level', yaxis_title = 'Activity',
width = 1200, height = 1600,
font_family='Inter',
showlegend=False)
fig.show()
SELECT *
FROM 'rawdata.csv'