import pandas as pd
# import seaborn as sns
# import numpy as np
from scipy.stats import pearsonr, spearmanr
import plotly_express as px
import plotly
import plotly.graph_objects as go
import re
# import statsmodels
# import math
df = pd.read_csv('rawdata.csv')
PGY_LEVEL = 'pgy_level'
OPERATION = 'operation'
SCORE = 'score'
PGY_DICT = {
    'PGY1'                   : 1,
    'PGY2'                   : 2,
    'PGY3'                   : 3,
    'Research Year Resident' : 4,
    'PGY4'                   : 5,
    'PGY5'                   : 6
}
PGY_REVERSE_DICT = {
    1: 'PGY1',
    2: 'PGY2',
    3: 'PGY3',
    4: 'Research',
    5: 'PGY4',
    6: 'PGY5'
}
PGY_DOUBLE_REVERESE_DICT = {
    'PGY1'                   : 1,
    'PGY2'                   : 2,
    'PGY3'                   : 3,
    'Research'               : 4,
    'PGY4'                   : 5,
    'PGY5'                   : 6
}
PORT_SCORE_DICT = {
    'Observation Only'      : 1,
    'Direct Supervision'    : 2,
    'Indirect Supervision'  : 3,
    'Unsupervised Practice' : 4,
    'Supervising Others'    : 5
}
THRESHOLD = 4
df.replace({PGY_LEVEL: PGY_DICT}, inplace=True)
proc_cols = list(new_cols.values())[2:]
for col in proc_cols:
    df.replace({col : PORT_SCORE_DICT}, inplace=True)
df
## Set a Color
R1 = '69'
B1 = '145'
G1 = '226'
## Set a Color
R2 = '15'
B2 = '149'
G2 = '215'
## Set a Color
R3 = '174'
B3 = '214'
G3 = '234'
## Set a Color
R4 = '243'
B4 = '243'
G4 = '243'
## Set a Color
R5 = '190'
B5 = '190'
G5 = '190'
darkblue = 'rgba(' + R2 + ', ' + B2 + ', ' + G2 + ', 1.0)'
lightblue = 'rgba(' + R3 + ', ' + B3 + ', ' + G3 + ', 1.0)'
lightgray = 'rgba(' + R4 + ', ' + B4 + ', ' + G4 + ', 1.0)'
darkgray = 'rgba(' + R5 + ', ' + B5 + ', ' + G5 + ', 1.0)'
# solid_color = 'rgba(' + R + ', ' + B + ', ' + G + ', 1.0)'
# bg_color = 'rgba(' + R + ', ' + B + ', ' + G + ', 0.2)'
no_color = 'rgba(' + R1 + ', ' + B1 + ', ' + G1 + ', 0.0)'
df_response = df.groupby(PGY_LEVEL).agg(
    responses = ('Timestamp', 'count')
)
df_response.reset_index(inplace=True)
df_response.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_response['total'] = [7, 7, 8, 11, 6, 7]
df_response['percent'] = ((df_response['responses']/df_response['total']).round(2)*100).astype(int).astype(str) + '%'
df_response
df_melt = pd.melt(df, id_vars=PGY_LEVEL, value_vars=df.columns[2:], var_name='operation', value_name='score')
df_melt.sort_values(by=[OPERATION, PGY_LEVEL], inplace=True)
# print(df)
df_melt = df_melt.dropna()
df_melt = df_melt.reset_index(drop=True)
df_melt
# df.to_csv('rawscores.csv')
dict = {OPERATION:[],'correlation':[],'pvalue':[]}
for name, group in df_melt.groupby('operation'):
    corr, pvalue = spearmanr(group[PGY_LEVEL], group[SCORE])
    dict['operation'].append(name)
    dict['correlation'].append(corr)
    dict['pvalue'].append(pvalue)
corr_df = pd.DataFrame(dict)
corr_df['correlation'] = corr_df['correlation'].round(2)
corr_df['p-value'] = corr_df.apply(lambda x: '{:.2e}'.format(x['pvalue']), axis=1)
# corr_df['p-value'] = corr_df.apply(lambda x: round(x['pvalue'], 3), axis=1)
# corr_df.drop('pvalue', inplace=True, axis=1)
corr_df
df_count_per_score = df_melt.groupby([OPERATION, PGY_LEVEL, SCORE]).agg(
    count_per_score = ('score', 'count')
)
df_count_per_score.reset_index(inplace=True)
df_count_per_score
df_scatter = pd.merge(df_melt, df_count_per_score, on=[OPERATION, PGY_LEVEL, SCORE])
df_scatter
fig = px.scatter(df_scatter, x=PGY_LEVEL, y=SCORE, trendline='ols', size='count_per_score',
                 facet_col=OPERATION,
                 facet_col_wrap=4,
                 facet_row_spacing=0.04,
                 facet_col_spacing=0.02,
                 color_discrete_sequence=[darkblue],
                 opacity=1
                 )
fig.update_layout(title =
                  'Correlation between PGY Level and Operative Comfort',
                  xaxis_title = 'PGY Level', yaxis_title = 'score',
                  width = 1200, height = 1600,
                  font_family='Inter',
                  plot_bgcolor=lightgray)
fig.update_traces(marker_line_width=0)
                  
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(tickvals=[1,2,3,4,5,6], ticktext=['1', '2', '3', 'R', '4', '5']),
fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
fig.show()
fig.write_image('fig_scatter.svg')
df_scatter
df_stats = df_melt.groupby([OPERATION, PGY_LEVEL]).agg(
    num_residents   = ('score', 'count'),
    min             = ('score', 'min'),
    mean            = ('score', 'mean'),
    median          = ('score', 'median'),
    max             = ('score', 'max'),
    stdev           = ('score', 'std')
)
df_stats.reset_index(inplace=True)
df_stats['mean'] = df_stats['mean'].round(2)
df_stats['stdev'] = df_stats['stdev'].round(2)
df_stats['min'] = df_stats['min'].astype(int)
df_stats['max'] = df_stats['max'].astype(int)
df_stats['range'] = df_stats['max']-df_stats['min']
df_stats.sort_values(by=['operation', 'pgy_level'], ascending=False, inplace=True)
df_stats.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_stats
df_stats = df_melt.groupby([OPERATION, PGY_LEVEL]).agg(
    num_residents   = ('score', 'count'),
    min             = ('score', 'min'),
    # mean            = ('score', 'mean'),
    median          = ('score', 'median'),
    max             = ('score', 'max'),
    # stdev           = ('score', 'std')
)
df_stats.reset_index(inplace=True)
# df_stats['mean'] = df_stats['mean'].round(2)
# df_stats['stdev'] = df_stats['stdev'].round(2)
# df_stats['min'] = df_stats['min'].astype(int)
# df_stats['max'] = df_stats['max'].astype(int)
df_stats['range'] = df_stats['max']-df_stats['min']
df_stats.sort_values(by=['operation', 'pgy_level'], ascending=True, inplace=True)
df_stats.replace({PGY_LEVEL: PGY_REVERSE_DICT}, inplace=True)
df_stats
# fig = px.line(df_stats, x=PGY_LEVEL, y='min', facet_col=OPERATION, facet_col_wrap=4,
#               facet_row_spacing=0.04,  # default is 0.07 when facet_col_wrap is used
#               facet_col_spacing=0.02,  # default is 0.03`
#               )
# fig.update_traces(line_color=no_color)
# for t in fig.data:
#     row = t['xaxis']
#     col = t['yaxis']
#     proc = t['hovertemplate'].strip('operation=')
#     proc = re.sub('(<br>.+)', '', proc)
#     dff = df_stats.loc[df_stats['operation'] == proc]
#     fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
#                              y=dff['max'],
#                              xaxis=row,
#                              yaxis=col,
#                              fill='tonexty',
#                              fillcolor=lightblue,
#                              line_color=no_color))
#     fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
#                              y=dff['min'],
#                              xaxis=row,
#                              yaxis=col,
#                              fill='tonexty',
#                              fillcolor=lightblue,
#                              line_color=no_color))
#     fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
#                              y=dff['mean'],
#                              xaxis=row,
#                              yaxis=col,
#                              line_color=darkblue,
#                              line_width=4))
# fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
# fig.update_xaxes(tickvals=[1,2,3,4,5,6], ticktext=['1', '2', '3', 'R', '4', '5']),
# fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
# fig.add_hline(y=4, line_dash="dot")
# fig.update_layout(title =
#                   'Operative Comfort By PGY Level',
#                   xaxis_title = 'PGY Level', yaxis_title = 'score',
#                   width = 1200, height = 1600,
#                   font_family='Inter',
#                   showlegend=False,
#                   plot_bgcolor=lightgray)
# fig.write_image("op_readiness_curves.svg")
# fig.show()
fig = px.line(df_stats, x=PGY_LEVEL, y='min', facet_col=OPERATION, facet_col_wrap=4,
              facet_row_spacing=0.04,  # default is 0.07 when facet_col_wrap is used
              facet_col_spacing=0.02,  # default is 0.03`
              )
fig.update_traces(line_color=no_color)
for t in fig.data:
    row = t['xaxis']
    col = t['yaxis']
    proc = t['hovertemplate'].strip('operation=')
    proc = re.sub('(<br>.+)', '', proc)
    dff = df_stats.loc[df_stats['operation'] == proc]
    fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
                             y=dff['max'],
                             xaxis=row,
                             yaxis=col,
                             fill='tonexty',
                             fillcolor=lightblue,
                             line_color=no_color))
    fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
                             y=dff['min'],
                             xaxis=row,
                             yaxis=col,
                             fill='tonexty',
                             fillcolor=lightblue,
                             line_color=no_color))
    fig.add_trace(go.Scatter(x=dff[PGY_LEVEL],
                             y=dff['median'],
                             xaxis=row,
                             yaxis=col,
                             line_color=darkblue,
                             line_width=4))
fig.for_each_annotation(lambda a: a.update(text=a.text.split("=")[1]))
fig.update_xaxes(tickvals=['PGY1', 'PGY2', 'PGY3', 'Research', 'PGY4','PGY5'], ticktext=['1', '2', '3', 'R', '4', '5']),
fig.update_yaxes(tickvals=[1, 2, 3, 4, 5], ticktext=['OB', 'DS', 'IS', 'UP', 'SO'])
fig.add_hline(y=4, line_dash="dot")
fig.update_layout(title =
                  'Operative Comfort By PGY Level',
                  xaxis_title = 'PGY Level', yaxis_title = 'score',
                  width = 1200, height = 1600,
                  font_family='Inter',
                  showlegend=False,
                  plot_bgcolor=lightgray)
fig.write_image("op_readiness_curves.svg")
fig.show()
# THRESHOLD = 4
df_stats_reverse = df_stats
df_stats_reverse
df_stats_reverse.replace({PGY_LEVEL: PGY_DOUBLE_REVERESE_DICT}, inplace=True)
df_stats_reverse
df_threshold = {OPERATION : [], 'threshold_by': []}
for name, group in df_stats_reverse.groupby(OPERATION):
    for index, row in group.iterrows():
        if row['median'] >= THRESHOLD:
            if row[PGY_LEVEL] == 4:
                level = row[PGY_LEVEL] + 1
            else:
                level = row[PGY_LEVEL]
            break
        else:
            level = 7
    df_threshold[OPERATION].append(name)
    df_threshold['threshold_by'].append(level)
df_threshold = pd.DataFrame(df_threshold)
df_threshold.sort_values(by=['threshold_by', 'operation'], ascending=False, inplace=True)
df_threshold
fig = px.bar(df_threshold, x='threshold_by', y=OPERATION, orientation='h', text_auto=True)
fig.update_layout(title =
                  'Achievement of Unsupervised Practice by Operation',
                  xaxis_title = PGY_LEVEL, yaxis_title = SCORE,
                  width = 1200, height = 800,
                  font_family='Inter',
                  showlegend=False)
fig.update_xaxes(tickvals=[1,2,3,4,5,6,7], ticktext=['PGY1', 'PGY2', 'PGY3', 'Research', 'PGY4', 'PGY5', 'Not Reached'])
fig.write_image('threshold.svg')
fig.update_layout(title =
                  'PGY Level at Which Unsupervised Practice Threshold is Reached',
                  xaxis_title = 'PGY Level', yaxis_title = 'Activity',
                  width = 1200, height = 1600,
                  font_family='Inter',
                  showlegend=False)
fig.show()
SELECT *
FROM 'rawdata.csv'