Clipboard Health

import pandas as pd pd.options.mode.chained_assignment = None # default='warn'

shifts_df = pd.read_csv("data/Chicago MSA Shifts.csv") hcp_df = pd.read_csv("data/Chicago HCPs Active in October 2021.csv") cancellations_df = pd.read_csv("data/Chicago Cancel Logs October 2021.csv")

cancellations_df['Cancel Date'] = pd.to_datetime(cancellations_df['Created At']).dt.date hcp_worker_and_facility_cancels = cancellations_df[cancellations_df["Action"].isin(["WORKER_CANCEL", "NO_CALL_NO_SHOW", "FACILITY_CANCEL_FILLED"])] hcp_all_worker_cancels = cancellations_df[cancellations_df["Action"].isin(["WORKER_CANCEL", "NO_CALL_NO_SHOW"])] hcp_soft_cancels = cancellations_df[cancellations_df["Action"] == "WORKER_CANCEL"] hcp_hard_cancels = cancellations_df[cancellations_df["Action"] == "NO_CALL_NO_SHOW"] hcf_cancels = cancellations_df[cancellations_df["Action"] == "FACILITY_CANCEL_FILLED"]

print("Start date: " + cancellations_df.sort_values(by=['Created At']).iloc[0]['Created At']) print("End date: " + cancellations_df.sort_values(by=['Created At']).iloc[-1]['Created At'])

shift_count = shifts_df['ID'].nunique() print("Unique shifts: " + str(shift_count)) # Unique shifts def print_cancel_rate(df, title): rate = round(df['Shift ID'].nunique() / shift_count * 100, 1) print(title + ": " + str(rate) + "%") print_cancel_rate(hcp_worker_and_facility_cancels, "Total cancellation rate (worker + facility)") print_cancel_rate(hcp_all_worker_cancels, "Worker cancellation rate (normal + no call no show)") print_cancel_rate(hcp_hard_cancels, "Worker no call no show cancellation rate") print_cancel_rate(hcf_cancels, "Facility cancellation rate")

cancels_per_shift = cancellations_df.groupby('Shift ID').size() cancels_per_shift = pd.DataFrame({'Shift ID':cancels_per_shift.index, 'count':cancels_per_shift.values}) shifts_w_multiple_cancels = cancels_per_shift[cancels_per_shift['count'] > 1] # About 10% of all shifts with cancels have multiple cancels cancellations_df[cancellations_df['Shift ID'] == '615738bcfe2d1a018293eb3e'] # Example of shift with multiple cancels cancellations_df[cancellations_df['Shift ID'].isin(shifts_w_multiple_cancels['Shift ID'])].sort_values(by=['Shift ID']) # List of all shifts with multiple cancels

hcp_all_worker_cancels.groupby('Cancel Date').size().plot(figsize=(12,3), title='All worker cancellations (normal + no show no call)')

hcf_cancels.groupby('Cancel Date').size().plot(figsize=(12,3), title='Facility cancellations')

hcp_soft_cancels['Start'] = pd.to_datetime(hcp_soft_cancels['Start']) hcp_soft_cancels['Created At'] = pd.to_datetime(hcp_soft_cancels['Created At']) hcp_soft_cancels['Notice'] = (hcp_soft_cancels['Start'] - hcp_soft_cancels['Created At']).dt.days hcp_cancels_groupby = hcp_soft_cancels.groupby('Notice').size() hcp_cancels_groupby[0] += hcp_hard_cancels['Shift ID'].nunique() # Add no call no shows hcp_cancels_groupby.plot(figsize=(12,3), title='Worker cancellations by notice days', kind='bar') rate = round(hcp_cancels_groupby[0] / shift_count * 100, 1) print("Worker cancel rate with less than 24h notice: " + str(rate) + "% of all shifts")

hcp_soft_cancels_same_day = hcp_soft_cancels[hcp_soft_cancels['Notice'] < 1] hcp_soft_cancels_same_day['Notice Hours'] = (hcp_soft_cancels_same_day['Start'] - hcp_soft_cancels_same_day['Created At']).dt.total_seconds()/(60*60) hcp_soft_cancels_same_day['Notice Hours'] = hcp_soft_cancels_same_day['Notice Hours'].round() hcp_cancels_same_day_groupby = hcp_soft_cancels_same_day.groupby('Notice Hours').size() hcp_cancels_same_day_groupby[0] += hcp_hard_cancels['Shift ID'].nunique() # Add no call no shows hcp_cancels_same_day_groupby.plot(figsize=(12,3), title='Worker same day cancellations by notice hours', kind='bar') less_than_6h_count = sum([hcp_cancels_same_day_groupby[x] for x in range(7)]) rate = round(less_than_6h_count / shift_count * 100, 1) print("Worker cancel rate with less than 6h notice: " + str(rate) + "% of all shifts")

hcf_cancels['Start dt'] = pd.to_datetime(hcf_cancels['Start']) hcf_cancels['Created At dt'] = pd.to_datetime(hcf_cancels['Created At']) hcf_cancels['Notice'] = (hcf_cancels['Start dt'] - hcf_cancels['Created At dt']).dt.days hcf_cancels_groupby = hcf_cancels.groupby('Notice').size() less_than_24h_count = sum([hcf_cancels_groupby[x] for x in [-26, -1, 0]]) hcf_cancels_groupby[0] = less_than_24h_count del[hcf_cancels_groupby[-1]] del[hcf_cancels_groupby[-26]] hcf_cancels_groupby.plot(figsize=(12,3), title='HCF cancellations by notice days', kind='bar') rate = round(hcf_cancels_groupby[0] / shift_count * 100, 1) print("Facility cancel rate with less than 24h notice: " + str(rate) + "% of all shifts")

hcf_cancels_same_day = hcf_cancels[hcf_cancels['Notice'] < 1] hcf_cancels_same_day['Notice Hours'] = (hcf_cancels_same_day['Start dt'] - hcf_cancels_same_day['Created At dt']).dt.total_seconds()/(60*60) hcf_cancels_same_day['Notice Hours'] = hcf_cancels_same_day['Notice Hours'].round() hcf_cancels_same_day_groupby = hcf_cancels_same_day.groupby('Notice Hours').size() idx_less_than_6h = [x for x in list(hcf_cancels_same_day_groupby.index) if x <= 6] less_than_6h_count = sum([hcf_cancels_same_day_groupby[x] for x in idx_less_than_6h]) idx_less_than_0h = [x for x in list(hcf_cancels_same_day_groupby.index) if x <= 0] show_up_no_shift_count = sum([hcf_cancels_same_day_groupby[x] for x in idx_less_than_0h]) hcf_cancels_same_day_groupby[0] = show_up_no_shift_count for x in [i for i in idx_less_than_0h if i != 0]: del[hcf_cancels_same_day_groupby[x]] hcf_cancels_same_day_groupby.plot(figsize=(12,3), title='Facility same day cancellations by notice hours', kind='bar') less_than_6h_rate = round(less_than_6h_count / shift_count * 100, 1) show_up_no_shift_rate = round(show_up_no_shift_count / shift_count * 100, 1) print("Facility cancel rate with less than 6h notice: " + str(less_than_6h_rate) + "% of all shifts") print("Show up no shift: " + str(show_up_no_shift_rate) + "% of all shifts")

no_show_shift_ids = list(hcp_hard_cancels['Shift ID'].values) no_show_shifts = shifts_df[shifts_df['ID'].isin(no_show_shift_ids)] no_show_confirmed = no_show_shifts[no_show_shifts['Confirmed'] == True].shape[0] no_show_not_confirmed = no_show_shifts[no_show_shifts['Confirmed'] != True].shape[0] confirmed_no_show_rate = round(no_show_confirmed / shifts_df[shifts_df['Confirmed'] == True].shape[0] * 100, 1) not_confirmed_no_show_rate = round(no_show_not_confirmed / shifts_df[shifts_df['Confirmed'] != True].shape[0] * 100, 1) print("No show rate with confirmation: " + str(confirmed_no_show_rate) + "%") print("No show rate without confirmation: " + str(not_confirmed_no_show_rate) + "%")

no_show_shifts_grouped = no_show_shifts.groupby('Agent Req').size() all_shifts_grouped = shifts_df.groupby('Agent Req').size() ratio_grouped = (no_show_shifts_grouped / all_shifts_grouped * 100).fillna(0) ratio_grouped = ratio_grouped.round(1) ratio_grouped.plot(figsize=(12,3), title='No call no show by worker type (in %)', kind='bar')

all_shifts_grouped.plot(figsize=(12,3), title='All shifts by worker type', kind='bar')

no_show_shifts_grouped = no_show_shifts.groupby('Shift Type').size() all_shifts_grouped = shifts_df.groupby('Shift Type').size() ratio_grouped = (no_show_shifts_grouped / all_shifts_grouped * 100).fillna(0) ratio_grouped = ratio_grouped.round(1) ratio_grouped.plot(figsize=(12,3), title='No call no show by shift type (in %)', kind='bar')

all_shifts_grouped.plot(figsize=(12,3), title='All shifts by shift type', kind='bar')

no_show_shifts_grouped = no_show_shifts.groupby('By Facility').size() all_shifts_grouped = shifts_df.groupby('By Facility').size() ratio_grouped = (no_show_shifts_grouped / all_shifts_grouped * 100).fillna(0) ratio_grouped = ratio_grouped.round(1) ratio_grouped.plot(figsize=(12,3), title='No call no show by facility post (in %)', kind='bar')

all_shifts_grouped.plot(figsize=(12,3), title='All shifts by facility post', kind='bar')

hcp_df['Cancel Rate'] = ((hcp_df['Cancelled Shifts'] / (hcp_df['Cancelled Shifts'] + hcp_df['Completed Shifts'])) * 100).round(1) # hcp_df['Binned Cancel Rate'] = pd.cut(hcp_df['Cancel Rate'], [-0.1, 20, 40, 60, 80, 100]) share_values = [] share_index = [] for i in range(5): sub_hcp_df = hcp_df[hcp_df['Cancel Rate'] < (i+1) * 20] sub_hcp_df = sub_hcp_df[sub_hcp_df['Cancel Rate'] >= i * 20] share = round(sum(sub_hcp_df['Cancelled Shifts'].values) / sum(hcp_df['Cancelled Shifts'].values) * 100, 1) # share_arr.append([str(i) + "-" + str(i+1), share]) share_values.append(share) share_index.append("workers with " + str(i*20) + "% - " + str((i+1)*20) + "% cancellation rate") pd.DataFrame(share_values, index=share_index, columns=['% of cancelled shifts']).plot(figsize=(12,3), title='% of all cancelled shifts per worker group', kind='bar')

bins = [0, 1, 5, 10, 50, 100, 500] # hcp_df['Binned Completed Shifts'] = pd.cut(hcp_df['Completed Shifts'], bins) # hcp_df.groupby('Binned Completed Shifts').size().plot(figsize=(12,3), title='Nbr of workers per cancel rates', kind='bar') share_values = [] share_index = [] for i in range(len(bins) - 1): sub_hcp_df = hcp_df[hcp_df['Completed Shifts'] < bins[i+1]] sub_hcp_df = sub_hcp_df[sub_hcp_df['Completed Shifts'] >= bins[i]] share = round(sum(sub_hcp_df['Cancelled Shifts'].values) / sum(hcp_df['Cancelled Shifts'].values) * 100, 1) # share_arr.append([str(i) + "-" + str(i+1), share]) share_values.append(share) share_index.append("workers with " + str(bins[i]) + " - " + str(bins[i+1]) + " completed shifts") pd.DataFrame(share_values, index=share_index, columns=['% of cancelled shifts']).plot(figsize=(12,3), title='% of all cancelled shifts per worker group', kind='bar')