Labor/Machine down time relationship visualization

from random import randint import pandas as pd

# Bring the sample data generators def generateMachines(number): machines = [] for i in range(1,number+1): machines.append(f'Machine_{i}') return machines def generateDataObjects(number_machines,observations): machines = generateMachines(number_machines) observationList = [] for i in range(0,observations): machine = randint(0,number_machines-1) observation = {"id":i,"Machine":machines[machine],"Week":randint(1,52),"Labor Hours":randint(0,30),"Downtime Hours":randint(0,8)} observationList.append(observation) return observationList def generateDataFrame(number_machines,observations): data = generateDataObjects(number_machines,observations) testData = pd.DataFrame(data) return testData

# Generate sample data and save it to a pandas dataframe # In this case, 3 machines with 100 observations are generated TestData = generateDataFrame(3,100)

# Sort the test data for readability # Note that muliple records in the same week is possible. TestData.sort_values(by=['Machine','Week'])

# Also note that in some weeks when there is no event of labor hours or downtime hours, there is no record at all. # In order to calcuate moving sum accros period correctly, we need to insert records with zero values in such events. TestData[TestData['Machine']=='Machine_3'].sort_values('Week')

# The following codes check machine by machine, if there is a recrod for all the weeks from week 1 to week 52. # If not, it will insert a row of zero valus and the coresponding week number # 3 machine x 52 weeks = 156 ; there should be around 156 rows in the result def fillgaps(dataFrame): currId = dataFrame["id"].max() machine_name = list(set(dataFrame['Machine']))[0] arrayGaps = [] for week in range (1,53): flag = dataFrame[dataFrame["Week"]==week]["Week"].count() if flag == 0: line = {"id":currId+1,"Machine":machine_name,"Week":week,"Labor Hours":0,"Downtime Hours":0} arrayGaps.append(line) currId = currId +1 result = dataFrame.append(pd.DataFrame(arrayGaps)) return result machines = set(TestData["Machine"]) df=pd.DataFrame() for machine in machines: machineData = TestData[TestData['Machine']==machine] df = df.append(fillgaps(machineData)) df

# However there are 177 rows. # This is because in the original row data, there are mutilple rows in the same week # By counting the weeks of each machine and refering to the cells above, we can confrim this record is correct df.groupby('Machine').count()

# To avoid causing issue in the rolling sum calculation latter on,let's group up these rows df_g = df.groupby(['Machine','Week']).sum() df_g

# Sort according to the week number. This step is essential to the following rolling sum calculation df_sorted = df_g.sort_values(by=['Machine','Week']) # let's see all the 52 weeks of the last machine df_sorted.tail(52)

# rolling sum over periods rolling_period = 13 df_rolled = df_sorted.groupby('Machine').rolling(13).sum() df_rolled.tail(52)

# adding the ration between labor and down time df_rolled['labor/down_singleMachine']= df_rolled['Labor Hours']/df_rolled['Downtime Hours'] df_rolled.tail(52)

import seaborn as sns

# plot the ration of all the machine sns.scatterplot(x='Week',y='labor/down_singleMachine', hue='Machine',data=df_rolled) # caulucation the mean by using sum both statistics of all machines df_group=df_rolled.groupby('Week').sum() df_group["labor/down_allMachine"] = df_group['Labor Hours']/df_group['Downtime Hours'] # plot the mean as a line sns.lineplot(x='Week',y="labor/down_allMachine",data=df_group)