from random import randint
import pandas as pd
# Bring the sample data generators
def generateMachines(number):
machines = []
for i in range(1,number+1):
machines.append(f'Machine_{i}')
return machines
def generateDataObjects(number_machines,observations):
machines = generateMachines(number_machines)
observationList = []
for i in range(0,observations):
machine = randint(0,number_machines-1)
observation = {"id":i,"Machine":machines[machine],"Week":randint(1,52),"Labor Hours":randint(0,30),"Downtime Hours":randint(0,8)}
observationList.append(observation)
return observationList
def generateDataFrame(number_machines,observations):
data = generateDataObjects(number_machines,observations)
testData = pd.DataFrame(data)
return testData
# Generate sample data and save it to a pandas dataframe
# In this case, 3 machines with 100 observations are generated
TestData = generateDataFrame(3,100)
# Sort the test data for readability
# Note that muliple records in the same week is possible.
TestData.sort_values(by=['Machine','Week'])
# Also note that in some weeks when there is no event of labor hours or downtime hours, there is no record at all.
# In order to calcuate moving sum accros period correctly, we need to insert records with zero values in such events.
TestData[TestData['Machine']=='Machine_3'].sort_values('Week')
# The following codes check machine by machine, if there is a recrod for all the weeks from week 1 to week 52.
# If not, it will insert a row of zero valus and the coresponding week number
# 3 machine x 52 weeks = 156 ; there should be around 156 rows in the result
def fillgaps(dataFrame):
currId = dataFrame["id"].max()
machine_name = list(set(dataFrame['Machine']))[0]
arrayGaps = []
for week in range (1,53):
flag = dataFrame[dataFrame["Week"]==week]["Week"].count()
if flag == 0:
line = {"id":currId+1,"Machine":machine_name,"Week":week,"Labor Hours":0,"Downtime Hours":0}
arrayGaps.append(line)
currId = currId +1
result = dataFrame.append(pd.DataFrame(arrayGaps))
return result
machines = set(TestData["Machine"])
df=pd.DataFrame()
for machine in machines:
machineData = TestData[TestData['Machine']==machine]
df = df.append(fillgaps(machineData))
df
# However there are 177 rows.
# This is because in the original row data, there are mutilple rows in the same week
# By counting the weeks of each machine and refering to the cells above, we can confrim this record is correct
df.groupby('Machine').count()
# To avoid causing issue in the rolling sum calculation latter on,let's group up these rows
df_g = df.groupby(['Machine','Week']).sum()
df_g
# Sort according to the week number. This step is essential to the following rolling sum calculation
df_sorted = df_g.sort_values(by=['Machine','Week'])
# let's see all the 52 weeks of the last machine
df_sorted.tail(52)
# rolling sum over periods
rolling_period = 13
df_rolled = df_sorted.groupby('Machine').rolling(13).sum()
df_rolled.tail(52)
# adding the ration between labor and down time
df_rolled['labor/down_singleMachine']= df_rolled['Labor Hours']/df_rolled['Downtime Hours']
df_rolled.tail(52)
import seaborn as sns
# plot the ration of all the machine
sns.scatterplot(x='Week',y='labor/down_singleMachine', hue='Machine',data=df_rolled)
# caulucation the mean by using sum both statistics of all machines
df_group=df_rolled.groupby('Week').sum()
df_group["labor/down_allMachine"] = df_group['Labor Hours']/df_group['Downtime Hours']
# plot the mean as a line
sns.lineplot(x='Week',y="labor/down_allMachine",data=df_group)