import numpy as np
import pandas as pd
# Load the raw long-format table (columns used below: id, time, variable, value).
original_data = pd.read_csv("dataset_mood_smartphone.csv")
# select date: keep only the first 10 characters of every timestamp string
time = list(original_data["time"])
date = [stamp[0:10] for stamp in time]
original_data["time"] = date
# remove null values (the original author counted 202 of them).
# Rows are located with isnull() rather than by filling in an "NA" string
# sentinel, so the `value` column keeps its numeric dtype.
NA_index = original_data.index[original_data["value"].isnull()].tolist()
original_data = original_data.drop(NA_index)
# reorder: renumber the remaining rows 0..n-1 so that the positional
# offsets used further down refer to a gap-free index
original_data.reset_index(drop=True, inplace=True)
# intercept sub-dataframe for features and target
# Every sub-dataframe is a fixed, half-open positional row range of
# `original_data`.  Presumably each range holds the rows of exactly one
# `variable`; the bounds are hard-coded and are not checked against that column.
_SUB_COLUMNS = ['id', 'time', 'variable', 'value']


def _rows_between(start, stop):
    """Return rows start..stop-1 (positional) of original_data with the four shared columns."""
    return original_data[_SUB_COLUMNS].iloc[start:stop].copy()


mood_df = _rows_between(0, 5641)
ca_df = _rows_between(5641, 11238)
cv_df = _rows_between(11238, 16725)
activity_df = _rows_between(16725, 39690)
screen_df = _rows_between(39690, 136268)
call_df = _rows_between(136268, 141507)
sms_df = _rows_between(141507, 143305)
appb_df = _rows_between(143305, 234593)
appc_df = _rows_between(234593, 308869)
appe_df = _rows_between(308869, 335994)
appf_df = _rows_between(335994, 336933)
appg_df = _rows_between(336933, 337746)
appof_df = _rows_between(337746, 343388)
appot_df = _rows_between(343388, 351038)
apps_df = _rows_between(351038, 370183)
appt_df = _rows_between(370183, 373029)
appun_df = _rows_between(373029, 373968)
apput_df = _rows_between(373968, 376455)
appw_df = _rows_between(376455, 376710)
# build dictionary for each sub-dataframe
def _column_lists(frame):
    """Map every column label of `frame` to a plain Python list of that column's values."""
    return {label: frame[label].tolist() for label in frame.columns}


mood_dict = _column_lists(mood_df)
ca_dict = _column_lists(ca_df)
cv_dict = _column_lists(cv_df)
activity_dict = _column_lists(activity_df)
screen_dict = _column_lists(screen_df)
call_dict = _column_lists(call_df)
sms_dict = _column_lists(sms_df)
appb_dict = _column_lists(appb_df)
appc_dict = _column_lists(appc_df)
appe_dict = _column_lists(appe_df)
appf_dict = _column_lists(appf_df)
appg_dict = _column_lists(appg_df)
appof_dict = _column_lists(appof_df)
appot_dict = _column_lists(appot_df)
apps_dict = _column_lists(apps_df)
appt_dict = _column_lists(appt_df)
appun_dict = _column_lists(appun_df)
apput_dict = _column_lists(apput_df)
appw_dict = _column_lists(appw_df)
# Distinct ids of the mood rows, kept in the order in which they first occur.
l2 = list(dict.fromkeys(mood_dict["id"]))


def _rows_per_id(id_column):
    """Return, for each id in l2 (same order), how many times it occurs in id_column."""
    return [id_column.count(one_id) for one_id in l2]


# One count list per variable; position k always refers to l2[k].
id_count_list = _rows_per_id(mood_dict["id"])
ca_id_count_list = _rows_per_id(ca_dict["id"])
cv_id_count_list = _rows_per_id(cv_dict["id"])
activity_id_count_list = _rows_per_id(activity_dict["id"])
screen_id_count_list = _rows_per_id(screen_dict["id"])
call_id_count_list = _rows_per_id(call_dict["id"])
sms_id_count_list = _rows_per_id(sms_dict["id"])
appb_id_count_list = _rows_per_id(appb_dict["id"])
appc_id_count_list = _rows_per_id(appc_dict["id"])
appe_id_count_list = _rows_per_id(appe_dict["id"])
appf_id_count_list = _rows_per_id(appf_dict["id"])
appg_id_count_list = _rows_per_id(appg_dict["id"])
appof_id_count_list = _rows_per_id(appof_dict["id"])
appot_id_count_list = _rows_per_id(appot_dict["id"])
apps_id_count_list = _rows_per_id(apps_dict["id"])
appt_id_count_list = _rows_per_id(appt_dict["id"])
appun_id_count_list = _rows_per_id(appun_dict["id"])
apput_id_count_list = _rows_per_id(apput_dict["id"])
appw_id_count_list = _rows_per_id(appw_dict["id"])
# merge mood values for one day into one mean value with round() function
# Assumes (not checked here) that the mood rows are grouped by id in l2 order
# and, inside each id, that rows sharing a date sit next to each other.
id_time_dict = {}      # id -> its distinct dates, in first-seen order
time_count_dict = {}   # id -> number of rows on each of those dates
value_mean_dict = {}   # id -> rounded mean per date, aligned with the dates
for ind, rows_for_id in enumerate(id_count_list):
    person = l2[ind]
    lo = sum(id_count_list[:ind])
    hi = lo + rows_for_id
    dates = mood_dict["time"][lo:hi]
    readings = mood_dict["value"][lo:hi]
    id_time_dict[person] = list(dict.fromkeys(dates))
    time_count_dict[person] = [dates.count(day) for day in id_time_dict[person]]
    day_counts = time_count_dict[person]
    day_means = []
    # The day index keeps the module-level name `y`: it stays bound after the
    # loop and other statements in this script may read it.
    for y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:y])
        day_hi = day_lo + rows_in_day
        day_means.append(round(sum(readings[day_lo:day_hi]) / rows_in_day))
    value_mean_dict[person] = day_means
# build dataframe for processed mood value
mood_final_dict = {"id": [], "time": [], "mood": []}
for person in l2:
    days = id_time_dict[person]
    mood_final_dict["id"].extend([person] * len(days))
    mood_final_dict["time"].extend(days)
    mood_final_dict["mood"].extend(value_mean_dict[person])
mood_final_df = pd.DataFrame(mood_final_dict)
# merge values for one day into one mean value with round() function
# Assumes (not checked here) that the rows in ca_dict are grouped by id in l2
# order and, inside each id, that rows sharing a date sit next to each other.
ca_id_time_dict = {}      # id -> its distinct dates, in first-seen order
ca_time_count_dict = {}   # id -> number of rows on each of those dates
ca_value_mean_dict = {}   # id -> rounded mean per date, aligned with the dates
for ca_ind, rows_for_id in enumerate(ca_id_count_list):
    person = l2[ca_ind]
    lo = sum(ca_id_count_list[:ca_ind])
    hi = lo + rows_for_id
    dates = ca_dict["time"][lo:hi]
    readings = ca_dict["value"][lo:hi]
    ca_id_time_dict[person] = list(dict.fromkeys(dates))
    ca_time_count_dict[person] = [dates.count(day) for day in ca_id_time_dict[person]]
    day_counts = ca_time_count_dict[person]
    day_means = []
    for ca_y, rows_in_day in enumerate(day_counts):
        # The window is derived from this loop's own day index, so each date
        # averages exactly the rows that belong to it.
        day_lo = sum(day_counts[:ca_y])
        day_hi = day_lo + rows_in_day
        day_means.append(round(sum(readings[day_lo:day_hi]) / rows_in_day))
    ca_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
ca_final_dict = {"id": [], "time": [], "circumplex.arousal": []}
for person in l2:
    days = ca_id_time_dict[person]
    ca_final_dict["id"].extend([person] * len(days))
    ca_final_dict["time"].extend(days)
    ca_final_dict["circumplex.arousal"].extend(ca_value_mean_dict[person])
ca_final_df = pd.DataFrame(ca_final_dict)
# merge values for one day into one mean value (rounded with round())
# Assumes (not checked here) that the rows in cv_dict are grouped by id in l2
# order and, inside each id, that rows sharing a date sit next to each other.
cv_id_time_dict = {}      # id -> its distinct dates, in first-seen order
cv_time_count_dict = {}   # id -> number of rows on each of those dates
cv_value_mean_dict = {}   # id -> rounded mean per date, aligned with the dates
for cv_ind, rows_for_id in enumerate(cv_id_count_list):
    person = l2[cv_ind]
    lo = sum(cv_id_count_list[:cv_ind])
    hi = lo + rows_for_id
    dates = cv_dict["time"][lo:hi]
    readings = cv_dict["value"][lo:hi]
    cv_id_time_dict[person] = list(dict.fromkeys(dates))
    cv_time_count_dict[person] = [dates.count(day) for day in cv_id_time_dict[person]]
    day_counts = cv_time_count_dict[person]
    day_means = []
    for cv_y, rows_in_day in enumerate(day_counts):
        # The window is derived from this loop's own day index, so each date
        # averages exactly the rows that belong to it.
        day_lo = sum(day_counts[:cv_y])
        day_hi = day_lo + rows_in_day
        day_means.append(round(sum(readings[day_lo:day_hi]) / rows_in_day))
    cv_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
cv_final_dict = {"id": [], "time": [], "circumplex.valence": []}
for person in l2:
    days = cv_id_time_dict[person]
    cv_final_dict["id"].extend([person] * len(days))
    cv_final_dict["time"].extend(days)
    cv_final_dict["circumplex.valence"].extend(cv_value_mean_dict[person])
cv_final_df = pd.DataFrame(cv_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in activity_dict are grouped by id
# in l2 order and, inside each id, that rows sharing a date are adjacent.
activity_id_time_dict = {}      # id -> its distinct dates, in first-seen order
activity_time_count_dict = {}   # id -> number of rows on each of those dates
activity_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for activity_ind, rows_for_id in enumerate(activity_id_count_list):
    person = l2[activity_ind]
    lo = sum(activity_id_count_list[:activity_ind])
    hi = lo + rows_for_id
    dates = activity_dict["time"][lo:hi]
    readings = activity_dict["value"][lo:hi]
    activity_id_time_dict[person] = list(dict.fromkeys(dates))
    activity_time_count_dict[person] = [dates.count(day) for day in activity_id_time_dict[person]]
    day_counts = activity_time_count_dict[person]
    day_means = []
    for activity_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:activity_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    activity_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
activity_final_dict = {"id": [], "time": [], "activity": []}
for person in l2:
    days = activity_id_time_dict[person]
    activity_final_dict["id"].extend([person] * len(days))
    activity_final_dict["time"].extend(days)
    activity_final_dict["activity"].extend(activity_value_mean_dict[person])
activity_final_df = pd.DataFrame(activity_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in screen_dict are grouped by id
# in l2 order and, inside each id, that rows sharing a date are adjacent.
screen_id_time_dict = {}      # id -> its distinct dates, in first-seen order
screen_time_count_dict = {}   # id -> number of rows on each of those dates
screen_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for screen_ind, rows_for_id in enumerate(screen_id_count_list):
    person = l2[screen_ind]
    lo = sum(screen_id_count_list[:screen_ind])
    hi = lo + rows_for_id
    dates = screen_dict["time"][lo:hi]
    readings = screen_dict["value"][lo:hi]
    screen_id_time_dict[person] = list(dict.fromkeys(dates))
    screen_time_count_dict[person] = [dates.count(day) for day in screen_id_time_dict[person]]
    day_counts = screen_time_count_dict[person]
    day_means = []
    for screen_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:screen_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    screen_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
screen_final_dict = {"id": [], "time": [], "screen": []}
for person in l2:
    days = screen_id_time_dict[person]
    screen_final_dict["id"].extend([person] * len(days))
    screen_final_dict["time"].extend(days)
    screen_final_dict["screen"].extend(screen_value_mean_dict[person])
screen_final_df = pd.DataFrame(screen_final_dict)
# merge values for one day into one value represented by 1
# Assumes (not checked here) that the rows in call_dict are grouped by id in
# l2 order.  The recorded values themselves are not used: every date that has
# at least one row simply gets the constant 1.
call_id_time_dict = {}      # id -> its distinct dates, in first-seen order
call_time_count_dict = {}   # id -> number of rows on each of those dates
call_value_mean_dict = {}   # id -> a 1 for each of those dates
for call_ind, rows_for_id in enumerate(call_id_count_list):
    person = l2[call_ind]
    lo = sum(call_id_count_list[:call_ind])
    hi = lo + rows_for_id
    dates = call_dict["time"][lo:hi]
    call_id_time_dict[person] = list(dict.fromkeys(dates))
    call_time_count_dict[person] = [dates.count(day) for day in call_id_time_dict[person]]
    day_flags = []
    for call_y in range(len(call_time_count_dict[person])):
        day_flags.append(1)
    call_value_mean_dict[person] = day_flags
# One output row per (id, date) pair.
call_final_dict = {"id": [], "time": [], "call": []}
for person in l2:
    days = call_id_time_dict[person]
    call_final_dict["id"].extend([person] * len(days))
    call_final_dict["time"].extend(days)
    call_final_dict["call"].extend(call_value_mean_dict[person])
call_final_df = pd.DataFrame(call_final_dict)
# merge values for one day into one value represented by 1
# Assumes (not checked here) that the rows in sms_dict are grouped by id in
# l2 order.  The recorded values themselves are not used: every date that has
# at least one row simply gets the constant 1.
sms_id_time_dict = {}      # id -> its distinct dates, in first-seen order
sms_time_count_dict = {}   # id -> number of rows on each of those dates
sms_value_mean_dict = {}   # id -> a 1 for each of those dates
for sms_ind, rows_for_id in enumerate(sms_id_count_list):
    person = l2[sms_ind]
    lo = sum(sms_id_count_list[:sms_ind])
    hi = lo + rows_for_id
    dates = sms_dict["time"][lo:hi]
    sms_id_time_dict[person] = list(dict.fromkeys(dates))
    sms_time_count_dict[person] = [dates.count(day) for day in sms_id_time_dict[person]]
    day_flags = []
    for sms_y in range(len(sms_time_count_dict[person])):
        day_flags.append(1)
    sms_value_mean_dict[person] = day_flags
# One output row per (id, date) pair.
sms_final_dict = {"id": [], "time": [], "sms": []}
for person in l2:
    days = sms_id_time_dict[person]
    sms_final_dict["id"].extend([person] * len(days))
    sms_final_dict["time"].extend(days)
    sms_final_dict["sms"].extend(sms_value_mean_dict[person])
sms_final_df = pd.DataFrame(sms_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appb_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appb_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appb_time_count_dict = {}   # id -> number of rows on each of those dates
appb_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appb_ind, rows_for_id in enumerate(appb_id_count_list):
    person = l2[appb_ind]
    lo = sum(appb_id_count_list[:appb_ind])
    hi = lo + rows_for_id
    dates = appb_dict["time"][lo:hi]
    readings = appb_dict["value"][lo:hi]
    appb_id_time_dict[person] = list(dict.fromkeys(dates))
    appb_time_count_dict[person] = [dates.count(day) for day in appb_id_time_dict[person]]
    day_counts = appb_time_count_dict[person]
    day_means = []
    # The day index keeps the module-level name `appb_y`: it stays bound after
    # the loop and other statements in this script may read it.
    for appb_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appb_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appb_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appb_final_dict = {"id": [], "time": [], "appCat.builtin": []}
for person in l2:
    days = appb_id_time_dict[person]
    appb_final_dict["id"].extend([person] * len(days))
    appb_final_dict["time"].extend(days)
    appb_final_dict["appCat.builtin"].extend(appb_value_mean_dict[person])
appb_final_df = pd.DataFrame(appb_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appc_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appc_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appc_time_count_dict = {}   # id -> number of rows on each of those dates
appc_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appc_ind, rows_for_id in enumerate(appc_id_count_list):
    person = l2[appc_ind]
    lo = sum(appc_id_count_list[:appc_ind])
    hi = lo + rows_for_id
    dates = appc_dict["time"][lo:hi]
    readings = appc_dict["value"][lo:hi]
    appc_id_time_dict[person] = list(dict.fromkeys(dates))
    appc_time_count_dict[person] = [dates.count(day) for day in appc_id_time_dict[person]]
    day_counts = appc_time_count_dict[person]
    day_means = []
    for appc_y, rows_in_day in enumerate(day_counts):
        # The window is derived from this loop's own day index, so each date
        # averages exactly the rows that belong to it.
        day_lo = sum(day_counts[:appc_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appc_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appc_final_dict = {"id": [], "time": [], "appCat.communication": []}
for person in l2:
    days = appc_id_time_dict[person]
    appc_final_dict["id"].extend([person] * len(days))
    appc_final_dict["time"].extend(days)
    appc_final_dict["appCat.communication"].extend(appc_value_mean_dict[person])
appc_final_df = pd.DataFrame(appc_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appe_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appe_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appe_time_count_dict = {}   # id -> number of rows on each of those dates
appe_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appe_ind, rows_for_id in enumerate(appe_id_count_list):
    person = l2[appe_ind]
    lo = sum(appe_id_count_list[:appe_ind])
    hi = lo + rows_for_id
    dates = appe_dict["time"][lo:hi]
    readings = appe_dict["value"][lo:hi]
    appe_id_time_dict[person] = list(dict.fromkeys(dates))
    appe_time_count_dict[person] = [dates.count(day) for day in appe_id_time_dict[person]]
    day_counts = appe_time_count_dict[person]
    day_means = []
    for appe_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appe_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appe_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appe_final_dict = {"id": [], "time": [], "appCat.entertainment": []}
for person in l2:
    days = appe_id_time_dict[person]
    appe_final_dict["id"].extend([person] * len(days))
    appe_final_dict["time"].extend(days)
    appe_final_dict["appCat.entertainment"].extend(appe_value_mean_dict[person])
appe_final_df = pd.DataFrame(appe_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appf_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appf_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appf_time_count_dict = {}   # id -> number of rows on each of those dates
appf_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appf_ind, rows_for_id in enumerate(appf_id_count_list):
    person = l2[appf_ind]
    lo = sum(appf_id_count_list[:appf_ind])
    hi = lo + rows_for_id
    dates = appf_dict["time"][lo:hi]
    readings = appf_dict["value"][lo:hi]
    appf_id_time_dict[person] = list(dict.fromkeys(dates))
    appf_time_count_dict[person] = [dates.count(day) for day in appf_id_time_dict[person]]
    day_counts = appf_time_count_dict[person]
    day_means = []
    for appf_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appf_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appf_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appf_final_dict = {"id": [], "time": [], "appCat.finance": []}
for person in l2:
    days = appf_id_time_dict[person]
    appf_final_dict["id"].extend([person] * len(days))
    appf_final_dict["time"].extend(days)
    appf_final_dict["appCat.finance"].extend(appf_value_mean_dict[person])
appf_final_df = pd.DataFrame(appf_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appg_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appg_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appg_time_count_dict = {}   # id -> number of rows on each of those dates
appg_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appg_ind, rows_for_id in enumerate(appg_id_count_list):
    person = l2[appg_ind]
    lo = sum(appg_id_count_list[:appg_ind])
    hi = lo + rows_for_id
    dates = appg_dict["time"][lo:hi]
    readings = appg_dict["value"][lo:hi]
    appg_id_time_dict[person] = list(dict.fromkeys(dates))
    appg_time_count_dict[person] = [dates.count(day) for day in appg_id_time_dict[person]]
    day_counts = appg_time_count_dict[person]
    day_means = []
    for appg_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appg_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appg_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appg_final_dict = {"id": [], "time": [], "appCat.game": []}
for person in l2:
    days = appg_id_time_dict[person]
    appg_final_dict["id"].extend([person] * len(days))
    appg_final_dict["time"].extend(days)
    appg_final_dict["appCat.game"].extend(appg_value_mean_dict[person])
appg_final_df = pd.DataFrame(appg_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appof_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appof_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appof_time_count_dict = {}   # id -> number of rows on each of those dates
appof_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appof_ind, rows_for_id in enumerate(appof_id_count_list):
    person = l2[appof_ind]
    lo = sum(appof_id_count_list[:appof_ind])
    hi = lo + rows_for_id
    dates = appof_dict["time"][lo:hi]
    readings = appof_dict["value"][lo:hi]
    appof_id_time_dict[person] = list(dict.fromkeys(dates))
    appof_time_count_dict[person] = [dates.count(day) for day in appof_id_time_dict[person]]
    day_counts = appof_time_count_dict[person]
    day_means = []
    for appof_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appof_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appof_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appof_final_dict = {"id": [], "time": [], "appCat.office": []}
for person in l2:
    days = appof_id_time_dict[person]
    appof_final_dict["id"].extend([person] * len(days))
    appof_final_dict["time"].extend(days)
    appof_final_dict["appCat.office"].extend(appof_value_mean_dict[person])
appof_final_df = pd.DataFrame(appof_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in appot_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
appot_id_time_dict = {}      # id -> its distinct dates, in first-seen order
appot_time_count_dict = {}   # id -> number of rows on each of those dates
appot_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for appot_ind, rows_for_id in enumerate(appot_id_count_list):
    person = l2[appot_ind]
    lo = sum(appot_id_count_list[:appot_ind])
    hi = lo + rows_for_id
    dates = appot_dict["time"][lo:hi]
    readings = appot_dict["value"][lo:hi]
    appot_id_time_dict[person] = list(dict.fromkeys(dates))
    appot_time_count_dict[person] = [dates.count(day) for day in appot_id_time_dict[person]]
    day_counts = appot_time_count_dict[person]
    day_means = []
    for appot_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:appot_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    appot_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
appot_final_dict = {"id": [], "time": [], "appCat.other": []}
for person in l2:
    days = appot_id_time_dict[person]
    appot_final_dict["id"].extend([person] * len(days))
    appot_final_dict["time"].extend(days)
    appot_final_dict["appCat.other"].extend(appot_value_mean_dict[person])
appot_final_df = pd.DataFrame(appot_final_dict)
# merge values for one day into one mean value
# Assumes (not checked here) that the rows in apps_dict are grouped by id in
# l2 order and, inside each id, that rows sharing a date are adjacent.
apps_id_time_dict = {}      # id -> its distinct dates, in first-seen order
apps_time_count_dict = {}   # id -> number of rows on each of those dates
apps_value_mean_dict = {}   # id -> mean per date, aligned with the dates
for apps_ind, rows_for_id in enumerate(apps_id_count_list):
    person = l2[apps_ind]
    lo = sum(apps_id_count_list[:apps_ind])
    hi = lo + rows_for_id
    dates = apps_dict["time"][lo:hi]
    readings = apps_dict["value"][lo:hi]
    apps_id_time_dict[person] = list(dict.fromkeys(dates))
    apps_time_count_dict[person] = [dates.count(day) for day in apps_id_time_dict[person]]
    day_counts = apps_time_count_dict[person]
    day_means = []
    for apps_y, rows_in_day in enumerate(day_counts):
        day_lo = sum(day_counts[:apps_y])
        day_hi = day_lo + rows_in_day
        day_means.append(sum(readings[day_lo:day_hi]) / rows_in_day)
    apps_value_mean_dict[person] = day_means
# One output row per (id, date) pair.
apps_final_dict = {"id": [], "time": [], "appCat.social": []}
for person in l2:
    days = apps_id_time_dict[person]
    apps_final_dict["id"].extend([person] * len(days))
    apps_final_dict["time"].extend(days)
    apps_final_dict["appCat.social"].extend(apps_value_mean_dict[person])
apps_final_df = pd.DataFrame(apps_final_dict)
# merge values for one day into one mean value
# appCat.travel: collapse each participant's raw rows into one mean per day.
appt_id_time_dict = {}     # id -> distinct days, in order of first appearance
appt_time_count_dict = {}  # id -> number of rows recorded on each of those days
appt_value_mean_dict = {}  # id -> mean value on each of those days
appt_row_start = 0
for appt_pos, appt_row_count in enumerate(appt_id_count_list):
    # appt_id_count_list[k] is treated as the number of consecutive rows of
    # appt_dict that belong to l2[k]; a running offset replaces re-summing
    # the prefix of the count list on every iteration.
    appt_row_stop = appt_row_start + appt_row_count
    appt_days = appt_dict["time"][appt_row_start:appt_row_stop]
    appt_values = appt_dict["value"][appt_row_start:appt_row_stop]
    appt_row_start = appt_row_stop

    # Group every value under its own day label in a single pass. Dicts keep
    # insertion order, so days stay in order of first appearance. Grouping by
    # label (instead of slicing by position with the per-day counts) keeps the
    # mean correct even if one day's rows are not adjacent, and gives the same
    # result as positional slicing when they are.
    appt_values_by_day = {}
    for appt_day, appt_value in zip(appt_days, appt_values):
        appt_values_by_day.setdefault(appt_day, []).append(appt_value)

    appt_id = l2[appt_pos]
    appt_id_time_dict[appt_id] = list(appt_values_by_day)
    appt_time_count_dict[appt_id] = [
        len(day_values) for day_values in appt_values_by_day.values()
    ]
    appt_value_mean_dict[appt_id] = [
        sum(day_values) / len(day_values)
        for day_values in appt_values_by_day.values()
    ]

# Flatten to parallel columns: the id is repeated once per recorded day.
appt_final_dict = {"id": [], "time": [], "appCat.travel": []}
for appt_id in l2:
    appt_id_days = appt_id_time_dict[appt_id]
    appt_final_dict["id"].extend([appt_id] * len(appt_id_days))
    appt_final_dict["time"].extend(appt_id_days)
    appt_final_dict["appCat.travel"].extend(appt_value_mean_dict[appt_id])
appt_final_df = pd.DataFrame(appt_final_dict)
# merge values for one day into one mean value
# appCat.unknown: collapse each participant's raw rows into one mean per day.
appun_id_time_dict = {}     # id -> distinct days, in order of first appearance
appun_time_count_dict = {}  # id -> number of rows recorded on each of those days
appun_value_mean_dict = {}  # id -> mean value on each of those days
appun_row_start = 0
for appun_pos, appun_row_count in enumerate(appun_id_count_list):
    # appun_id_count_list[k] is treated as the number of consecutive rows of
    # appun_dict that belong to l2[k]; a running offset replaces re-summing
    # the prefix of the count list on every iteration.
    appun_row_stop = appun_row_start + appun_row_count
    appun_days = appun_dict["time"][appun_row_start:appun_row_stop]
    appun_values = appun_dict["value"][appun_row_start:appun_row_stop]
    appun_row_start = appun_row_stop

    # Group every value under its own day label in a single pass. Dicts keep
    # insertion order, so days stay in order of first appearance. Grouping by
    # label (instead of slicing by position with the per-day counts) keeps the
    # mean correct even if one day's rows are not adjacent, and gives the same
    # result as positional slicing when they are.
    appun_values_by_day = {}
    for appun_day, appun_value in zip(appun_days, appun_values):
        appun_values_by_day.setdefault(appun_day, []).append(appun_value)

    appun_id = l2[appun_pos]
    appun_id_time_dict[appun_id] = list(appun_values_by_day)
    appun_time_count_dict[appun_id] = [
        len(day_values) for day_values in appun_values_by_day.values()
    ]
    appun_value_mean_dict[appun_id] = [
        sum(day_values) / len(day_values)
        for day_values in appun_values_by_day.values()
    ]

# Flatten to parallel columns: the id is repeated once per recorded day.
appun_final_dict = {"id": [], "time": [], "appCat.unknown": []}
for appun_id in l2:
    appun_id_days = appun_id_time_dict[appun_id]
    appun_final_dict["id"].extend([appun_id] * len(appun_id_days))
    appun_final_dict["time"].extend(appun_id_days)
    appun_final_dict["appCat.unknown"].extend(appun_value_mean_dict[appun_id])
appun_final_df = pd.DataFrame(appun_final_dict)
# merge values for one day into one mean value
# appCat.utilities: collapse each participant's raw rows into one mean per day.
apput_id_time_dict = {}     # id -> distinct days, in order of first appearance
apput_time_count_dict = {}  # id -> number of rows recorded on each of those days
apput_value_mean_dict = {}  # id -> mean value on each of those days
apput_row_start = 0
for apput_pos, apput_row_count in enumerate(apput_id_count_list):
    # apput_id_count_list[k] is treated as the number of consecutive rows of
    # apput_dict that belong to l2[k]; a running offset replaces re-summing
    # the prefix of the count list on every iteration.
    apput_row_stop = apput_row_start + apput_row_count
    apput_days = apput_dict["time"][apput_row_start:apput_row_stop]
    apput_values = apput_dict["value"][apput_row_start:apput_row_stop]
    apput_row_start = apput_row_stop

    # Group every value under its own day label in a single pass. Dicts keep
    # insertion order, so days stay in order of first appearance. Grouping by
    # label (instead of slicing by position with the per-day counts) keeps the
    # mean correct even if one day's rows are not adjacent, and gives the same
    # result as positional slicing when they are.
    apput_values_by_day = {}
    for apput_day, apput_value in zip(apput_days, apput_values):
        apput_values_by_day.setdefault(apput_day, []).append(apput_value)

    apput_id = l2[apput_pos]
    apput_id_time_dict[apput_id] = list(apput_values_by_day)
    apput_time_count_dict[apput_id] = [
        len(day_values) for day_values in apput_values_by_day.values()
    ]
    apput_value_mean_dict[apput_id] = [
        sum(day_values) / len(day_values)
        for day_values in apput_values_by_day.values()
    ]

# Flatten to parallel columns: the id is repeated once per recorded day.
apput_final_dict = {"id": [], "time": [], "appCat.utilities": []}
for apput_id in l2:
    apput_id_days = apput_id_time_dict[apput_id]
    apput_final_dict["id"].extend([apput_id] * len(apput_id_days))
    apput_final_dict["time"].extend(apput_id_days)
    apput_final_dict["appCat.utilities"].extend(apput_value_mean_dict[apput_id])
apput_final_df = pd.DataFrame(apput_final_dict)
# merge values for one day into one mean value
# appCat.weather: collapse each participant's raw rows into one mean per day.
appw_id_time_dict = {}     # id -> distinct days, in order of first appearance
appw_time_count_dict = {}  # id -> number of rows recorded on each of those days
appw_value_mean_dict = {}  # id -> mean value on each of those days
appw_row_start = 0
for appw_pos, appw_row_count in enumerate(appw_id_count_list):
    # appw_id_count_list[k] is treated as the number of consecutive rows of
    # appw_dict that belong to l2[k]; a running offset replaces re-summing
    # the prefix of the count list on every iteration.
    appw_row_stop = appw_row_start + appw_row_count
    appw_days = appw_dict["time"][appw_row_start:appw_row_stop]
    appw_values = appw_dict["value"][appw_row_start:appw_row_stop]
    appw_row_start = appw_row_stop

    # Group every value under its own day label in a single pass. Dicts keep
    # insertion order, so days stay in order of first appearance. Grouping by
    # label (instead of slicing by position with the per-day counts) keeps the
    # mean correct even if one day's rows are not adjacent, and gives the same
    # result as positional slicing when they are.
    appw_values_by_day = {}
    for appw_day, appw_value in zip(appw_days, appw_values):
        appw_values_by_day.setdefault(appw_day, []).append(appw_value)

    appw_id = l2[appw_pos]
    appw_id_time_dict[appw_id] = list(appw_values_by_day)
    appw_time_count_dict[appw_id] = [
        len(day_values) for day_values in appw_values_by_day.values()
    ]
    appw_value_mean_dict[appw_id] = [
        sum(day_values) / len(day_values)
        for day_values in appw_values_by_day.values()
    ]

# Flatten to parallel columns: the id is repeated once per recorded day.
appw_final_dict = {"id": [], "time": [], "appCat.weather": []}
for appw_id in l2:
    appw_id_days = appw_id_time_dict[appw_id]
    appw_final_dict["id"].extend([appw_id] * len(appw_id_days))
    appw_final_dict["time"].extend(appw_id_days)
    appw_final_dict["appCat.weather"].extend(appw_value_mean_dict[appw_id])
appw_final_df = pd.DataFrame(appw_final_dict)
# Row count of every per-variable daily frame, keyed by its short label.
_final_frames_by_label = {
    "mood": mood_final_df,
    "ca": ca_final_df,
    "cv": cv_final_df,
    "activity": activity_final_df,
    "screen": screen_final_df,
    "call": call_final_df,
    "sms": sms_final_df,
    "appb": appb_final_df,
    "appc": appc_final_df,
    "appe": appe_final_df,
    "appf": appf_final_df,
    "appg": appg_final_df,
    "appof": appof_final_df,
    "appot": appot_final_df,
    "apps": apps_final_df,
    "appt": appt_final_df,
    "appun": appun_final_df,
    "apput": apput_final_df,
    "appw": appw_final_df,
}
df_size_dict = {
    label: len(frame["id"]) for label, frame in _final_frames_by_label.items()
}
# Keep the individual size1..size19 names bound, in the same order as the
# labels above.
(
    size1, size2, size3, size4, size5, size6, size7, size8, size9, size10,
    size11, size12, size13, size14, size15, size16, size17, size18, size19,
) = df_size_dict.values()
# Bare expression: only has a visible effect when run as a notebook cell.
df_size_dict
# Empty column lists for the merged table; key order fixes the column order
# of the DataFrame built from it.
merged_dict = {
    column: []
    for column in (
        "id", "time", "ca", "cv", "activity", "screen", "call", "sms",
        "appb", "appc", "appe", "appf", "appg", "appof", "appot", "apps",
        "appt", "appun", "apput", "appw", "mood",
    )
}
# This cell and the next cell are for merging the whole features and target together


def _index_by_id_day(final_dict, value_column):
    """Return a lookup from "<id> <time>" to the value in ``value_column``.

    ``final_dict`` holds parallel lists under "id", "time" and
    ``value_column``. If the same id/time pair occurs more than once, the
    last value wins.
    """
    return {
        pid + " " + day: value
        for pid, day, value in zip(
            final_dict["id"], final_dict["time"], final_dict[value_column]
        )
    }


temp_mood_dict = _index_by_id_day(mood_final_dict, "mood")
temp_ca_dict = _index_by_id_day(ca_final_dict, "circumplex.arousal")
temp_cv_dict = _index_by_id_day(cv_final_dict, "circumplex.valence")
temp_activity_dict = _index_by_id_day(activity_final_dict, "activity")
temp_screen_dict = _index_by_id_day(screen_final_dict, "screen")
temp_call_dict = _index_by_id_day(call_final_dict, "call")
temp_sms_dict = _index_by_id_day(sms_final_dict, "sms")
temp_appb_dict = _index_by_id_day(appb_final_dict, "appCat.builtin")
temp_appc_dict = _index_by_id_day(appc_final_dict, "appCat.communication")
temp_appe_dict = _index_by_id_day(appe_final_dict, "appCat.entertainment")
temp_appf_dict = _index_by_id_day(appf_final_dict, "appCat.finance")
temp_appg_dict = _index_by_id_day(appg_final_dict, "appCat.game")
temp_appof_dict = _index_by_id_day(appof_final_dict, "appCat.office")
temp_appot_dict = _index_by_id_day(appot_final_dict, "appCat.other")
temp_apps_dict = _index_by_id_day(apps_final_dict, "appCat.social")
temp_appt_dict = _index_by_id_day(appt_final_dict, "appCat.travel")
temp_appun_dict = _index_by_id_day(appun_final_dict, "appCat.unknown")
temp_apput_dict = _index_by_id_day(apput_final_dict, "appCat.utilities")
temp_appw_dict = _index_by_id_day(appw_final_dict, "appCat.weather")
# Left-join every feature onto the mood records: one output row per
# "<id> <time>" key of temp_mood_dict, with the string "NA" wherever a
# feature has no entry for that key.
_feature_lookup_by_column = {
    "ca": temp_ca_dict,
    "cv": temp_cv_dict,
    "activity": temp_activity_dict,
    "screen": temp_screen_dict,
    "call": temp_call_dict,
    "sms": temp_sms_dict,
    "appb": temp_appb_dict,
    "appc": temp_appc_dict,
    "appe": temp_appe_dict,
    "appf": temp_appf_dict,
    "appg": temp_appg_dict,
    "appof": temp_appof_dict,
    "appot": temp_appot_dict,
    "apps": temp_apps_dict,
    "appt": temp_appt_dict,
    "appun": temp_appun_dict,
    "apput": temp_apput_dict,
    "appw": temp_appw_dict,
}
for id_day_key, mood_value in temp_mood_dict.items():
    # The fixed offsets split the key back into its parts; they assume a
    # 7-character id, one space, then a 10-character date.
    merged_dict["id"].append(id_day_key[0:7])
    merged_dict["time"].append(id_day_key[8:18])
    merged_dict["mood"].append(mood_value)
    for feature_column, feature_lookup in _feature_lookup_by_column.items():
        merged_dict[feature_column].append(feature_lookup.get(id_day_key, "NA"))
# Build the merged table (columns follow merged_dict's key order).
merged_final_df = pd.DataFrame(data=merged_dict)
# output .csv file
# Written comma-separated, without the row index and without a header row.
outputpath = '/Users/liusifan/Desktop/processed.csv'
merged_final_df.to_csv(path_or_buf=outputpath, sep=',', header=False, index=False)
#counts = np.bincount(value_sublist[0:ti[activity_y]])
#mode_value = np.argmax(counts)
import matplotlib.pyplot as plt
def _plot_histogram(values, bins, label, x_range):
    """Draw and show a count histogram of ``values``.

    ``label`` names the variable in both the title and the x-axis label, so
    each figure is titled after the data it actually shows. ``x_range`` is the
    (low, high) pair passed to ``plt.xlim``.
    """
    plt.hist(values, bins=bins)
    plt.title('Histogram ' + label + ' value of dataset')
    plt.ylabel('Count')
    plt.xlabel(label + ' value')
    plt.xlim(*x_range)
    plt.show()


mood_distribution = merged_final_df['mood']
x1 = mood_distribution.to_numpy()
_plot_histogram(x1, bins=10, label='mood', x_range=(0, 10))

# The merged 'ca' column holds the string "NA" for days without an arousal
# value; those placeholders are not numeric, so coerce them to NaN and drop
# them before binning.
ca_distribution = pd.to_numeric(merged_final_df['ca'], errors='coerce').dropna()
x2 = ca_distribution.to_numpy()
_plot_histogram(x2, bins=5, label='ca', x_range=(-2, 2))

# NOTE: valence is read from cv_final_df (every day with a valence value),
# not from the merged table like the two plots above.
cv_distribution = cv_final_df['circumplex.valence']
x3 = cv_distribution.to_numpy()
_plot_histogram(x3, bins=5, label='cv', x_range=(-2, 2))