Dependencies
from getdist.preprocessing import parse_tb, resample_df, drop_zero_df, get_hour_df, find_cron_hours, get_days, create_crons, get_interval_crons,get_hours_from_cron
from getdist.dl_models import LSTMAE
from getdist.plot import PrsPlots
import importlib
import getdist as gd
import pandas as pd
import matplotlib.pyplot as plt
from croniter import croniter
from datetime import datetime
from croniter import croniter_range
# --- Notebook environment setup ---
import warnings
# Silence all warnings to keep cell output readable (NOTE: this also hides real issues).
warnings.filterwarnings('ignore')
from IPython.core.interactiveshell import InteractiveShell
# Display every bare expression in a cell, not just the last one.
InteractiveShell.ast_node_interactivity = 'all'
# Hot-reload the local getdist submodules so source edits take effect
# without restarting the kernel.
importlib.reload(gd.preprocessing)
importlib.reload(gd.dl_models)
importlib.reload(gd.plot)
1. Get a ZE table
# TODO (Darien)
# Load the seven sample ZE extracts into individually named frames
# (data, data2, ..., data7) so later cells can reference them directly.
data, data2, data3, data4, data5, data6, data7 = (
    pd.read_csv(f'data/sample_ze_data{suffix}.csv')
    for suffix in ('', '2', '3', '4', '5', '6', '7')
)
# Per-market source tables used by the pipelines below.
IESO_data = pd.read_csv('data/table_IESO.csv', index_col=False)
GEOGRAIN_data = pd.read_csv('data/GEOGRAIN_data.csv')
PJM_G_Data = pd.read_csv('data/PJM_G.csv', index_col=False)
PJM_X_Data = pd.read_csv('data/PJM_X.csv', index_col=False)
MISO_data = pd.read_csv('data/MISO.csv', index_col=False)
PJM_A_data = pd.read_csv('data/PJM_A.csv', index_col=False)
# data.head(1)
# data2.head(1)
2. Parse the table
# Parse the raw GEOGRAIN table into the normalized frame used by the steps below.
parsed_df = parse_tb(GEOGRAIN_data)
# Bare expression: shown via notebook display only.
parsed_df.head(3)
# Parse the IESO table as well — presumably for comparison; not referenced
# later in this file — TODO confirm it is still needed.
IESO_parsed = parse_tb(IESO_data)
3. Aggregate the table
# Hourly aggregation of the parsed table; the result is intentionally not
# assigned — it is rendered by the notebook display hook only.
resample_df(parsed_df,'H')
3.1 Visualize the aggregated data
# Plotting helper from getdist.plot.
pplot = PrsPlots()
# Cron expression for the existing schedule. NOTE(review): the leading 'S' is
# not standard Quartz/cron syntax ('0 0 17 * * ?' would be sec/min/hour) —
# confirm plot_with_crons expects this prefix.
cron_g = 'S 0 0 17 * * ?'
# Overlay the cron schedule on the hourly-aggregated GEOGRAIN series.
# The trailing positional flags are show_cron=True, show_missed=False
# (matching the keyword call later in this file).
pplot.plot_with_crons(GEOGRAIN_data, 'H', 5, cron_g, 'GEOGRAIN_CASH_BIDS', True, False)
4. Develop the Models
# Derive candidate cron schedules from the parsed GEOGRAIN data.
# Bare names after each step are notebook display lines.
# Step 1: hours matching the cron criteria (threshold 0; meaning of the
# third True flag not visible here — TODO confirm against find_cron_hours).
hours_df_g = find_cron_hours(parsed_df,0, True)
hours_df_g
# Step 2: map each candidate hour to the days it occurs on.
days_dict_g = get_days(parsed_df, hours_df_g)
days_dict_g
# Step 3: build cron expressions from the (hour, day) combinations.
crons_g = create_crons(days_dict_g)
crons_g
# Step 4: expand each cron into interval crons with width 10.
all_crons_g = get_interval_crons(crons_g,10)
all_crons_g
# Visualize the candidate interval crons over the raw GEOGRAIN table.
pplot.plot_interval_crons(GEOGRAIN_data, all_crons_g)
4.1 ML Model
# TODO (Nadia)
4.2 DL Model
# TODO (Frank)
# --- GEOGRAIN: LSTM autoencoder anomaly detection ---
# Get the aggregated dataframe: parse -> hourly resample -> drop zero rows -> hourly frame.
parse_geograin = parse_tb(GEOGRAIN_data)
resampled_geograin = resample_df(parse_geograin, 'H')
time_geograin = get_hour_df(drop_zero_df(resampled_geograin))
# Amount of time units (days, hours etc.) to look back on
TIME_STEPS = 10
# Creates and saves the model and scaler transformation
model = LSTMAE()
# Note: this retrains the model, and will take a substantial amount of time
lstm_model, scaler = model.create_model(time_geograin, TIME_STEPS)
# Applies the trained LSTM model onto the dataframe
anomalies_df_geo = model.apply_model(time_geograin, TIME_STEPS, input_tuple = (lstm_model, scaler))
print(anomalies_df_geo)
#anomalies_df_geo.to_csv('GEO_Anomaly_data.csv')
# anomaly_schedule presumably returns the index position where the latest
# anomaly-free run starts — TODO confirm against dl_models.LSTMAE.
latest_start = model.anomaly_schedule(anomalies_df_geo, TIME_STEPS)
print("Suggested time: ", anomalies_df_geo.index[latest_start], " to ", anomalies_df_geo.index[-1])
# for PJM_G_Data
# --- PJM_G: LSTM autoencoder anomaly detection (same pipeline as GEOGRAIN) ---
# Get the aggregated dataframe: parse -> hourly resample -> drop zero rows -> hourly frame.
parse_PJMG = parse_tb(PJM_G_Data)
resampled_PJMG = resample_df(parse_PJMG, 'H')
time_PJMG = get_hour_df(drop_zero_df(resampled_PJMG))
# Amount of time units (days, hours etc.) to look back on
TIME_STEPS = 10
# Creates and saves the model and scaler transformation
model = LSTMAE()
# Note: this retrains the model, and will take a substantial amount of time
lstm_model_pjmg, scaler_pjmg = model.create_model(time_PJMG, TIME_STEPS)
# Applies the trained LSTM model onto the dataframe
anomalies_df_PJMG = model.apply_model(time_PJMG, TIME_STEPS, input_tuple = (lstm_model_pjmg, scaler_pjmg))
# Persist the per-hour anomaly flags for PJM_G.
anomalies_df_PJMG.to_csv('PJMG_Anomaly_data.csv')
print(anomalies_df_PJMG)
latest_start = model.anomaly_schedule(anomalies_df_PJMG, TIME_STEPS)
print("Suggested time: ", anomalies_df_PJMG.index[latest_start], " to ", anomalies_df_PJMG.index[-1])
# for PJM_X.csv
# --- PJM_X: LSTM autoencoder anomaly detection (same pipeline as GEOGRAIN) ---
# Get the aggregated dataframe: parse -> hourly resample -> drop zero rows -> hourly frame.
parse_PJMX = parse_tb(PJM_X_Data)
resampled_PJMX = resample_df(parse_PJMX, 'H')
time_PJMX = get_hour_df(drop_zero_df(resampled_PJMX))
# Amount of time units (days, hours etc.) to look back on
TIME_STEPS = 10
# Creates and saves the model and scaler transformation
model = LSTMAE()
# Note: this retrains the model, and will take a substantial amount of time
lstm_model_pjmx, scaler_pjmx = model.create_model(time_PJMX, TIME_STEPS)
# Applies the trained LSTM model onto the dataframe.
anomalies_df_PJMX = model.apply_model(time_PJMX, TIME_STEPS, input_tuple = (lstm_model_pjmx, scaler_pjmx))
# Persist the per-hour anomaly flags for PJM_X.
anomalies_df_PJMX.to_csv('pjmx_anomaly.csv')
print(anomalies_df_PJMX)
latest_start = model.anomaly_schedule(anomalies_df_PJMX, TIME_STEPS)
print("Suggested time: ", anomalies_df_PJMX.index[latest_start], " to ", anomalies_df_PJMX.index[-1])
# for MISO_data
# --- MISO: LSTM autoencoder anomaly detection (same pipeline as GEOGRAIN) ---
# Get the aggregated dataframe: parse -> hourly resample -> drop zero rows -> hourly frame.
parse_MISO = parse_tb(MISO_data)
resampled_MISO = resample_df(parse_MISO, 'H')
time_MISO = get_hour_df(drop_zero_df(resampled_MISO))
# Amount of time units (days, hours etc.) to look back on
TIME_STEPS = 10
# Creates and saves the model and scaler transformation
model = LSTMAE()
# Note: this retrains the model, and will take a substantial amount of time
lstm_model_MISO, scaler_MISO = model.create_model(time_MISO, TIME_STEPS)
# Applies the trained LSTM model onto the dataframe.
anomalies_df_MISO = model.apply_model(time_MISO, TIME_STEPS, input_tuple = (lstm_model_MISO, scaler_MISO))
# Persist the per-hour anomaly flags for MISO.
anomalies_df_MISO.to_csv('MISO_anomaly.csv')
print(anomalies_df_MISO)
latest_start = model.anomaly_schedule(anomalies_df_MISO, TIME_STEPS)
print("Suggested time: ", anomalies_df_MISO.index[latest_start], " to ", anomalies_df_MISO.index[-1])
# for PJM_A_data
# --- PJM_A: LSTM autoencoder training (same pipeline as GEOGRAIN) ---
# BUG FIX: the original cell was a copy-paste of the PJM_G cell — it read
# PJM_G_Data and overwrote the PJM_G variables instead of using PJM_A_data.
# It now uses PJM_A_data with PJM_A-specific names.
# Get the aggregated dataframe: parse -> hourly resample -> drop zero rows -> hourly frame.
parse_PJMA = parse_tb(PJM_A_data)
resampled_PJMA = resample_df(parse_PJMA, 'H')
time_PJMA = get_hour_df(drop_zero_df(resampled_PJMA))
# Amount of time units (days, hours etc.) to look back on
TIME_STEPS = 10
# Creates and saves the model and scaler transformation
model = LSTMAE()
# Note: this retrains the model, and will take a substantial amount of time
lstm_model_pjma, scaler_pjma = model.create_model(time_PJMA, TIME_STEPS)
Find Optimal Cron Schedule for the clean data interval
#print(anomalies_df_geo.index)
# Keep only GEOGRAIN results after this cutoff; the timestamp was presumably
# taken from the anomaly_schedule suggestion above — TODO confirm rather than
# hard-coding it.
geo_tb = anomalies_df_geo[anomalies_df_geo.index>'2017-11-13 15:00:00']
# Rows flagged clean (anomaly == False), with the flag column dropped.
geo_no_anomaly = geo_tb[geo_tb['anomaly']==False]
geo_no_anomaly = geo_no_anomaly.drop(['anomaly'], axis =1)
print(geo_no_anomaly)
# Re-plot the existing cron schedule over the clean interval only.
pplot.plot_with_crons(geo_tb, 'H', 10, cron_g, 'GEOGRAIN_CASH_BIDS', show_cron= True, show_missed = False)
# Re-derive candidate cron schedules from the anomaly-free data
# (same four-step pipeline used earlier on the full parsed table).
hours_df_g = find_cron_hours(geo_no_anomaly,0)
days_dict_g = get_days(geo_no_anomaly, hours_df_g)
crons_g = create_crons(days_dict_g)
all_crons_g = get_interval_crons(crons_g,10)
pplot.plot_interval_crons(geo_tb, all_crons_g)