turbine_telemetry = pd.read_csv("data/turbine_telemetry.csv" , parse_dates=['Timestamp'])
# Removes missing values from the data
turbine_telemetry_ = turbine_telemetry.set_index('Timestamp')
turbine_telemetry_ = turbine_telemetry_.loc['2016-01-01':'2018-01-01']
turbine_telemetry_ = turbine_telemetry_.dropna()
# Removes missing values from the data
turbine_telemetry_cur = turbine_telemetry.set_index('Timestamp')
turbine_telemetry_cur = turbine_telemetry_cur.loc['2016-01-01':'2018-01-01']
turbine_telemetry_cur = turbine_telemetry_cur.dropna()
# Power seasonality analysis (monthly averages) - DATA
telemetry_avg = telemetry.resample('M').mean()
# Power seasonality analysis (monthly averages) - VISUALISATION
telemetry_avg.plot(y='Power_kw', use_index=True, figsize = (10,5))
plt.xlabel("")
plt.ylabel("Power (in kw)")
plt.title('Power Seasonality: monthly averages')
plt.show()
# Studies cases when the power (kw) record exceeds the setpoint record (kw)
is_power_bigger = turbine_telemetry['Power_kw'] > turbine_telemetry['Setpoint_kw']
telemetry_filter = turbine_telemetry[is_power_bigger]
print('The total number of instances in which power (kw) surpasses the setpoint (kw) is:',len(telemetry_filter))
# Studies on average how much power (kw) exceeds the set point (kw) - for the relevant cases
telemetry_filter['Extra'] = telemetry_filter['Power_kw'] - telemetry_filter['Setpoint_kw']
print('On average the power (kw) surpasses the setpoint by:',round(telemetry_filter['Extra'].mean(),2),'kw')
# Power and setpoint filtering
mask = turbine_telemetry_['Power_kw'] > turbine_telemetry_['Setpoint_kw']
turbine_telemetry_f = turbine_telemetry_[mask]
for i in range(len(turbine_telemetry_f)):
if turbine_telemetry_f.iloc[i,1] <= turbine_telemetry_f.iloc[i,2] * 1.1:
turbine_telemetry_f.iloc[i,1] = turbine_telemetry_f.iloc[i,2]
else:
turbine_telemetry_f.iloc[i,1] = np.nan
# Rejoins the filtered data (where power > setpoint) with the dataset
turbine_telemetry_f = turbine_telemetry_f.dropna()
turbine_telemetry_ = pd.concat([turbine_telemetry_[-mask], turbine_telemetry_f], axis=0)
turbine_telemetry_ = turbine_telemetry_.sort_index()
# Power as a function of wind - VISUALIZATION
plt.figure(figsize=(10, 5))
plt.scatter(turbine_telemetry["Wind_ms"],turbine_telemetry["Power_kw"], alpha =0.01)
plt.xlabel("Wind (in ms)")
plt.ylabel("Power (in kw)")
plt.title("Power as a function of wind")
plt.show()
# Filters data for uncurtailed periods
mask2 = turbine_telemetry_['Setpoint_kw'] == 900
turbine_telemetry_ = turbine_telemetry_[mask2]
# Minimum speed for minimum power
mask4 = turbine_telemetry_['Power_kw'] == 10
turbine_telemetry_[mask4]['Wind_ms'].min()
# Minimum speed filtering
mask3 = (turbine_telemetry_['Wind_ms'] > 3.0) & (turbine_telemetry_['Power_kw'] == 0)
turbine_telemetry_ = turbine_telemetry_[-mask3]
#Maximum speed filtering
mask4 = (turbine_telemetry_['Wind_ms'] > 20.0) & (turbine_telemetry_['Power_kw'] < 800)
turbine_telemetry_ = turbine_telemetry_[-mask4]
# Power as a function of wind speed
plt.figure(figsize=(10, 5))
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Power_kw"], alpha =0.01)
plt.xlabel("Wind (in ms)")
plt.ylabel("Power (in kw)")
plt.show()
# Mean model calibration
predict_power_dict={}
for i in np.unique(turbine_telemetry_["Wind_ms"]):
predict_power_dict[i]= np.mean(turbine_telemetry_[turbine_telemetry_["Wind_ms"]==i]["Power_kw"])
predict_power_list=[]
for w in turbine_telemetry_["Wind_ms"]:
#print(w)
p_tmp= predict_power_dict[w]
predict_power_list.append(p_tmp)
turbine_telemetry_["Mean_Predicted_Power_kw"]= predict_power_list
# Mean model - VISUALIZATION
plt.figure(figsize=(10, 5))
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Mean_Predicted_Power_kw"], alpha =0.01)
plt.xlabel("Wind (in ms)")
plt.ylabel("Power (in kw)")
plt.show()
# Mean model RMSE calculation
MSE_avg = mean_squared_error(turbine_telemetry_["Power_kw"], turbine_telemetry_["Mean_Predicted_Power_kw"])
RMSE_avg = math.sqrt(MSE_avg)
print("The mean model has a Root Mean Square Error of", round(RMSE_avg,2))
# Mean model estimations for power based on wind speed
predict_power_list_cur=[]
for w in turbine_telemetry_cur["Wind_ms"]:
#print(w)
if w in predict_power_dict.keys():
p_tmp= predict_power_dict[w]
else:
tmp_w=w
while tmp_w>0 :
#print(tmp_w)
tmp_w = tmp_w-0.1
if tmp_w in predict_power_dict.keys():
#print(tmp_w)
p_tmp_lower= predict_power_dict[tmp_w]
break
tmp_w=w
p_tmp_upper=900
while tmp_w<35 :
#print(tmp_w)
tmp_w = tmp_w+0.1
if tmp_w in predict_power_dict.keys():
#print(tmp_w)
p_tmp_upper= predict_power_dict[tmp_w]
break
p_tmp = (p_tmp_lower+p_tmp_upper)/2
predict_power_list_cur.append(p_tmp)
turbine_telemetry_cur["Max_Power_kw"]= predict_power_list_cur
# Curtailed power calculation
turbine_telemetry_cur["Curtailed_Power_kw"] = turbine_telemetry_cur["Max_Power_kw"] - turbine_telemetry_cur["Power_kw"]
turbine_telemetry_cur.head()
# Curtailed power calculation - refinement
turbine_telemetry_cur.loc[turbine_telemetry_cur["Curtailed_Power_kw"]< 0,"Curtailed_Power_kw"] =0
turbine_telemetry_cur[turbine_telemetry_cur["Curtailed_Power_kw"]>0].head()
# Curtailed Power - VISUALISATION
power_month_avg = turbine_telemetry_cur.resample('M').mean()
power_month_avg.plot(y='Curtailed_Power_kw', use_index=True, figsize = (10,5))
plt.xlabel("")
plt.ylabel("Average Curtailed Power (in kw)")
plt.title("Average curtailed power")
plt.show()
# Mean curtailed power
print("Average curtailed power:",round(np.mean(turbine_telemetry_cur["Curtailed_Power_kw"]),2),'KW')
print("Standard deviation of curtailed power:",round(np.std(turbine_telemetry_cur["Curtailed_Power_kw"]),2), 'KW')
print("Average theoretical maximum power:", round(np.mean(turbine_telemetry_cur["Max_Power_kw"]),2), 'KW')
## Percentage of power curtailed in 2017
_2017 = turbine_telemetry_cur['2017-01-01':'2018-01-01']
print("Percentage of power curtailed in 2017:",round(_2017["Curtailed_Power_kw"].sum() / _2017["Max_Power_kw"].sum()*100,2),'%')
## Percentage of power curtailed in 2016
_2016 = turbine_telemetry_cur['2016-01-01':'2017-01-01']
print("Percentage of power curtailed in 2016:", round(_2016["Curtailed_Power_kw"].sum() / _2017["Max_Power_kw"].sum()*100,2),'%')
#inspect if maintenance occur
turbine_telemetry_cur[turbine_telemetry_cur["Power_kw"]==0].head()
#1. impute data for the generated power during the maintenance
turbine_telemetry_cur['2016-01-21':'2016-02-03']['Power_kw']= turbine_telemetry_cur['2016-01-21':'2016-02-03']['Max_Power_kw']
#2. subtract expected power energy loss during maintenance over all time periods.
potential_loss= sum(turbine_telemetry_cur.loc['2016-01-21':'2016-02-03']['Power_kw'])
turbine_telemetry_cur['Power_kw'] = turbine_telemetry_cur['Power_kw'] - potential_loss/(len(turbine_telemetry_cur))
turbine_telemetry_cur['Max_Power_kw'] = turbine_telemetry_cur['Max_Power_kw'] - potential_loss/(len(turbine_telemetry_cur))
# Filters the data for the year 2017 (i.e., the year of available demand data)
turbine_curt_energy = turbine_telemetry_cur.loc['2017-01-01':'2018-01-01']
# Calculates energy in kwh as integral of power in kw - for curtailed power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) == 30:
energy_integral[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) > 0:
energy_integral[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral[i,0]= energy_integral[i-1,0]
start = start + timedelta(minutes=30)
# Calculates energy in kwh as integral of power in kw - for maximum power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral_max = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) == 30:
energy_integral_max[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) > 0:
energy_integral_max[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral_max[i,0]= energy_integral_max[i-1,0]
start = start + timedelta(minutes=30)
# Calculates energy in kwh as integral of power in kw - for actual power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral_act = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) == 30:
energy_integral_act[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) > 0:
energy_integral_act[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral_act[i,0]= energy_integral_act[i-1,0]
start = start + timedelta(minutes=30)
# Summarises all calculations in a data frame
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
timestamps = []
for i in range(17520):
timestamps.append(start + timedelta(minutes=30))
start = start + timedelta(minutes=30)
supply_curtailment = np.column_stack((timestamps, energy_integral[:,0],energy_integral_max[:,0], energy_integral_act[:,0]))
# Cleans and organises the new turbine_energy dataframe
supply_curtailment = pd.DataFrame(supply_curtailment)
supply_curtailment.columns = ['Timestamp', 'Energy_integral_kwh', 'Energy_integral_kwh_max', 'Energy_integral_kwh_act']
supply_curtailment = supply_curtailment.set_index('Timestamp')
supply_curtailment['Energy_integral_kwh'] = supply_curtailment['Energy_integral_kwh'].astype(float, errors = 'raise')
supply_curtailment['Energy_integral_kwh_max'] = supply_curtailment['Energy_integral_kwh_max'].astype(float, errors = 'raise')
supply_curtailment['Energy_integral_kwh_act'] = supply_curtailment['Energy_integral_kwh_act'].astype(float, errors = 'raise')
supply_curtailment = supply_curtailment.round(decimals = 4)
supply_curtailment.head()
# Curtailed Energy - VISUALISATION
energy_month_avg = supply_curtailment.resample('M').mean()
energy_month_avg.plot(y='Energy_integral_kwh', use_index=True, figsize=(10,5))
plt.xlabel("")
plt.ylabel("Average Curtailed Energy (in kwh)")
plt.title("Curtailed energy")
plt.show()
# Total curtailed energy for the year 2017 in GWh for this turbine
supply_curtailment['Energy_integral_kwh'].sum()/1000000
supply_curtailment['total_Rousay']= supply_curtailment['Energy_integral_kwh']*85
supply_curtailment['total_Westray']= supply_curtailment['Energy_integral_kwh']*85*(25.7/24.9)
supply_curtailment['total_Eday']= supply_curtailment['Energy_integral_kwh']*85*(25.3/24.9)
supply_curtailment['total_Stronsay']= supply_curtailment['Energy_integral_kwh']*85*(24.6/24.9)
supply_curtailment['total_Shapinsay']= supply_curtailment['Energy_integral_kwh']*85*(24/24.9)
supply_curtailment['total_Hoy']= supply_curtailment['Energy_integral_kwh']*85*(19.9/24.9)
supply_curtailment['total_all']= supply_curtailment['total_Rousay']+supply_curtailment['total_Westray']+supply_curtailment['total_Eday']+supply_curtailment['total_Stronsay']+supply_curtailment['total_Shapinsay']+supply_curtailment['total_Hoy']
supply_curtailment.head()
# Total curtailed energy for the year 2017 in GWh for a representative Orkney turbine
(supply_curtailment['total_all'].sum()/1000000)/500
# Divides the information of actual vs potential energy in each location
supply_vs = supply_curtailment.copy()
supply_vs['max_Rousay']= supply_curtailment['Energy_integral_kwh_max']*85
supply_vs['max_Westray']= supply_curtailment['Energy_integral_kwh_max']*85*(25.7/24.9)
supply_vs['max_Eday']= supply_curtailment['Energy_integral_kwh_max']*85*(25.3/24.9)
supply_vs['max_Stronsay']= supply_curtailment['Energy_integral_kwh_max']*85*(24.6/24.9)
supply_vs['max_Shapinsay']= supply_curtailment['Energy_integral_kwh_max']*85*(24/24.9)
supply_vs['max_Hoy']= supply_curtailment['Energy_integral_kwh_max']*85*(19.9/24.9)
# Aggregates the total information of actual vs potential energy
supply_vs['total_max']= supply_vs['max_Rousay']+supply_vs['max_Westray']+supply_vs['max_Eday']+supply_vs['max_Stronsay']+supply_vs['max_Shapinsay']+supply_vs['max_Hoy']
supply_vs['dif'] = supply_vs['total_max'] - supply_vs['total_all']
# Organises and cleans the dataframe of potential energy vs. actual energy in GWh
supply_vs_avg = supply_vs[['dif', 'total_max']]
supply_vs_avg = supply_vs_avg.resample('M').sum()
supply_vs_avg = supply_vs_avg.iloc[:12,:]
supply_vs_avg['total_max'] = supply_vs_avg['total_max'] / 1000000
supply_vs_avg['dif'] = supply_vs_avg['dif'] / 1000000
# Total energy potential vs total energy produced - VISUALIZATION
plt.figure(figsize=(10,5))
plt.plot(supply_vs_avg.index, supply_vs_avg['dif'], label = 'Actual Energy Generated')
plt.plot(supply_vs_avg.index, supply_vs_avg['total_max'], label = 'Potential Energy')
plt.xlabel("")
plt.ylabel("Energy (in GWh)")
plt.legend()
plt.title('Curtailment Opportunity: Maximum Potential Energy vs. Actual Energy Recorded')
plt.show()
heating = pd.read_csv('data/Heating_2.csv').rename(columns = {'Unnamed: 0': 'Month', 'Unnamed: 1': 'During month'})
heating
avg_perc_heating = 0.636
multipliers = (heating['Total (%)'] / heating['Total (%)'][3])**(1/2)
multipliers.index = multipliers.index + 1
heating_dict = dict(multipliers * 0.636)
pd.DataFrame({'month': list(calendar.month_name[1:]), 'heating_perc': heating_dict.values()})
avg_el_consumption = resident_demand['Demand_mean_kw'].sum() / 2
resident_demand['electricity_demand'] = resident_demand['Demand_mean_kw'] / 2
avg_gas_consumption = 13522
resident_demand['energy_index'] = (resident_demand['Demand_mean_kw'] / 2) / (avg_el_consumption / len(resident_demand))
resident_demand['gas_demand'] = resident_demand['energy_index'] * (avg_gas_consumption / len(resident_demand))
resident_demand['demand'] = resident_demand['gas_demand'] + resident_demand['electricity_demand']
heater_capacity = 15.4
by_day = resident_demand.groupby(pd.DatetimeIndex(resident_demand.index).normalize())
daily_demand = by_day.agg({'demand': 'sum'})
heating_perc = [heating_dict[month] for month in daily_demand.index.month]
daily_demand['perc_heating'] = heating_perc
num_households = 10385
daily_demand['total_demand'] = daily_demand['demand'] * num_households
daily_demand['curtailment_potential'] = np.minimum(heater_capacity, daily_demand['demand'] * daily_demand['perc_heating'] ) * 10385
plt.figure(figsize=(10,5))
plt.plot(daily_demand.index, daily_demand['demand'] * daily_demand['perc_heating'], label = 'Demand', linestyle = '--')
plt.plot(daily_demand.index, np.repeat(heater_capacity, len(daily_demand), axis = 0), label = 'Heater capacity', linestyle = '--')
plt.ylabel('Energy (kWh)')
plt.legend()
plt.show()
plt.figure(figsize=(10,5))
plt.plot(daily_demand.index, daily_demand['demand'] * daily_demand['perc_heating'], label = 'Demand', linestyle = '--', alpha = 0.55)
plt.plot(daily_demand.index, np.repeat(heater_capacity, len(daily_demand), axis = 0), label = 'Heater capacity', linestyle = '--', alpha = 0.55)
plt.plot(daily_demand.index, daily_demand['curtailment_potential'] / num_households, label = 'Curtailment reduction', color = 'black')
plt.ylabel('Energy (kWh)')
plt.legend()
plt.show()
off_peak = ['00:00', '00:30', '01:00', '01:30', '02:00', '02:30', '03:00', '03:30', '04:00', '04:30', '05:00',
'05:30', '06:00', '06:30', '10:30', '11:00', '11:30', '12:00', '12:30', '13:00', '13:30', '14:00',
'22:30', '23:00', '23:30']
off_peak = pd.to_datetime(off_peak).time
resident_demand['curtailment_potential'] = 0
for row in range(len(resident_demand)):
if resident_demand.index.time[row] in off_peak:
date = resident_demand.index[row].date()
resident_demand['curtailment_potential'].iloc[row] = daily_demand[daily_demand.index.date == date]['curtailment_potential'] / 25
else:
date = resident_demand.index[row].date()
resident_demand['curtailment_potential'].iloc[row] = 0
pd.DataFrame({'Timestamp': resident_demand.index, 'Curtailment potential': resident_demand['curtailment_potential']})
resident_demand = resident_demand[:17520] # only 2017 data points
merged_energy = supply_curtailment.copy()
merged_energy.head()
merged_energy['Energy_integral_kwh'] = supply_curtailment['total_all']
merged_energy["CD_approach_2"] = [value * 0.05 for value in resident_demand["curtailment_potential"]]
merged_energy
merged_energy["MIN curtailment FINAL"] = merged_energy[['Energy_integral_kwh','CD_approach_2']].apply(np.min, axis = 1)
total_yearly_curtailment_reduction = np.sum(merged_energy["MIN curtailment FINAL"])
total_yearly_curtailment_reduction
# Bins the final turbine telemetry data
intervals = pd.interval_range(start=0, freq=0.5, end=31)
turbine_telemetry_['Wind_bins'] = pd.cut(turbine_telemetry_['Wind_ms'], bins = intervals)
turbine_telemetry_['Wind_bins']
# Median power calculation per bucket
turbine_telemetry_['Power_median'] = turbine_telemetry_.groupby('Wind_bins')['Power_kw'].transform('median')
# Binning model - VISUALIZATION
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Power_median"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
# Continous visualisation
temp = turbine_telemetry_[['Power_kw', 'Wind_bins']].groupby('Wind_bins')
cv = temp.quantile(0.5)
cv['Q1'] = temp.quantile(0.1)
cv['Q3'] = temp.quantile(0.9)
cv = cv.rename(columns = {'Power_kw':'Q2'})
cv.plot(rot=45)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
# Median model calculation
turbine_telemetry_["Median_Predicted_Power_kw"]= turbine_telemetry_.groupby('Wind_ms')['Power_kw'].transform('median')
# Median model RMSE
MSE_med = mean_squared_error(turbine_telemetry_["Power_kw"], turbine_telemetry_["Median_Predicted_Power_kw"])
RMSE_med = math.sqrt(MSE_med)
print("The Root Mean Square Error of the mean model is:\n")
print(round(RMSE_med,2))
# Fit linear regression
from sklearn.linear_model import LinearRegression
model = LinearRegression().fit(np.array(turbine_telemetry_["Wind_ms"]).reshape((-1, 1)),np.array(turbine_telemetry_["Power_kw"]) )
r_sq = model.score(np.array(turbine_telemetry_["Wind_ms"]).reshape((-1, 1)),np.array(turbine_telemetry_["Power_kw"]))
# Predict with linear regression
y_pred = model.predict(np.array(turbine_telemetry_["Wind_ms"]).reshape((-1, 1)))
turbine_telemetry_["Linear_Predicted_Power_kw"]= y_pred
# Linear model visualization
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Linear_Predicted_Power_kw"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
MSE = mean_squared_error(turbine_telemetry_["Power_kw"], turbine_telemetry_["Linear_Predicted_Power_kw"])
RMSE = math.sqrt(MSE)
print("The Root Mean Square Error of the model is:\n")
print(round(RMSE,2))
# Fit linear model with quadratic term
from sklearn.preprocessing import PolynomialFeatures
x= np.array(turbine_telemetry_["Wind_ms"]).reshape((-1, 1))
transformer = PolynomialFeatures(degree=2, include_bias=False)
transformer.fit(x)
x_ = transformer.transform(x)
model_2 = LinearRegression().fit(x_, np.array(turbine_telemetry_["Power_kw"]))
# Predict using model
y_pred_2 = model_2.predict(x_)
turbine_telemetry_["Linear_2_Predicted_Power_kw"]= y_pred_2
# Model visualization
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Linear_2_Predicted_Power_kw"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
day_demand1 = resident_demand[14976:15025]
plt.plot(day_demand1["Demand_mean_kw"])
plt.ylabel('Demand_mean_kw')
plt.show()
day_demand2 = resident_demand[1968:2017]
plt.plot(day_demand2["Demand_mean_kw"])
plt.ylabel('Demand_mean_kw')
plt.show()
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
import math
from datetime import timedelta
import warnings
import calendar
warnings.filterwarnings("ignore")
turbine_telemetry = pd.read_csv("data/turbine_telemetry.csv" , parse_dates=['Timestamp'])
# Removes missing values from the data
turbine_telemetry_ = turbine_telemetry.set_index('Timestamp')
turbine_telemetry_ = turbine_telemetry_.loc['2016-01-01':'2018-01-01']
turbine_telemetry_ = turbine_telemetry_.dropna()
# Removes missing values from the data
turbine_telemetry_cur = turbine_telemetry.set_index('Timestamp')
turbine_telemetry_cur = turbine_telemetry_cur.loc['2016-01-01':'2018-01-01']
turbine_telemetry_cur = turbine_telemetry_cur.dropna()
# Studies the timestamp frequency
telemetry = turbine_telemetry.set_index('Timestamp')
timing = pd.Series(telemetry.index).diff()
one_min = pd.Timedelta(minutes=1) # the data appears to be evenly spaced in one-minute intervals
print(round((timing == one_min).sum() / len(timing),2)*100,'% of the data is in one-minute intervals')
# Power seasonality analysis (monthly averages) - DATA
telemetry_avg = telemetry.resample('M').mean()
# Power seasonality analysis (monthly averages) - VISUALISATION
telemetry_avg.plot(y='Power_kw', use_index=True)
plt.xlabel("")
plt.ylabel("Power_kw")
plt.title('Power Seasonality: monthly averages')
plt.show()
# Studies cases when the power (kw) record exceeds the setpoint record (kw)
is_power_bigger = turbine_telemetry['Power_kw'] > turbine_telemetry['Setpoint_kw']
telemetry_filter = turbine_telemetry[is_power_bigger]
print('The total number of instances in which power (kw) surpasses the setpoint (kw) is:',len(telemetry_filter))
# Studies on average how much power (kw) exceeds the set point (kw) - for the relevant cases
telemetry_filter['Extra'] = telemetry_filter['Power_kw'] - telemetry_filter['Setpoint_kw']
print('On average the power (kw) surpasses the setpoint by:',round(telemetry_filter['Extra'].mean(),2),'kw')
# Power and setpoint filtering
mask = turbine_telemetry_['Power_kw'] > turbine_telemetry_['Setpoint_kw']
turbine_telemetry_f = turbine_telemetry_[mask]
for i in range(len(turbine_telemetry_f)):
if turbine_telemetry_f.iloc[i,1] <= turbine_telemetry_f.iloc[i,2] * 1.1:
turbine_telemetry_f.iloc[i,1] = turbine_telemetry_f.iloc[i,2]
else:
turbine_telemetry_f.iloc[i,1] = np.nan
# Rejoins the filtered data (where power > setpoint) with the dataset
turbine_telemetry_f = turbine_telemetry_f.dropna()
turbine_telemetry_ = pd.concat([turbine_telemetry_[-mask], turbine_telemetry_f], axis=0)
turbine_telemetry_ = turbine_telemetry_.sort_index()
# Power as a function of wind - VISUALIZATION
#plt.figure(figsize=(20,15))
plt.scatter(turbine_telemetry["Wind_ms"],turbine_telemetry["Power_kw"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
# Filters data for uncurtailed periods
mask2 = turbine_telemetry_['Setpoint_kw'] == 900
turbine_telemetry_ = turbine_telemetry_[mask2]
# Minimum speed for minimum power
mask4 = turbine_telemetry_['Power_kw'] == 10
turbine_telemetry_[mask4]['Wind_ms'].min()
# Minimum speed filtering
mask3 = (turbine_telemetry_['Wind_ms'] > 3.0) & (turbine_telemetry_['Power_kw'] == 0)
turbine_telemetry_ = turbine_telemetry_[-mask3]
#Maximum speed filtering
mask4 = (turbine_telemetry_['Wind_ms'] > 20.0) & (turbine_telemetry_['Power_kw'] < 800)
turbine_telemetry_ = turbine_telemetry_[-mask4]
# Power as a function of wind speed
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Power_kw"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
# Mean model calibration
predict_power_dict={}
for i in np.unique(turbine_telemetry_["Wind_ms"]):
predict_power_dict[i]= np.mean(turbine_telemetry_[turbine_telemetry_["Wind_ms"]==i]["Power_kw"])
predict_power_list=[]
for w in turbine_telemetry_["Wind_ms"]:
#print(w)
p_tmp= predict_power_dict[w]
predict_power_list.append(p_tmp)
turbine_telemetry_["Mean_Predicted_Power_kw"]= predict_power_list
# Mean model - VISUALIZATION
plt.scatter(turbine_telemetry_["Wind_ms"],turbine_telemetry_["Mean_Predicted_Power_kw"], alpha =0.01)
plt.xlabel("Wind_ms")
plt.ylabel("Power_kw")
plt.show()
# Mean model RMSE calculation
MSE_avg = mean_squared_error(turbine_telemetry_["Power_kw"], turbine_telemetry_["Mean_Predicted_Power_kw"])
RMSE_avg = math.sqrt(MSE_avg)
print("The mean model has a Root Mean Square Error of", round(RMSE_avg,2))
# Mean model estimations for power based on wind speed
predict_power_list_cur=[]
for w in turbine_telemetry_cur["Wind_ms"]:
#print(w)
if w in predict_power_dict.keys():
p_tmp= predict_power_dict[w]
else:
tmp_w=w
while tmp_w>0 :
#print(tmp_w)
tmp_w = tmp_w-0.1
if tmp_w in predict_power_dict.keys():
#print(tmp_w)
p_tmp_lower= predict_power_dict[tmp_w]
break
tmp_w=w
p_tmp_upper=900
while tmp_w<35 :
#print(tmp_w)
tmp_w = tmp_w+0.1
if tmp_w in predict_power_dict.keys():
#print(tmp_w)
p_tmp_upper= predict_power_dict[tmp_w]
break
p_tmp = (p_tmp_lower+p_tmp_upper)/2
predict_power_list_cur.append(p_tmp)
turbine_telemetry_cur["Max_Power_kw"]= predict_power_list_cur
# Curtailed power calculation
turbine_telemetry_cur["Curtailed_Power_kw"] = turbine_telemetry_cur["Max_Power_kw"] - turbine_telemetry_cur["Power_kw"]
turbine_telemetry_cur.head()
# Curtailed power calculation - refinement
turbine_telemetry_cur.loc[turbine_telemetry_cur["Curtailed_Power_kw"]< 0,"Curtailed_Power_kw"] =0
turbine_telemetry_cur[turbine_telemetry_cur["Curtailed_Power_kw"]>0].head()
# Curtailed Power - VISUALISATION
power_month_avg = turbine_telemetry_cur.resample('M').mean()
power_month_avg.plot(y='Curtailed_Power_kw', use_index=True)
plt.xlabel("")
plt.ylabel("Average_Curtailed_Power_kw")
plt.title("Average curtailed power")
plt.show()
# Mean curtailed power
print("Average curtailed power:",round(np.mean(turbine_telemetry_cur["Curtailed_Power_kw"]),2),'KW')
print("Standard deviation of curtailed power:",round(np.std(turbine_telemetry_cur["Curtailed_Power_kw"]),2), 'KW')
print("Average theoretical maximum power:", round(np.mean(turbine_telemetry_cur["Max_Power_kw"]),2), 'KW')
## Percentage of power curtailed in 2017
_2017 = turbine_telemetry_cur['2017-01-01':'2018-01-01']
print("Percentage of power curtailed in 2017:",round(_2017["Curtailed_Power_kw"].sum() / _2017["Max_Power_kw"].sum()*100,2),'%')
## Percentage of power curtailed in 2016
_2016 = turbine_telemetry_cur['2016-01-01':'2017-01-01']
print("Percentage of power curtailed in 2016:", round(_2016["Curtailed_Power_kw"].sum() / _2017["Max_Power_kw"].sum()*100,2),'%')
#inspect if maintenance occur
turbine_telemetry_cur[turbine_telemetry_cur["Power_kw"]==0].head(10000)
#1. impute data for the generated power during the maintenance
turbine_telemetry_cur['2016-01-21':'2016-02-03']['Power_kw']= turbine_telemetry_cur['2016-01-21':'2016-02-03']['Max_Power_kw']
#2. subtract expected power energy loss during maintenance over all time periods.
potential_loss= sum(turbine_telemetry_cur.loc['2016-01-21':'2016-02-03']['Power_kw'])
turbine_telemetry_cur['Power_kw'] = turbine_telemetry_cur['Power_kw'] - potential_loss/(len(turbine_telemetry_cur))
turbine_telemetry_cur['Max_Power_kw'] = turbine_telemetry_cur['Max_Power_kw'] - potential_loss/(len(turbine_telemetry_cur))
# Filters the data for the year 2017 (i.e., the year of available demand data)
turbine_curt_energy = turbine_telemetry_cur.loc['2017-01-01':'2018-01-01']
# Calculates energy in kwh as integral of power in kw - for curtailed power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) == 30:
energy_integral[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw']) > 0:
energy_integral[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Curtailed_Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral[i,0]= energy_integral[i-1,0]
start = start + timedelta(minutes=30)
# Calculates energy in kwh as integral of power in kw - for maximum power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral_max = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) == 30:
energy_integral_max[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw']) > 0:
energy_integral_max[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Max_Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral_max[i,0]= energy_integral_max[i-1,0]
start = start + timedelta(minutes=30)
# Calculates energy in kwh as integral of power in kw - for actual power
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
energy_integral_act = np.empty((17520,1))
for i in range (17520):
if len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) == 30:
energy_integral_act[i,0]= np.trapz(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) / 60
# When the 30-minute interval does not contain 30 reccords (but more than 0), energy is estimated as the average power * 30
elif len(turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw']) > 0:
energy_integral_act[i,0]= (turbine_curt_energy.loc[start:start + timedelta(minutes=30)]['Power_kw'].mean() * 30) / 60
# When the 30-minute interval does not contain any reccords, energy is taken is equivalent to the preceding 30-minute interval
else:
energy_integral_act[i,0]= energy_integral_act[i-1,0]
start = start + timedelta(minutes=30)
# Summarises all calculations in a data frame
start = turbine_curt_energy.index[0]
start = start - timedelta(seconds=20)
timestamps = []
for i in range(17520):
timestamps.append(start + timedelta(minutes=30))
start = start + timedelta(minutes=30)
supply_curtailment = np.column_stack((timestamps, energy_integral[:,0],energy_integral_max[:,0], energy_integral_act[:,0]))
# Cleans and organises the new turbine_energy dataframe
supply_curtailment = pd.DataFrame(supply_curtailment)
supply_curtailment.columns = ['Timestamp', 'Energy_integral_kwh', 'Energy_integral_kwh_max', 'Energy_integral_kwh_act']
supply_curtailment = supply_curtailment.set_index('Timestamp')
supply_curtailment['Energy_integral_kwh'] = supply_curtailment['Energy_integral_kwh'].astype(float, errors = 'raise')
supply_curtailment['Energy_integral_kwh_max'] = supply_curtailment['Energy_integral_kwh_max'].astype(float, errors = 'raise')
supply_curtailment['Energy_integral_kwh_act'] = supply_curtailment['Energy_integral_kwh_act'].astype(float, errors = 'raise')
supply_curtailment = supply_curtailment.round(decimals = 4)
supply_curtailment.head()
# Curtailed Energy - VISUALISATION
energy_month_avg = supply_curtailment.resample('M').mean()
energy_month_avg.plot(y='Energy_integral_kwh', use_index=True)
plt.xlabel("")
plt.ylabel("Average_Curtailed_Energy_kwh")
plt.title("Curtailed energy")
plt.show()
# Total curtailed energy for the year 2017 in GWh for this turbine
supply_curtailment['Energy_integral_kwh'].sum()/1000000
supply_curtailment['total_Rousay']= supply_curtailment['Energy_integral_kwh']*85
supply_curtailment['total_Westray']= supply_curtailment['Energy_integral_kwh']*85*(25.7/24.9)
supply_curtailment['total_Eday']= supply_curtailment['Energy_integral_kwh']*85*(25.3/24.9)
supply_curtailment['total_Stronsay']= supply_curtailment['Energy_integral_kwh']*85*(24.6/24.9)
supply_curtailment['total_Shapinsay']= supply_curtailment['Energy_integral_kwh']*85*(24/24.9)
supply_curtailment['total_Hoy']= supply_curtailment['Energy_integral_kwh']*85*(19.9/24.9)
supply_curtailment['total_all']= supply_curtailment['total_Rousay']+supply_curtailment['total_Westray']+supply_curtailment['total_Eday']+supply_curtailment['total_Stronsay']+supply_curtailment['total_Shapinsay']+supply_curtailment['total_Hoy']
supply_curtailment.head()
# Total curtailed energy for the year 2017 in GWh for a representative Orkney turbine
(supply_curtailment['total_all'].sum()/1000000)/500
# Divides the information of actual vs potential energy in each location
supply_vs = supply_curtailment.copy()
supply_vs['max_Rousay']= supply_curtailment['Energy_integral_kwh_max']*85
supply_vs['max_Westray']= supply_curtailment['Energy_integral_kwh_max']*85*(25.7/24.9)
supply_vs['max_Eday']= supply_curtailment['Energy_integral_kwh_max']*85*(25.3/24.9)
supply_vs['max_Stronsay']= supply_curtailment['Energy_integral_kwh_max']*85*(24.6/24.9)
supply_vs['max_Shapinsay']= supply_curtailment['Energy_integral_kwh_max']*85*(24/24.9)
supply_vs['max_Hoy']= supply_curtailment['Energy_integral_kwh_max']*85*(19.9/24.9)
# Aggregates the total information of actual vs potential energy
supply_vs['total_max']= supply_vs['max_Rousay']+supply_vs['max_Westray']+supply_vs['max_Eday']+supply_vs['max_Stronsay']+supply_vs['max_Shapinsay']+supply_vs['max_Hoy']
supply_vs['dif'] = supply_vs['total_max'] - supply_vs['total_all']
# Organises and cleans the dataframe of potential energy vs. actual energy in GWh
supply_vs_avg = supply_vs[['dif', 'total_max']]
supply_vs_avg = supply_vs_avg.resample('M').sum()
supply_vs_avg = supply_vs_avg.iloc[:12,:]
supply_vs_avg['total_max'] = supply_vs_avg['total_max'] / 1000000
supply_vs_avg['dif'] = supply_vs_avg['dif'] / 1000000
# Total energy potential vs total energy produced - VISUALIZATION
plt.plot(supply_vs_avg.index, supply_vs_avg['dif'], label = 'Actual Energy Generated')
plt.plot(supply_vs_avg.index, supply_vs_avg['total_max'], label = 'Potential Energy')
plt.xlabel("")
plt.ylabel("Energy_GWh")
plt.legend()
plt.title('Curtailment Opportunity: Maximum Potential Energy vs. Actual Energy Recorded')
plt.show()
Data preprocessing & descriptive analysis
resident_demand = pd.read_csv("data/residential_demand.csv", parse_dates=['Timestamp'], index_col = 0)
resident_demand.head(3)
# Yearly pattern
plt.figure(figsize=(20,10))
plt.plot(resident_demand.index, resident_demand['Demand_mean_kw'], alpha = 0.8)
plt.ylabel('Demand_mean_kw')
plt.show()
monthly_mean_demand = resident_demand.resample('M').mean()
monthly_mean_demand["Total Demand"] = monthly_mean_demand["Demand_mean_kw"] * 10385
spring_monthly_mean_demand = monthly_mean_demand[1:4]
summer_monthly_mean_demand = monthly_mean_demand[4:7]
autumn_monthly_mean_demand = monthly_mean_demand[7:10]
winter_monthly_mean_demand = monthly_mean_demand.iloc[[0, 10, 11]]
spring_demand = spring_monthly_mean_demand.sum()["Total Demand"]
summer_demand = summer_monthly_mean_demand.sum()["Total Demand"]
autumn_demand = autumn_monthly_mean_demand.sum()["Total Demand"]
winter_demand = winter_monthly_mean_demand.sum()["Total Demand"]
total_demand = spring_demand + summer_demand + autumn_demand + winter_demand
spring_prop = spring_demand/total_demand
summer_prop = summer_demand/total_demand
autumn_prop = autumn_demand/total_demand
winter_prop = winter_demand/total_demand
print("Spring Total Demand =", spring_demand)
print("Summer Total Demand =", summer_demand)
print("Autumn Total Demand =", autumn_demand)
print("Winter Total Demand =", winter_demand, "\n")
print("Spring Proportion =", spring_prop)
print("Summer Proportion =", summer_prop)
print("Autumn Proportion =", autumn_prop)
print("Winter Proportion =", winter_prop)
fig = plt.figure()
ax = fig.add_axes([0,0,1,1])
lables = ['Spring', 'Summer', 'Autumn', 'Winter']
data = [spring_demand,summer_demand,autumn_demand,winter_demand]
ax.bar(lables,data)
plt.ylabel("Total demand by season (in kwH)")
plt.show()
resident_demand["Time"] = 0
times = [i for i in resident_demand.index]
for index, timestamp in enumerate(times):
resident_demand["Time"][timestamp] = pd.to_datetime(resident_demand.index[index]).time()
times_dict={}
for i in np.unique(resident_demand["Time"]):
times_dict[i]= np.mean(resident_demand[resident_demand["Time"]==i]["Demand_mean_kw"])
times_list=[]
for w in resident_demand["Time"]:
#print(w)
average_demand = times_dict[w]
times_list.append(average_demand)
resident_demand["Mean_Demand"]= times_list
t = pd.DataFrame({"Time":[str(time) for time in times_dict.keys()], "Average Demand":[d for d in times_dict.values()]})
plt.figure(figsize=(20,10))
plt.plot(t["Time"], t["Average Demand"])
plt.xlabel("Time")
plt.ylabel("Average Demand")
x_ticks = ["00:00:00", "05:00:00", "10:00:00", "15:00:00", "20:00:00"]
plt.xticks(ticks=x_ticks)
plt.show()
day_demand1 = resident_demand[14976:15025]
plt.plot(day_demand1["Demand_mean_kw"])
plt.ylabel('Demand_mean_kw')
plt.show()
day_demand2 = resident_demand[1968:2017]
plt.plot(day_demand2["Demand_mean_kw"])
plt.ylabel('Demand_mean_kw')
plt.show()
Bottom-up demand estimation approach
heating = pd.read_csv('data/Heating_2.csv').rename(columns = {'Unnamed: 0': 'Month', 'Unnamed: 1': 'During month'})
heating
avg_perc_heating = 0.636
multipliers = (heating['Total (%)'] / heating['Total (%)'][3])**(1/2)
multipliers.index = multipliers.index + 1
heating_dict = dict(multipliers * 0.636)
pd.DataFrame({'month': list(calendar.month_name[1:]), 'heating_perc': heating_dict.values()})
avg_el_consumption = resident_demand['Demand_mean_kw'].sum() / 2
resident_demand['electricity_demand'] = resident_demand['Demand_mean_kw'] / 2
avg_gas_consumption = 13522
resident_demand['energy_index'] = (resident_demand['Demand_mean_kw'] / 2) / (avg_el_consumption / len(resident_demand))
resident_demand['gas_demand'] = resident_demand['energy_index'] * (avg_gas_consumption / len(resident_demand))
resident_demand['demand'] = resident_demand['gas_demand'] + resident_demand['electricity_demand']
heater_capacity = 15.4
by_day = resident_demand.groupby(pd.DatetimeIndex(resident_demand.index).normalize())
daily_demand = by_day.agg({'demand': 'sum'})
heating_perc = [heating_dict[month] for month in daily_demand.index.month]
daily_demand['perc_heating'] = heating_perc
num_households = 10385
daily_demand['total_demand'] = daily_demand['demand'] * num_households
daily_demand['curtailment_potential'] = np.minimum(heater_capacity, daily_demand['demand'] * daily_demand['perc_heating'] ) * 10385
plt.plot(daily_demand.index, daily_demand['demand'] * daily_demand['perc_heating'], label = 'Demand', linestyle = '--')
plt.plot(daily_demand.index, np.repeat(heater_capacity, len(daily_demand), axis = 0), label = 'Heater capacity', linestyle = '--')
plt.ylabel('Energy (kWh)')
plt.legend()
plt.show()
plt.plot(daily_demand.index, daily_demand['demand'] * daily_demand['perc_heating'], label = 'Demand', linestyle = '--', alpha = 0.55)
plt.plot(daily_demand.index, np.repeat(heater_capacity, len(daily_demand), axis = 0), label = 'Heater capacity', linestyle = '--', alpha = 0.55)
plt.plot(daily_demand.index, daily_demand['curtailment_potential'] / num_households, label = 'Curtailment reduction', color = 'black')
plt.ylabel('Energy (kWh)')
plt.legend()
plt.show()
off_peak = ['00:00', '00:30', '01:00', '01:30', '02:00', '02:30', '03:00', '03:30', '04:00', '04:30', '05:00',
'05:30', '06:00', '06:30', '10:30', '11:00', '11:30', '12:00', '12:30', '13:00', '13:30', '14:00',
'22:30', '23:00', '23:30']
off_peak = pd.to_datetime(off_peak).time
resident_demand['curtailment_potential'] = 0
for row in range(len(resident_demand)):
if resident_demand.index.time[row] in off_peak:
date = resident_demand.index[row].date()
resident_demand['curtailment_potential'].iloc[row] = daily_demand[daily_demand.index.date == date]['curtailment_potential'] / 25
else:
date = resident_demand.index[row].date()
resident_demand['curtailment_potential'].iloc[row] = 0
pd.DataFrame({'Timestamp': resident_demand.index, 'Curtailment potential': resident_demand['curtailment_potential']})
resident_demand = resident_demand[:17520]
merged_energy = supply_curtailment.copy()
merged_energy.head()
merged_energy['Energy_integral_kwh'] = supply_curtailment['total_all']
merged_energy["CD_approach_2"] = [value * 0.05 for value in resident_demand["curtailment_potential"]]
merged_energy["MIN curtailment FINAL"] = merged_energy[['Energy_integral_kwh','CD_approach_2']].apply(np.min, axis = 1)
total_yearly_curtailment_reduction = np.sum(merged_energy["MIN curtailment FINAL"])
total_yearly_curtailment_reduction