Temperature change predictions
This notebook aims to predict the evolution of temperature change in three countries (Morocco, France, and the USA) using the Temperature Change dataset on Kaggle (https://www.kaggle.com/sevgisarac/temperature-change).
#import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
Exploring the data
# Locations of the two CSV files read by this notebook.
ETC_filepath = '/work/archive/Environment_Temperature_change_E_All_Data_NOFLAG.csv'
FAO_filepath = '/work/archive/FAOSTAT_data_11-24-2020.csv'

# Load each file into its own DataFrame.
ETC_data = pd.read_csv(ETC_filepath)
FAO_data = pd.read_csv(FAO_filepath)

# First look at the temperature-change table: sample rows, dtypes,
# summary statistics, column names, and the distinct Element / Area values.
ETC_data.head()
ETC_data.info()
ETC_data.describe()
ETC_data.columns
ETC_data.Element.value_counts()
ETC_data.Area.unique()
Preparing and Transforming the data
# Code columns removed from every per-country frame.
_CODE_COLUMNS = ["Area Code", "Months Code", "Element Code"]

# Values of the "Months" column that denote a single calendar month.
_CALENDAR_MONTHS = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                    'August', 'September', 'October', 'November', 'December']


def _select_country(area_name):
    """Return the ETC_data rows for one area, re-indexed from 0, without the code columns."""
    country_rows = ETC_data.loc[ETC_data.Area == area_name]
    return country_rows.reset_index(drop=True).drop(_CODE_COLUMNS, axis=1)


# One frame per country of interest.
Morocco_ETC = _select_country('Morocco')
US_ETC = _select_country('United States of America')
France_ETC = _select_country('France')

# Rows whose "Months" value is one of the twelve calendar months.
Morocco_ETC_N = Morocco_ETC.loc[Morocco_ETC.Months.isin(_CALENDAR_MONTHS)]
US_ETC_N = US_ETC.loc[US_ETC.Months.isin(_CALENDAR_MONTHS)]
France_ETC_N = France_ETC.loc[France_ETC.Months.isin(_CALENDAR_MONTHS)]

# Rows whose "Months" value is the "Meteorological year" aggregate.
Morocco_ETC_Meteo = Morocco_ETC.loc[Morocco_ETC.Months == "Meteorological year"]
US_ETC_Meteo = US_ETC.loc[US_ETC.Months == "Meteorological year"]
France_ETC_Meteo = France_ETC.loc[France_ETC.Months == "Meteorological year"]
# Arithmetic-mean helper, used to average the temperature change over the
# months of a year.
def avg(list):
    """Return the arithmetic mean of the values in ``list``.

    Args:
        list: A sized iterable of numbers (e.g. a Python list or a pandas
            Series). The name shadows the built-in ``list``; it is kept so
            that existing callers, including keyword callers, keep working.

    Returns:
        ``sum(list) / len(list)``, or NaN when the input is empty (instead of
        raising ``ZeroDivisionError``), so an empty selection shows up as a
        missing value rather than aborting the whole computation.
    """
    count = len(list)
    if count == 0:
        return float("nan")
    return sum(list) / count
# Both functions below build a new, easier-to-handle DataFrame from an existing one: one row per year (1961-2019) holding the average value of the requested element.
def transform_data(df, element, start_year=1961, end_year=2019):
    """Build a long-format frame with the yearly mean of one element.

    The input is expected to hold one column per year named ``Y<year>``
    (``Y1961``, ``Y1962``, ...) and an ``Element`` column. For every year in
    ``start_year..end_year`` the values of the rows whose ``Element`` equals
    ``element`` are averaged.

    Args:
        df: Wide-format DataFrame with an ``Element`` column and ``Y<year>``
            columns.
        element: Value of the ``Element`` column to keep; also used as the
            name of the value column in the result.
        start_year: First year to include (default 1961).
        end_year: Last year to include, inclusive (default 2019).

    Returns:
        A DataFrame with two columns: ``Years`` (ints) and ``element`` (the
        per-year mean). A year containing a missing value yields NaN, and so
        does an element with no matching rows.

    Raises:
        KeyError: If a required ``Y<year>`` column is missing from ``df``.
    """
    years = list(range(start_year, end_year + 1))
    year_columns = [f"Y{year}" for year in years]

    # Select the matching rows once instead of rebuilding the mask per year.
    selected = df.loc[df["Element"] == element, year_columns]

    # skipna=False keeps the plain sum/len semantics: a missing value in a
    # year makes that year's mean NaN rather than being silently skipped.
    yearly_mean = selected.mean(axis=0, skipna=False)

    return pd.DataFrame({'Years': years, element: yearly_mean.to_numpy()})
def transform_data_meteo(df, element):
    """Build the yearly frame for rows of a "Meteorological year" selection.

    The computation is exactly the one performed by ``transform_data``, so
    this function delegates to it instead of carrying a duplicate body. It is
    kept as a separate name for the callers that use it.

    Args:
        df: Wide-format DataFrame with an ``Element`` column and ``Y<year>``
            columns.
        element: Value of the ``Element`` column to keep; also the name of
            the value column in the result.

    Returns:
        The DataFrame returned by ``transform_data(df, element)``: a
        ``Years`` column and an ``element`` column.
    """
    return transform_data(df, element)
# Yearly mean of the monthly "Temperature change" rows, per country.
Morocco_Temp_Change = transform_data(Morocco_ETC_N, 'Temperature change')
US_Temp_Change = transform_data(US_ETC_N, 'Temperature change')
France_Temp_Change = transform_data(France_ETC_N, 'Temperature change')

# Same transformation applied to the "Meteorological year" rows.
Morocco_Meteo_Temp_Change = transform_data_meteo(Morocco_ETC_Meteo, 'Temperature change')
US_Meteo_Temp_Change = transform_data_meteo(US_ETC_Meteo, 'Temperature change')
France_Meteo_Temp_Change = transform_data_meteo(France_ETC_Meteo, 'Temperature change')

# Convert the integer years to datetimes. Each frame converts its OWN
# "Years" column: the monthly frames previously copied the column of the
# matching meteorological frame, which only worked because both frames
# happen to cover the same years in the same row order.
for _frame in (
    Morocco_Meteo_Temp_Change,
    US_Meteo_Temp_Change,
    France_Meteo_Temp_Change,
    Morocco_Temp_Change,
    US_Temp_Change,
    France_Temp_Change,
):
    _frame['Years'] = pd.to_datetime(_frame['Years'], format="%Y")
Data Analysis
# Morocco: meteorological-year series against the mean of the monthly series.
plt.figure(figsize=(20, 10))
sns.lineplot(x=Morocco_Meteo_Temp_Change.Years, y=Morocco_Meteo_Temp_Change['Temperature change'], label="Morocco temperature change meteorology year")
sns.lineplot(x=Morocco_Temp_Change.Years, y=Morocco_Temp_Change['Temperature change'], label="Morocco temperature change year")
plt.legend()

# One figure per country for the meteorological-year series.
# plt.legend must be *called*; a bare `plt.legend` only references the
# function and does nothing.
plt.figure(figsize=(20, 10))
sns.lineplot(x=Morocco_Meteo_Temp_Change.Years, y=Morocco_Meteo_Temp_Change['Temperature change'], label="Morocco")
plt.legend()

plt.figure(figsize=(20, 10))
sns.lineplot(x=US_Meteo_Temp_Change.Years, y=US_Meteo_Temp_Change['Temperature change'], label="USA", color='red')
plt.legend()

plt.figure(figsize=(20, 10))
sns.lineplot(x=France_Meteo_Temp_Change.Years, y=France_Meteo_Temp_Change['Temperature change'], color='green', label="France")
plt.legend()
Machine learning
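For each of these three countries, we predict the yearly temperature change with a hybrid model: a linear regression on a quadratic (order-2) time trend with calendar Fourier terms, followed by an XGBoost regressor fitted to the residuals of that trend.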
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
def predict(df):
    """Fit a trend + boosted-residual model to a yearly series and plot it.

    Steps:
      1. Index the ``'Temperature change'`` column by ``'Years'``.
      2. Build deterministic features (constant, order-2 time trend, calendar
         Fourier terms) with ``DeterministicProcess``.
      3. Split chronologically: the last 20% of the observations form the
         test set (``shuffle=False``).
      4. Fit a ``LinearRegression`` on the features, then an ``XGBRegressor``
         on the training residuals of that regression.
      5. Plot the observed series, the boosted fit on the training period,
         and the boosted prediction on the test period.

    Args:
        df: DataFrame with a datetime ``'Years'`` column and a
            ``'Temperature change'`` column. It is NOT modified.

    Returns:
        None. The function only draws a figure.
    """
    # Use the non-inplace set_index: mutating the caller's frame removes its
    # 'Years' column, so a second call (or any later use of df.Years) fails.
    y = df.set_index('Years')['Temperature change']

    # NOTE(review): with one observation per year the annual Fourier terms
    # are presumably constant and removed by drop=True - confirm they are
    # still wanted.
    fourier = CalendarFourier(freq="A", order=4)
    dp = DeterministicProcess(
        index=y.index,
        constant=True,
        order=2,
        additional_terms=[fourier],
        drop=True,
    )
    X = dp.in_sample()

    # Chronological split: no shuffling, the test set is the most recent 20%.
    idx_train, idx_test = train_test_split(
        y.index, test_size=0.2, shuffle=False,
    )
    X_train, X_test = X.loc[idx_train, :], X.loc[idx_test, :]
    y_train, y_test = y.loc[idx_train], y.loc[idx_test]

    # Fit the trend model. The intercept is already a feature
    # (constant=True above), hence fit_intercept=False.
    model = LinearRegression(fit_intercept=False)
    model.fit(X_train, y_train)

    # Trend on the training and test periods. y is a single series, so plain
    # Series aligned on the year index are enough (no stack/unstack needed).
    y_fit = pd.Series(model.predict(X_train), index=y_train.index)
    y_pred = pd.Series(model.predict(X_test), index=y_test.index)

    # Residuals of the trend on the training set.
    y_resid = y_train - y_fit

    # Train XGBoost on the residuals.
    xgb = XGBRegressor()
    xgb.fit(X_train, y_resid)

    # Add the predicted residuals onto the predicted trends.
    y_fit_boosted = y_fit + xgb.predict(X_train)
    y_pred_boosted = y_pred + xgb.predict(X_test)

    plt.figure(figsize=(16, 8))
    ax = y.plot(color='b', label='Observed')
    y_fit_boosted.plot(color='r', ax=ax, label='Fitted (training period)')
    y_pred_boosted.plot(color='y', ax=ax, label='Predicted (test period)')
    ax.legend()
# Fit and plot the model for each country's meteorological-year series.
for country_frame in (
    Morocco_Meteo_Temp_Change,
    US_Meteo_Temp_Change,
    France_Meteo_Temp_Change,
):
    predict(country_frame)