Temperature change predictions
This notebook aims to predict the evolution of temperature change in three countries (Morocco, France, and the USA) using the Temperature Change dataset on Kaggle (https://www.kaggle.com/sevgisarac/temperature-change).
#import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
Exploring the data
# Locations of the two CSV files read by this notebook.
ETC_filepath = '/work/archive/Environment_Temperature_change_E_All_Data_NOFLAG.csv'
FAO_filepath = '/work/archive/FAOSTAT_data_11-24-2020.csv'

# Read each file into its own DataFrame.
ETC_data = pd.read_csv(ETC_filepath)
FAO_data = pd.read_csv(FAO_filepath)

# First rows of the temperature-change table, then its column/dtype summary.
ETC_data.head()
ETC_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9656 entries, 0 to 9655
Data columns (total 66 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Area Code 9656 non-null int64
1 Area 9656 non-null object
2 Months Code 9656 non-null int64
3 Months 9656 non-null object
4 Element Code 9656 non-null int64
5 Element 9656 non-null object
6 Unit 9656 non-null object
7 Y1961 8287 non-null float64
8 Y1962 8322 non-null float64
9 Y1963 8294 non-null float64
10 Y1964 8252 non-null float64
11 Y1965 8281 non-null float64
12 Y1966 8364 non-null float64
13 Y1967 8347 non-null float64
14 Y1968 8345 non-null float64
15 Y1969 8326 non-null float64
16 Y1970 8308 non-null float64
17 Y1971 8303 non-null float64
18 Y1972 8323 non-null float64
19 Y1973 8394 non-null float64
20 Y1974 8374 non-null float64
21 Y1975 8280 non-null float64
22 Y1976 8209 non-null float64
23 Y1977 8257 non-null float64
24 Y1978 8327 non-null float64
25 Y1979 8290 non-null float64
26 Y1980 8283 non-null float64
27 Y1981 8276 non-null float64
28 Y1982 8237 non-null float64
29 Y1983 8205 non-null float64
30 Y1984 8259 non-null float64
31 Y1985 8216 non-null float64
32 Y1986 8268 non-null float64
33 Y1987 8284 non-null float64
34 Y1988 8273 non-null float64
35 Y1989 8257 non-null float64
36 Y1990 8239 non-null float64
37 Y1991 8158 non-null float64
38 Y1992 8354 non-null float64
39 Y1993 8315 non-null float64
40 Y1994 8373 non-null float64
41 Y1995 8409 non-null float64
42 Y1996 8439 non-null float64
43 Y1997 8309 non-null float64
44 Y1998 8370 non-null float64
45 Y1999 8324 non-null float64
46 Y2000 8342 non-null float64
47 Y2001 8241 non-null float64
48 Y2002 8312 non-null float64
49 Y2003 8390 non-null float64
50 Y2004 8415 non-null float64
51 Y2005 8424 non-null float64
52 Y2006 8503 non-null float64
53 Y2007 8534 non-null float64
54 Y2008 8475 non-null float64
55 Y2009 8419 non-null float64
56 Y2010 8435 non-null float64
57 Y2011 8437 non-null float64
58 Y2012 8350 non-null float64
59 Y2013 8427 non-null float64
60 Y2014 8377 non-null float64
61 Y2015 8361 non-null float64
62 Y2016 8348 non-null float64
63 Y2017 8366 non-null float64
64 Y2018 8349 non-null float64
65 Y2019 8365 non-null float64
dtypes: float64(59), int64(3), object(4)
memory usage: 4.9+ MB
# Summary statistics of the numeric columns.
ETC_data.describe()
# Column labels of the table.
ETC_data.columns
# Number of rows per distinct value of the Element column.
ETC_data["Element"].value_counts()
# Distinct values of the Area column.
ETC_data["Area"].unique()
Preparing and Transforming the data
# Values of the Months column kept for the calendar-month frames (*_N).
_MONTH_NAMES = ['January', 'February', 'March', 'April', 'May', 'June', 'July',
                'August', 'September', 'October', 'November', 'December']
# Code columns removed from every country frame.
_CODE_COLUMNS = ["Area Code", "Months Code", "Element Code"]


def _country_frames(area):
    """Return (all rows, calendar-month rows, meteorological-year rows) for one area.

    The rows of ``ETC_data`` whose Area equals ``area`` are re-indexed from 0
    and stripped of the code columns; the two other frames are row subsets of
    that result selected on the Months column.
    """
    country = (
        ETC_data.loc[ETC_data.Area == area]
        .reset_index(drop=True)
        .drop(_CODE_COLUMNS, axis=1)
    )
    monthly = country.loc[country.Months.isin(_MONTH_NAMES)]
    meteo = country.loc[country.Months == "Meteorological year"]
    return country, monthly, meteo


Morocco_ETC, Morocco_ETC_N, Morocco_ETC_Meteo = _country_frames('Morocco')
US_ETC, US_ETC_N, US_ETC_Meteo = _country_frames('United States of America')
France_ETC, France_ETC_N, France_ETC_Meteo = _country_frames('France')
# Averages a sequence of values; used to compute the mean temperature change across the months of each year
def avg(list):
    """Return the arithmetic mean of ``list``: its sum divided by its length.

    Any sized iterable accepted by ``sum`` and ``len`` works. An empty input
    raises ``ZeroDivisionError`` because the length is used as the divisor.
    """
    total = sum(list)
    count = len(list)
    return total / count
# Both functions below build new, easier-to-handle DataFrames from the existing ones
def transform_data(df, element):
    """Build a two-column frame with one row per year from 1961 to 2019.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an ``Element`` column and one ``Y<year>`` column per year.
    element : str
        Value of ``Element`` to keep; also used as the name of the value column.

    Returns
    -------
    pandas.DataFrame
        Column ``'Years'`` holds the integer years; column ``element`` holds,
        for each year, ``avg`` of that year's column over the rows whose
        ``Element`` equals ``element``.
    """
    years = list(range(1961, 2020))
    # The row selection does not depend on the year, so compute it once.
    element_rows = df.Element == element
    yearly_values = []
    for year in years:
        column = df["Y" + str(year)]
        yearly_values.append(avg(column.loc[element_rows]))
    return pd.DataFrame(data={'Years': years, element: yearly_values})
def transform_data_meteo(df, element):
    """Build the per-year frame for a meteorological-year selection.

    Performs exactly the same computation as ``transform_data`` (years 1961 to
    2019 in ``'Years'``, ``avg`` of each ``Y<year>`` column over the rows whose
    ``Element`` equals ``element`` in a column named ``element``), so it
    simply delegates to it.
    """
    return transform_data(df, element)
# Per-year mean over the calendar-month rows of each country.
Morocco_Temp_Change = transform_data(Morocco_ETC_N, 'Temperature change')
US_Temp_Change = transform_data(US_ETC_N, 'Temperature change')
France_Temp_Change = transform_data(France_ETC_N, 'Temperature change')

# Per-year value taken from the "Meteorological year" rows of each country.
Morocco_Meteo_Temp_Change = transform_data_meteo(Morocco_ETC_Meteo, 'Temperature change')
US_Meteo_Temp_Change = transform_data_meteo(US_ETC_Meteo, 'Temperature change')
France_Meteo_Temp_Change = transform_data_meteo(France_ETC_Meteo, 'Temperature change')

# Turn the integer years into timestamps so they plot on a date axis.
Morocco_Meteo_Temp_Change['Years'] = pd.to_datetime(Morocco_Meteo_Temp_Change["Years"], format="%Y")
US_Meteo_Temp_Change['Years'] = pd.to_datetime(US_Meteo_Temp_Change["Years"], format="%Y")
France_Meteo_Temp_Change['Years'] = pd.to_datetime(France_Meteo_Temp_Change["Years"], format="%Y")

# Each monthly frame converts its OWN 'Years' column. These lines used to read
# the already-converted column of the matching meteorological frame, which only
# gave the right result because both frames cover the same years with the same
# row index.
Morocco_Temp_Change['Years'] = pd.to_datetime(Morocco_Temp_Change["Years"], format="%Y")
US_Temp_Change['Years'] = pd.to_datetime(US_Temp_Change["Years"], format="%Y")
France_Temp_Change['Years'] = pd.to_datetime(France_Temp_Change["Years"], format="%Y")
Data Analysis
# Morocco: meteorological-year series against the mean of the calendar months.
plt.figure(figsize=(20,10))
sns.lineplot(x=Morocco_Meteo_Temp_Change.Years,y=Morocco_Meteo_Temp_Change['Temperature change'], label="Morocco temperature change meteorology year")
sns.lineplot(x=Morocco_Temp_Change.Years, y=Morocco_Temp_Change['Temperature change'],label="Morocco temperature change year")

# Morocco, meteorological-year series only.
plt.figure(figsize=(20,10))
sns.lineplot(x=Morocco_Meteo_Temp_Change.Years,y=Morocco_Meteo_Temp_Change['Temperature change'], label="Morocco")
# `plt.legend` without parentheses only referenced the function; call it.
plt.legend()

# USA, meteorological-year series.
plt.figure(figsize=(20,10))
sns.lineplot(x=US_Meteo_Temp_Change.Years, y=US_Meteo_Temp_Change['Temperature change'], label="USA", color='red')
plt.legend()

# France, meteorological-year series.
plt.figure(figsize=(20,10))
sns.lineplot(x=France_Meteo_Temp_Change.Years, y=France_Meteo_Temp_Change['Temperature change'], color='green' ,label="France")
plt.legend()
Machine learning
For these three countries, we try to predict the temperature change over the years using a linear regression on polynomial trend features, with an XGBoost model fitted on the residuals of that regression.
from statsmodels.tsa.deterministic import CalendarFourier, DeterministicProcess
from sklearn.linear_model import LinearRegression
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
def predict(df):
    """Fit a trend-plus-residual model to a yearly series and plot the result.

    A linear regression is fitted on deterministic features (constant,
    second-order time trend, annual calendar Fourier terms) using the first
    80% of the observations; an XGBoost regressor is then fitted on the
    training residuals. The observed series, the boosted in-sample fit and the
    boosted prediction for the held-out last 20% are drawn on one figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a 'Temperature change' column and the dates either in a
        'Years' column or already set as the index. The frame passed in is
        left unmodified.

    Returns
    -------
    None
        The function only produces a plot.
    """
    # Index a copy rather than re-indexing the caller's frame in place: the
    # in-place version raised KeyError as soon as the same frame was passed a
    # second time (for instance when the calling cell was re-run), because the
    # 'Years' column had already been moved into the index.
    if 'Years' in df.columns:
        df = df.set_index('Years')
    y = df['Temperature change']

    fourier = CalendarFourier(freq="A", order=4)
    dp = DeterministicProcess(
        index=y.index,
        constant=True,
        order=2,
        additional_terms=[fourier],
        drop=True
    )
    X = dp.in_sample()

    # Chronological split (no shuffling): the last 20% of the dates are held out.
    idx_train, idx_test = train_test_split(
        y.index, test_size=0.2, shuffle=False,
    )
    X_train, X_test = X.loc[idx_train, :], X.loc[idx_test, :]
    y_train, y_test = y.loc[idx_train], y.loc[idx_test]

    # Fit trend model (the constant is already one of the feature columns)
    model = LinearRegression(fit_intercept=False)
    model.fit(X_train, y_train)

    # Make predictions
    y_fit = pd.DataFrame(
        model.predict(X_train),
        index=y_train.index,
    )
    y_pred = pd.DataFrame(
        model.predict(X_test),
        index=y_test.index,
    )

    # Pivot wide to long (stack) and convert DataFrame to Series (squeeze)
    y_fit = y_fit.stack().squeeze()  # trend from training set
    y_pred = y_pred.stack().squeeze()  # trend from test set

    # Create residuals (the collection of detrended series) from the training set
    y_resid = y_train - y_fit

    # Train XGBoost on the residuals
    xgb = XGBRegressor()
    xgb.fit(X_train, y_resid)

    # Add the predicted residuals onto the predicted trends
    y_fit_boosted = xgb.predict(X_train) + y_fit
    y_pred_boosted = xgb.predict(X_test) + y_pred

    # Observed series in blue, boosted fit in red, boosted prediction in yellow.
    plt.figure(figsize=(16,8))
    axs = y.plot(color='b', subplots=True, sharex=True)
    axs = y_fit_boosted.unstack().plot(color='r', subplots=True, sharex=True, ax=axs)
    axs = y_pred_boosted.unstack().plot(color='y', subplots=True, sharex=True, ax=axs)
# Fit the model on Morocco's meteorological-year series and plot the result.
predict(Morocco_Meteo_Temp_Change)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()
# Fit the model on the USA's meteorological-year series and plot the result.
predict(US_Meteo_Temp_Change)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()
# Fit the model on France's meteorological-year series and plot the result.
predict(France_Meteo_Temp_Change)
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()
/shared-libs/python3.7/py/lib/python3.7/site-packages/pandas/plotting/_matplotlib/__init__.py:61: UserWarning: When passing multiple axes, sharex and sharey are ignored. These settings must be specified when creating axes
plot_obj.generate()