# Economic Impact
# B. There are reports of international/foreign investors being deterred from South
# African assets because of the effects of load shedding on businesses. How has
# foreign investment changed with increases and decreases in load shedding?
# How long does foreign investment take to react to changes in load shedding, if
# at all? How would you predict foreign investment in the next 10 years as a
# function of increased load shedding?
# importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# plt.show() to display graph in code
# data showing net inflow and net outflow of foreign direct invsetment in South Africa from 2009-2014 (before load shedding)
investment1 = pd.read_csv('/datasets/investment/investment1.csv')
# from 2015-2020 (during load shedding)
investment2 = pd.read_csv('/datasets/investment/investment2.csv')
# renaming columns for investment1
colnames1=[2009,2010,2011,2012,2013,2014]
oldcolnames1 = ["2009 [YR2009]", "2010 [YR2010]", "2011 [YR2011]", "2012 [YR2012]", "2013 [YR2013]", "2014 [YR2014]"]
investment1 = pd.read_csv("/datasets/investment/investment1.csv", usecols=oldcolnames1)
investment1.columns = colnames1
# dropping rows 2 to 6 (empty rows)
investment1 = investment1.drop(index=investment1.index[2],axis=0)
investment1 = investment1.drop([3,4,5,6])
# renaming rows for investment1
investment1.index = ['FDI net inflow','FDI net outflow']
# transposing the dataset for easier use
investment1 = investment1.transpose()
investment1
# renaming columns for investment2
colnames2=[2015,2016,2017,2018,2019,2020]
oldcolnames2 = ["2015 [YR2015]", "2016 [YR2016]", "2017 [YR2017]", "2018 [YR2018]", "2019 [YR2019]", "2020 [YR2020]"]
investment2 = pd.read_csv('/datasets/investment/investment2.csv',usecols=oldcolnames2)
investment2.columns=colnames2
# dropping rows 2 to 6 (empty rows)
investment2 = investment2.drop([2,6])
investment2 = investment2.drop([3,4,5])
# rename rows
investment2.index = ['FDI net inflow','FDI net outflow']
# transposing the dataset for easier use
investment2 = investment2.transpose()
# dropping 2020 because it is empty
investment2 = investment2.drop([2020])
# converting the data to floating point numbers
investment2['FDI net inflow'] = investment2['FDI net inflow'].astype(float)
investment2['FDI net outflow'] = investment2['FDI net outflow'].astype(float)
investment2
# dataset for foreign direct investment inflow in US dollars
!ls /datasets/investmentdollars
investmentdollars = pd.read_csv('/datasets/investmentdollars/investmentdollars.csv')
investmentdollars.csv
colnames3=[2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019]
oldcolnames3 = ["2009 [YR2009]", "2010 [YR2010]", "2011 [YR2011]", "2012 [YR2012]", "2013 [YR2013]", "2014 [YR2014]",
"2015 [YR2015]", "2016 [YR2016]", "2017 [YR2017]", "2018 [YR2018]", "2019 [YR2019]"]
investmentdollars = pd.read_csv("/datasets/investmentdollars/investmentdollars.csv", usecols=oldcolnames3)
investmentdollars.columns = colnames3
# remove filler rows
investmentdollars = investmentdollars.drop([1,2,3,4,5])
investmentdollars
investmentdollars = investmentdollars.transpose()
# adding in dataset for loadshedding history
loadinghist = pd.read_csv('/datasets/loadinghist/EskomSePush Loadshedding History - EskomSePush_stages.csv')
loadinghist
# create a dataset with both
investment3 = pd.concat([investment1, investment2])
plt.figure(figsize=(10,5))
sns.lineplot(data=investment3, dashes=False)
plt.xlabel('Year')
plt.title('Direct Foreign Investment in South Africa from 2009-2019')
plt.show()
investmentplot = investmentdollars.plot()
investmentplot.set_xlabel("Year")
investmentplot.set_ylabel("FDI in US dollars (millions)")
investmentplot.set_title('Foreign Investment in South Africa')
loadinghist1 = loadinghist[loadinghist['stage'] > 0]
plt.figure(figsize=(10,5))
sns.countplot(x='stage',data=loadinghist1, palette='coolwarm')
plt.xlabel('Stage')
plt.title('Loadshedding From 2015-2020')
plt.show()
loadinghist1['stage'].value_counts()
# convert to DateTime
loadinghist1['created_at'] = pd.to_datetime(loadinghist1['created_at'])
# create new columns for hour, month, day, and year. then convert numerical days to string
loadinghist1['Hour'] = loadinghist1['created_at'].apply(lambda x: x.hour)
loadinghist1['Month'] = loadinghist1['created_at'].apply(lambda x: x.month)
loadinghist1['Day'] = loadinghist1['created_at'].apply(lambda x: x.dayofweek)
loadinghist1['Year'] = loadinghist1['created_at'].apply(lambda x: x.year)
dmap = {0:'Mon',1:'Tue',2:'Wed',3:'Thu',4:'Fri',5:'Sat',6:'Sun'}
loadinghist1['Day'] = loadinghist1['Day'].apply(lambda x: dmap[x])
sns.countplot(x = 'Day', data = loadinghist1)
plt.title('Frequency of Loadsheddings Per Day')
plt.show()
sns.countplot(x = 'Year', data = loadinghist1)
plt.title('Frequency of Loadsheddings Per Year')
plt.show()
loadinghist1['Year'].value_counts()
sns.countplot(x = 'Month', data = loadinghist1)
plt.title('Frequecncy of Loadsheddings Per Month')
plt.show()
g = sns.FacetGrid(loadinghist1, col='Year')
g.map_dataframe(sns.histplot, x="Month")
plt.show()
sns.countplot(x = 'Hour', data = loadinghist1)
plt.title('Frequency of Loadsheddings per Hour')
plt.show()
# creating a dataframe with the number of loadsheddings per year
loadinghist_year = pd.DataFrame(loadinghist1['Year'].value_counts())
loadinghist_year.columns = ['Num Loads']
# adding new dataframe to original one
investmentwithloads = pd.concat([investment3, loadinghist_year], axis=1)
# convert Nan values to 0
investmentwithloads = investmentwithloads.fillna(0)
# drop 2020 row because there is no data on FDI
investmentwithloads = investmentwithloads.drop([2020])
investmentwithloads.corr()
testdf = pd.concat([investment3, loadinghist_year], axis=1)
testdf = testdf.fillna(0)
testdf = testdf.drop([2009,2010,2011,2012,2013,2014,2015,2016,2020])
testdf.corr()