# Execute this cell prior to any of the cells below
import geopandas as gpd
import pandas as pd
import numpy as np
import urllib
import io
import requests
import matplotlib.pyplot as plt
from pprint import pprint
from shapely.geometry import Point, Polygon
import requests as re
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import scipy as sci
base_url = 'http://ak-energy-data.analysisnorth.com/'
def get_df(file_path):
"""Returns a Pandas DataFrame that is found at the 'file_path'
below the Base URL for accessing data. The 'file_path' should end
with '.pkl' and points to a pickled, compressed (bz2), Pandas DataFrame.
"""
b = requests.get(urllib.parse.urljoin(base_url, file_path)).content
df = pd.read_pickle(io.BytesIO(b), compression='bz2')
return df
def get_csv(file_path):
"""Returns a string in CSV format that is found at the 'file_path'
below the Base URL for accessing data. The 'file_path' should end
with '.csv' and points to a CSV data file.
"""
txt = requests.get(urllib.parse.urljoin(base_url, file_path)).text
return txt
AK_city_info_df = get_df('city-util/proc/city.pkl')
AK_city_info_df.head()
AK_city_info_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 274 entries, 1 to 322
Data columns (total 29 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 274 non-null object
1 Latitude 274 non-null float64
2 Longitude 274 non-null float64
3 ERHRegionID 274 non-null int64
4 WAPRegionID 274 non-null int64
5 ImpCost 274 non-null int64
6 FuelRefer 274 non-null bool
7 FuelCityID 39 non-null float64
8 Oil1Price 263 non-null float64
9 Oil2Price 31 non-null float64
10 PropanePrice 200 non-null float64
11 BirchPrice 6 non-null float64
12 SprucePrice 5 non-null float64
13 CoalPrice 2 non-null float64
14 SteamPrice 2 non-null float64
15 HotWaterPrice 2 non-null float64
16 MunicipalSalesTax 105 non-null float64
17 BoroughSalesTax 26 non-null float64
18 TMYid 274 non-null int64
19 TMYname 274 non-null object
20 ElecUtilities 274 non-null object
21 GasPrice 28 non-null float64
22 aris_city 274 non-null object
23 census_city 274 non-null object
24 census_area 274 non-null object
25 ancsa_region 274 non-null object
26 railbelt 274 non-null object
27 hub 274 non-null bool
28 avg_elec_usage 274 non-null object
dtypes: bool(2), float64(14), int64(4), object(9)
memory usage: 60.5+ KB
pd.to_pickle(AK_city_info_df, '/work/data/processed/AK_outdated_energy_data.pkl', compression='bz2')
def get_city_solar_ACannual_kwh(latitude, longitude,):
tilt = round(latitude - 15)
payload = {
'api_key': 'swxJoItCCim7dEcYkr139ljdqCtPSD9KyA5dtaIM',
'system_capacity':1,
'module_type': 1,
'losses': 14,
'array_type': 0,
'tilt': tilt,
'azimuth': 180,
'lat': latitude,
'lon': longitude,
'dataset': 'tmy3',
'radius': 0
}
r = re.get('https://developer.nrel.gov/api/pvwatts/v6.json', params=payload)
response = r.json()
response_outputs = response['outputs']
return round(response_outputs['ac_annual'])
get_city_solar_ACannual_kwh(latitude=67,longitude=-151.51)
AK_cities = AK_city_info_df[['Name','Latitude','Longitude']]
city_list = []
ac_annual_list = []
i = 0
for city, lat, lon in zip(AK_cities['Name'], AK_cities['Latitude'], AK_cities['Longitude']):
try:
ac_annual_value = get_city_solar_ACannual_kwh(latitude=lat, longitude=lon)
ac_annual_list.append(ac_annual_value)
city_list.append(city)
# visual inspection of acquired data
print(i, ac_annual_value)
except:
print(i, city, lat, lon)
else:
pass
i+=1
0 827
1 725
2 592
3 996
4 891
5 1061
6 1047
7 725
8 859
9 770
10 960
11 996
12 997
13 1140
14 717
15 1048
16 1048
17 869
18 862
19 789
20 994
21 1048
22 1027
23 942
24 715
25 913
26 994
27 759
28 771
29 679
30 679
31 885
32 885
33 770
34 886
35 1020
36 947
37 1020
38 946
39 886
40 946
41 946
42 1020
43 1020
44 886
45 949
46 949
47 951
48 729
49 927
50 951
51 927
52 950
53 787
54 951
55 949
56 951
57 927
58 951
59 949
60 927
61 1005
62 887
63 1005
64 887
65 856
66 856
67 856
68 856
69 961
70 856
71 1005
72 887
73 960
74 960
75 960
76 770
77 960
78 938
79 960
80 770
81 960
82 960
83 960
84 770
85 960
86 960
87 1003
88 770
89 960
90 960
91 938
92 960
93 770
94 960
95 960
96 770
97 1061
98 1016
99 1015
100 1061
101 1016
102 1016
103 1061
104 786
105 786
106 786
107 786
108 786
109 786
110 938
111 938
112 869
113 869
114 869
115 789
116 870
117 1040
118 1041
119 1040
120 869
121 1041
122 1041
123 1041
124 860
125 788
126 1041
127 789
128 788
129 715
130 715
131 860
132 860
133 593
134 677
135 677
136 592
137 1051
138 1051
139 1050
140 942
141 942
142 1051
143 942
144 1048
145 1048
146 1048
147 1048
148 996
149 996
150 932
151 932
152 932
153 951
154 932
155 1083
156 818
157 1028
158 932
159 1028
160 932
161 1025
162 818
163 996
164 997
165 862
166 716
167 863
168 863
169 863
170 863
171 936
172 1057
173 1057
174 993
175 993
176 795
177 936
178 936
179 993
180 859
181 983
182 942
183 891
184 789
185 892
186 902
187 902
188 902
189 881
190 902
191 954
192 954
193 954
194 954
195 954
196 790
197 790
198 954
199 994
200 994
201 994
202 994
203 994
204 863
205 863
206 994
207 994
208 994
209 994
210 759
211 834
212 777
213 757
214 834
215 940
216 940
217 913
218 940
219 1191
220 940
221 827
222 1191
223 1038
224 913
225 1038
226 940
227 940
228 940
229 1191
230 1192
231 1038
232 1140
233 834
234 869
235 819
236 869
237 961
238 961
239 1140
240 1140
241 926
242 926
243 926
244 926
245 926
246 926
247 926
248 926
249 859
250 1025
251 887
252 786
253 875
254 827
255 1138
256 1138
257 859
258 1028
259 902
260 1027
261 592
262 902
263 716
264 1025
265 827
266 942
267 954
268 875
269 771
270 679
271 885
272 770
273 1025
city_ac_annual_df = pd.DataFrame(data=list(zip(city_list,ac_annual_list)), columns=['Name','ac_annual'])
city_ac_annual_df.head()
city_ac_annual_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274 entries, 0 to 273
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 274 non-null object
1 ac_annual 274 non-null int64
dtypes: int64(1), object(1)
memory usage: 4.4+ KB
city_ac_annual_df.to_pickle(path='/work/data/processed/AK_city_solar_ACannual.pkl', compression='bz2')
# alaska data for wind classes, dataframe includes id for location
fh = "/work/data/raw/us-doe-gov-333d394d-7256-4af3-8904-f0cc42777881/data/ak_50_main.csv"
df = pd.read_csv(fh)
df.head()
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 218362 entries, 0 to 218361
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 id 218362 non-null int64
1 windpwrd 218362 non-null int64
2 wpc 218362 non-null int64
dtypes: int64(3)
memory usage: 5.0 MB
# shape data for alaska
shape_data_fh = "/work/data/raw/us-doe-gov-333d394d-7256-4af3-8904-f0cc42777881/original/AK_50_Main.shp"
AK_wind_gdf = gpd.read_file(shape_data_fh)
AK_wind_gdf.geometry.crs
AK_wind_gdf.plot(column='wpc', legend=True)
city_gdf = gpd.GeoDataFrame(
data=AK_cities,
geometry=gpd.points_from_xy(AK_cities['Longitude'], AK_cities['Latitude'])
)
city_gdf.head()
city_list =[]
windpwrd_list = []
wpc_list = []
for windpwrd,wpc,poly in zip(AK_wind_gdf['windpwrd'], AK_wind_gdf['wpc'], AK_wind_gdf['geometry']):
for city,pt in zip(city_gdf['Name'], city_gdf['geometry']):
if pt.within(poly) == True:
#visual testing
print(city,windpwrd,wpc)
city_list.append(city)
windpwrd_list.append(windpwrd)
wpc_list.append(wpc)
else:
pass
city_wind_dict = {'City': city_list, 'windpwrd': windpwrd_list, 'wpc': wpc_list}
Point Hope Res 739 6
Point Hope Com 739 6
Kivalina 559 5
Noatak 174 1
Bettles 245 2
Evansville 245 2
Shungnak 239 2
Kiana 257 2
Noorvik 257 2
Selawik 242 2
Deering 445 4
Buckland 174 1
Kotzebue 557 5
Shishmaref 557 5
Wales 1059 7
Teller 359 3
Brevig Mission 548 5
Koyuk 447 4
Elim 592 5
White Mountain 252 2
Nome 450 4
Golovin 446 4
Delta Junction 240 2
Shaktoolik 276 2
Healy 146 1
Denali Nat'l Park 128 1
Unalakleet 305 3
Stebbins 472 4
Saint Michael 550 5
Alakanuk 366 3
Anvik 334 3
Grayling 234 2
Trapper Creek 230 2
Holy Cross 305 3
Mountain Village 487 4
Saint Mary's 544 5
Pitkas Point 462 4
Scammon Bay 458 4
Wasilla 343 3
Russian Mission 389 3
Marshall 673 6
Big Lake 247 2
Eagle River 219 2
Valdez 118 1
Chuathbaluk 211 2
Cantwell 106 1
Chickaloon 106 1
Chicken 106 1
Chitina 106 1
Copper Center 106 1
Dot Lake 106 1
Northway 106 1
Tanacross 106 1
Tetlin 106 1
Tok 106 1
Chistochina 106 1
Gakona 106 1
Glennallen 106 1
Gulkana 106 1
Kenny Lake 106 1
McCarthy 106 1
Mentasta Lake 106 1
Paxson 106 1
Slana 106 1
Tazlina 106 1
Tonsina 106 1
Houston 106 1
Skwentna 106 1
Sutton 106 1
Talkeetna 106 1
Willow 106 1
Upper Kalskag 164 1
Aniak 249 2
Lower Kalskag 245 2
Anchorage 122 1
Palmer 122 1
Chugiak 122 1
Tatitlek 390 3
Indian 89 1
Bird Creek 92 1
Girdwood 110 1
Eyak 1641 7
Cordova 323 3
Whittier 368 3
Newtok 484 4
Tyonek 251 2
Hope 95 1
Nunapitchuk 392 3
Kasigluk 474 4
Atmautluak 641 6
Hooper Bay 702 6
Kenai 330 3
Sterling 157 1
Nikiski 237 2
Soldotna 237 2
Nightmute 490 4
Kasilof 207 2
Chenega 152 1
Cooper Landing 119 1
Moose Pass 119 1
Allakaket 108 1
Central 108 1
Clear 108 1
Ambler 108 1
Kobuk 108 1
Crooked Creek 108 1
Lime Village 108 1
Sleetmute 108 1
Stony River 108 1
McGrath 108 1
Nikolai 108 1
Shageluk 108 1
Takotna 108 1
Alatna 108 1
Galena 108 1
Hughes 108 1
Huslia 108 1
Kaltag 108 1
Koyukuk 108 1
Minchumina 108 1
Manley Hot Springs 108 1
Minto 108 1
Nulato 108 1
Rampart 108 1
Ruby 108 1
Stevens Village 108 1
Tanana 108 1
Wiseman 108 1
Eagle 108 1
Fairbanks 108 1
Anaktuvuk Pass - C 108 1
Nenana 108 1
North Pole 108 1
Anderson 108 1
Salcha 108 1
Ester 108 1
Anaktuvuk Pass Res 108 1
Faribanks - Chena Ridge 108 1
Seward 402 4
Clam Gulch 240 2
Port Alsworth 276 2
Nondalton 237 2
Ninilchik 263 2
Nikolaevsk 339 3
Mekoryok 706 6
Chevak 544 5
Nunum Iqua 544 5
Chefornak 544 5
Kipnuk 544 5
Kongiganak 544 5
Pedro Bay 430 4
Anchor Point 255 2
Homer 150 1
Iliamna 541 5
Akiak 361 3
Tuluksak 361 3
Newhalen 725 6
Bethel 458 4
Emmonak 458 4
Kotlik 458 4
Pilot Station 458 4
Akiachak 458 4
Eek 458 4
Kwethluk 458 4
Napakiak 458 4
Napaskiak 458 4
Oscarville 458 4
Quinhagak 458 4
Tuntutuliak 458 4
Seldovia 449 4
Koliganek 256 2
English Bay/Nanwal 651 6
Port Graham 661 6
Kwigillingok 707 6
Toksook Bay 707 6
Tununak 707 6
Kokhanok 969 7
Aleknagik 350 3
Goodnews Bay 355 3
Togiak 348 3
Twin Hills 266 2
Platinum 698 6
Manokotak 368 3
Clark's Point 552 5
Dillingham 247 2
Ekwok 247 2
New Stuyahok 247 2
Igiugig 247 2
Levelock 247 2
Naknek 341 3
South Naknek 411 4
King Salmon 315 3
City_wind_class_df = pd.DataFrame.from_dict(data=city_wind_dict)
City_wind_class_df
City_wind_class_df.to_pickle(path="/work/data/processed/AK_city_windpwrd_classification.pkl", compression='bz2')
complete_city = [x for x in City_wind_class_df['City']]
print(complete_city)
all_city = [i for i in city_gdf['Name']]
print(all_city)
missing_city=[]
for i in all_city:
if i not in complete_city:
missing_city.append(i)
print(len(missing_city))
print(missing_city)
['Point Hope Res', 'Point Hope Com', 'Kivalina', 'Noatak', 'Bettles', 'Evansville', 'Shungnak', 'Kiana', 'Noorvik', 'Selawik', 'Deering', 'Buckland', 'Kotzebue', 'Shishmaref', 'Wales', 'Teller', 'Brevig Mission', 'Koyuk', 'Elim', 'White Mountain', 'Nome', 'Golovin', 'Delta Junction', 'Shaktoolik', 'Healy', "Denali Nat'l Park", 'Unalakleet', 'Stebbins', 'Saint Michael', 'Alakanuk', 'Anvik', 'Grayling', 'Trapper Creek', 'Holy Cross', 'Mountain Village', "Saint Mary's", 'Pitkas Point', 'Scammon Bay', 'Wasilla', 'Russian Mission', 'Marshall', 'Big Lake', 'Eagle River', 'Valdez', 'Chuathbaluk', 'Cantwell', 'Chickaloon', 'Chicken', 'Chitina', 'Copper Center', 'Dot Lake', 'Northway', 'Tanacross', 'Tetlin', 'Tok', 'Chistochina', 'Gakona', 'Glennallen', 'Gulkana', 'Kenny Lake', 'McCarthy', 'Mentasta Lake', 'Paxson', 'Slana', 'Tazlina', 'Tonsina', 'Houston', 'Skwentna', 'Sutton', 'Talkeetna', 'Willow', 'Upper Kalskag', 'Aniak', 'Lower Kalskag', 'Anchorage', 'Palmer', 'Chugiak', 'Tatitlek', 'Indian', 'Bird Creek', 'Girdwood', 'Eyak', 'Cordova', 'Whittier', 'Newtok', 'Tyonek', 'Hope', 'Nunapitchuk', 'Kasigluk', 'Atmautluak', 'Hooper Bay', 'Kenai', 'Sterling', 'Nikiski', 'Soldotna', 'Nightmute', 'Kasilof', 'Chenega', 'Cooper Landing', 'Moose Pass', 'Allakaket', 'Central', 'Clear', 'Ambler', 'Kobuk', 'Crooked Creek', 'Lime Village', 'Sleetmute', 'Stony River', 'McGrath', 'Nikolai', 'Shageluk', 'Takotna', 'Alatna', 'Galena', 'Hughes', 'Huslia', 'Kaltag', 'Koyukuk', 'Minchumina', 'Manley Hot Springs', 'Minto', 'Nulato', 'Rampart', 'Ruby', 'Stevens Village', 'Tanana', 'Wiseman', 'Eagle', 'Fairbanks', 'Anaktuvuk Pass - C', 'Nenana', 'North Pole', 'Anderson', 'Salcha', 'Ester', 'Anaktuvuk Pass Res', 'Faribanks - Chena Ridge', 'Seward', 'Clam Gulch', 'Port Alsworth', 'Nondalton', 'Ninilchik', 'Nikolaevsk', 'Mekoryok', 'Chevak', 'Nunum Iqua', 'Chefornak', 'Kipnuk', 'Kongiganak', 'Pedro Bay', 'Anchor Point', 'Homer', 'Iliamna', 'Akiak', 'Tuluksak', 'Newhalen', 'Bethel', 'Emmonak', 'Kotlik', 'Pilot Station', 'Akiachak', 'Eek', 'Kwethluk', 'Napakiak', 'Napaskiak', 'Oscarville', 'Quinhagak', 'Tuntutuliak', 'Seldovia', 'Koliganek', 'English Bay/Nanwal', 'Port Graham', 'Kwigillingok', 'Toksook Bay', 'Tununak', 'Kokhanok', 'Aleknagik', 'Goodnews Bay', 'Togiak', 'Twin Hills', 'Platinum', 'Manokotak', "Clark's Point", 'Dillingham', 'Ekwok', 'New Stuyahok', 'Igiugig', 'Levelock', 'Naknek', 'South Naknek', 'King Salmon']
['Anchorage', 'Adak', 'Akutan', 'Allakaket', 'Angoon', 'Aniak', 'Arctic Village', 'Atka', 'Auke Bay', 'Barrow/Utqiavik', 'Bethel', 'Bettles', 'Delta Junction', 'Big Lake', 'Cantwell', 'Central', 'Chalkyitsik', 'Chickaloon', 'Chicken', 'Chignik', 'Chitina', 'Circle', 'Clear', 'Coffman Cove', 'Cold Bay', 'Cooper Landing', 'Copper Center', 'Cordova', 'Atqasuk - Com', 'Kaktovik - Res', 'Nuiqsut', 'Point Hope Res', 'Point Lay', 'Wainwright Res', 'Ambler', 'Buckland', 'Deering', 'Kiana', 'Kivalina', 'Kobuk', 'Kotzebue', 'Noatak', 'Noorvik', 'Selawik', 'Shungnak', 'Brevig Mission', 'Diomede', 'Elim', 'Gambell', 'Golovin', 'Koyuk', 'Nome', 'Saint Michael', 'Savoonga', 'Shaktoolik', 'Shishmaref', 'Stebbins', 'Teller', 'Unalakleet', 'Wales', 'White Mountain', 'Chevak', 'Emmonak', 'Hooper Bay', 'Kotlik', 'Marshall', 'Mountain Village', 'Pilot Station', 'Pitkas Point', 'Russian Mission', "Saint Mary's", 'Scammon Bay', 'Nunum Iqua', 'Akiachak', 'Akiak', 'Atmautluak', 'Chefornak', 'Eek', 'Goodnews Bay', 'Kasigluk', 'Kipnuk', 'Kongiganak', 'Kwethluk', 'Kwigillingok', 'Mekoryok', 'Napakiak', 'Napaskiak', 'Newtok', 'Nightmute', 'Nunapitchuk', 'Oscarville', 'Platinum', 'Quinhagak', 'Toksook Bay', 'Tuluksak', 'Tuntutuliak', 'Tununak', 'Chuathbaluk', 'Crooked Creek', 'Lime Village', 'Lower Kalskag', 'Sleetmute', 'Stony River', 'Upper Kalskag', "Clark's Point", 'Dillingham', 'Ekwok', 'Koliganek', 'Manokotak', 'New Stuyahok', 'Togiak', 'Twin Hills', 'King Salmon', 'Naknek', 'South Naknek', 'Chignik Lake', 'Egegik', 'Igiugig', 'Iliamna', 'Kokhanok', 'Levelock', 'Newhalen', 'Nondalton', 'Pedro Bay', 'Perryville', 'Pilot Point', 'Port Alsworth', 'Port Heiden', 'Ugashik', 'False Pass', 'King Cove', 'Nelson Lagoon', 'Sand Point', 'Nikolski', 'Saint George', 'Saint Paul', 'Unalaska', 'Anvik', 'Grayling', 'Holy Cross', 'McGrath', 'Nikolai', 'Shageluk', 'Takotna', 'Beaver', 'Birch Creek', 'Fort Yukon', 'Venetie', 'Alatna', 'Evansville', 'Galena', 'Hughes', 'Huslia', 'Kaltag', 'Koyukuk', 'Minchumina', 'Manley Hot Springs', 'Minto', 'Nulato', 'Rampart', 'Ruby', 'Stevens Village', 'Tanana', 'Wiseman', 'Dot Lake', 'Eagle', 'Healy', 'Northway', 'Tanacross', 'Tetlin', 'Tok', 'Haines', 'Elfin Cove', 'Gustavus', 'Hoonah', 'Pelican', 'Yakutat', 'Klukwan', 'Skagway', 'Tenakee Springs', 'Juneau, Airport', 'Sitka', 'Wrangell', 'Kake', 'Petersburg', 'Port Alexander', 'Ketchikan', 'Saxman', 'Ward Cove', 'Metlakatla', 'Hyder', 'Hollis', 'Craig', 'Hydaburg', 'Kasaan', 'Klawock', 'Point Baker', 'Port Protection', 'Thorne Bay', 'Chistochina', 'Gakona', 'Glennallen', 'Gulkana', 'Kenny Lake', 'McCarthy', 'Mentasta Lake', 'Paxson', 'Slana', 'Tazlina', 'Tonsina', 'Eyak', 'Chenega', 'Tatitlek', 'Valdez', 'Whittier', 'English Bay/Nanwal', 'Port Graham', 'Seward', 'Anchor Point', 'Clam Gulch', 'Homer', 'Hope', 'Kasilof', 'Kenai', 'Moose Pass', 'Nikiski', 'Nikolaevsk', 'Ninilchik', 'Seldovia', 'Soldotna', 'Sterling', 'Tyonek', 'Houston', 'Girdwood', 'Palmer', 'Skwentna', 'Sutton', 'Talkeetna', 'Trapper Creek', 'Wasilla', 'Willow', 'Akhiok', 'Chiniak', 'Karluk', 'Kodiak', 'Larsen Bay', 'Old Harbor', 'Ouzinkie', 'Port Lions', 'Juneau, City of', 'Fairbanks', 'Alakanuk', 'Aleknagik', 'Anaktuvuk Pass - C', 'Indian', 'Chugiak', 'Eagle River', 'Douglas', 'Nenana', 'North Pole', 'Anderson', 'Dutch Harbor', 'Salcha', "Denali Nat'l Park", 'Ester', 'Bird Creek', 'Whale Pass', 'Naukati', 'Anaktuvuk Pass Res', 'Atqasuk - Res', 'Kaktovik - Com', 'Point Hope Com', 'Wainwright Com', 'Faribanks - Chena Ridge']
82
['Adak', 'Akutan', 'Angoon', 'Arctic Village', 'Atka', 'Auke Bay', 'Barrow/Utqiavik', 'Chalkyitsik', 'Chignik', 'Circle', 'Coffman Cove', 'Cold Bay', 'Atqasuk - Com', 'Kaktovik - Res', 'Nuiqsut', 'Point Lay', 'Wainwright Res', 'Diomede', 'Gambell', 'Savoonga', 'Chignik Lake', 'Egegik', 'Perryville', 'Pilot Point', 'Port Heiden', 'Ugashik', 'False Pass', 'King Cove', 'Nelson Lagoon', 'Sand Point', 'Nikolski', 'Saint George', 'Saint Paul', 'Unalaska', 'Beaver', 'Birch Creek', 'Fort Yukon', 'Venetie', 'Haines', 'Elfin Cove', 'Gustavus', 'Hoonah', 'Pelican', 'Yakutat', 'Klukwan', 'Skagway', 'Tenakee Springs', 'Juneau, Airport', 'Sitka', 'Wrangell', 'Kake', 'Petersburg', 'Port Alexander', 'Ketchikan', 'Saxman', 'Ward Cove', 'Metlakatla', 'Hyder', 'Hollis', 'Craig', 'Hydaburg', 'Kasaan', 'Klawock', 'Point Baker', 'Port Protection', 'Thorne Bay', 'Akhiok', 'Chiniak', 'Karluk', 'Kodiak', 'Larsen Bay', 'Old Harbor', 'Ouzinkie', 'Port Lions', 'Juneau, City of', 'Douglas', 'Dutch Harbor', 'Whale Pass', 'Naukati', 'Atqasuk - Res', 'Kaktovik - Com', 'Wainwright Com']
missing_city_gdf = city_gdf[city_gdf['Name'].isin(missing_city)]
missing_city_gdf.head()
missing_city_gdf.info()
<class 'geopandas.geodataframe.GeoDataFrame'>
Int64Index: 82 entries, 2 to 319
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 82 non-null object
1 Latitude 82 non-null float64
2 Longitude 82 non-null float64
3 geometry 82 non-null geometry
dtypes: float64(2), geometry(1), object(1)
memory usage: 3.2+ KB
missing_city_gdf.plot(alpha=0.5, edgecolor='k')
maintence_cost_usd_kWh = 0.036
turbine_kw_installation_totals_usd_kW = {50:29815, 100:20680,
500:9705, 1000:7319,
2000:5676, 5000:4236}
installation_cost_df = pd.DataFrame.from_dict(turbine_kw_installation_totals_usd_kW, orient='index')
installation_cost_df = installation_cost_df.rename(columns={0:'cost_usd_kW'})
installation_cost_df
x = np.array(installation_cost_df.index)
y = np.array(installation_cost_df.cost_usd_kW)
X = x.reshape(-1,1)
Y = y.reshape(-1,1)
curves = {}
for i in range(1,5):
z = np.polyfit(x,y,deg=i)
p = np.poly1d(z)
name = f"p{i}"
curves[name] = p
xp = np.linspace(0,5000,500)
plt.plot(
x, y, 'bo',
xp, curves['p1'](xp), '-',
xp, curves['p2'](xp), 'o-',
xp, curves['p3'](xp),'r+',
)
plt.show()
curves["p2"](1500)
def func(x, a, b, c):
return a * np.exp(-b * x) + c
df_solar = pd.read_pickle('/work/data/processed/AK_city_solar_ACannual.pkl', compression='bz2')
df_wind = pd.read_pickle('/work/data/processed/AK_city_windpwrd_classification.pkl', compression='bz2')
df_solar.info()
df_wind.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 274 entries, 0 to 273
Data columns (total 2 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 274 non-null object
1 ac_annual 274 non-null int64
dtypes: int64(1), object(1)
memory usage: 4.4+ KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 192 entries, 0 to 191
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 City 192 non-null object
1 windpwrd 192 non-null int64
2 wpc 192 non-null int64
dtypes: int64(2), object(1)
memory usage: 4.6+ KB
df_combined = df_solar.merge(right=df_wind, left_on='Name', right_on='City')
df_combined.head()
df_filtered = df_combined.filter(items=['Name','ac_annual','windpwrd','wpc'])
df_filtered.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 0 to 191
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 192 non-null object
1 ac_annual 192 non-null int64
2 windpwrd 192 non-null int64
3 wpc 192 non-null int64
dtypes: int64(3), object(1)
memory usage: 7.5+ KB
df_filtered.to_pickle(path='/work/data/processed/AK_city_solar_wind.pkl', compression='bz2')
diesel_df = pd.read_csv('/work/data/raw/pce-2019.tsv', sep='\t')
diesel_df
len(diesel_df.community_names.unique())
diesel_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2077 entries, 0 to 2076
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 community_names 2077 non-null object
1 year 2077 non-null int64
2 month 2077 non-null int64
3 pce_rate 2077 non-null float64
4 fuel_price 2065 non-null float64
5 diesel_efficiency 1800 non-null float64
6 peak_consumption_kw 2077 non-null int64
7 residential_kwh_sold 2077 non-null int64
8 commercial_kwh_sold 2077 non-null int64
9 community_kwh_sold 2077 non-null int64
10 government_kwh_sold 2077 non-null int64
dtypes: float64(3), int64(7), object(1)
memory usage: 178.6+ KB
nonnull_diesel_df= diesel_df.replace(to_replace={0:np.nan})
nonnull_diesel_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2077 entries, 0 to 2076
Data columns (total 11 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 community_names 2077 non-null object
1 year 2077 non-null float64
2 month 2077 non-null float64
3 pce_rate 1991 non-null float64
4 fuel_price 1675 non-null float64
5 diesel_efficiency 1680 non-null float64
6 peak_consumption_kw 1177 non-null float64
7 residential_kwh_sold 2077 non-null float64
8 commercial_kwh_sold 1957 non-null float64
9 community_kwh_sold 2028 non-null float64
10 government_kwh_sold 2019 non-null float64
dtypes: float64(10), object(1)
memory usage: 178.6+ KB
null_list = list(nonnull_diesel_df.pce_rate.replace({np.nan:0}))
nonnull_diesel_df.pce_rate = null_list
nonnull_diesel_df.describe()
grouped_df = nonnull_diesel_df.groupby(by=["community_names"]).median()
grouped_df
filtered_df = grouped_df.drop(columns=['year','month']).reset_index()
len(filtered_df.fuel_price.unique())
filtered_df.describe()
filtered_df.diesel_efficiency.replace(to_replace=[0, 'nan', np.nan], value=12.76, inplace=True)
filtered_df.describe()
filtered_df.fuel_price.hist(bins=100, range=[0,8])
months_per_year = 12
annual_diesel_df = filtered_df
annual_diesel_df['total_kwh_sold'] = (
annual_diesel_df.residential_kwh_sold
+ annual_diesel_df.commercial_kwh_sold
+ annual_diesel_df.community_kwh_sold
+ annual_diesel_df.government_kwh_sold
) * months_per_year
annual_diesel_df
annual_diesel_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 community_names 175 non-null object
1 pce_rate 175 non-null float64
2 fuel_price 165 non-null float64
3 diesel_efficiency 175 non-null float64
4 peak_consumption_kw 103 non-null float64
5 residential_kwh_sold 175 non-null float64
6 commercial_kwh_sold 165 non-null float64
7 community_kwh_sold 171 non-null float64
8 government_kwh_sold 171 non-null float64
9 total_kwh_sold 158 non-null float64
dtypes: float64(9), object(1)
memory usage: 13.8+ KB
def annual_total_kwh(df):
total_list = []
months_per_year = 12
for res, comr, comm, govt in zip(
annual_diesel_df.residential_kwh_sold,
annual_diesel_df.commercial_kwh_sold,
annual_diesel_df.community_kwh_sold,
annual_diesel_df.government_kwh_sold,
):
if pd.isnull(res) == True:
res = 0
if pd.isnull(comr) == True:
comr = 0
if pd.isnull(comm) == True:
comm= 0
if pd.isnull(govt) == True:
govt = 0
annual_total = (res + comr + comm + govt) * months_per_year
total_list.append(annual_total)
return total_list
annual_diesel_df['total_kwh_sold'] = annual_total_kwh(annual_diesel_df)
annual_diesel_df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 175 entries, 0 to 174
Data columns (total 10 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 community_names 175 non-null object
1 pce_rate 175 non-null float64
2 fuel_price 165 non-null float64
3 diesel_efficiency 175 non-null float64
4 peak_consumption_kw 103 non-null float64
5 residential_kwh_sold 175 non-null float64
6 commercial_kwh_sold 165 non-null float64
7 community_kwh_sold 171 non-null float64
8 government_kwh_sold 171 non-null float64
9 total_kwh_sold 175 non-null float64
dtypes: float64(9), object(1)
memory usage: 13.8+ KB
annual_diesel_df.describe()
pd.to_pickle(annual_diesel_df, '/work/data/processed/AK_city_diesel_data.pkl', compression='bz2')
wind_solar_df = pd.read_pickle('/work/data/processed/AK_city_solar_wind.pkl', compression='bz2')
wind_solar_df = wind_solar_df.rename(columns={'ac_annual': 'solar_ac_annual_1kw'})
wind_solar_df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 0 to 191
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Name 192 non-null object
1 solar_ac_annual_1kw 192 non-null int64
2 windpwrd 192 non-null int64