import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
#!wget -q "https://www.dropbox.com/s/6la0kswf7vri6ek/hurdat2-1851-2019-052520.txt"
#!wget -q "https://www.dropbox.com/s/hpawhrn8uav3nhn/hurdat2-nepac-1949-2019-042320.txt"
atlantic_local = "/content/hurdat2-1851-2019-052520.txt"
pacific_local = "/content/hurdat2-nepac-1949-2019-042320.txt"
#https://drive.google.com/file/d/1Zm5grFt7knNFyBM4HjCRow4-qK2shCVJ/view?usp=sharing
#https://drive.google.com/file/d/1EYGdyBvT3aGyIQgrV3Wk_zj1hIQTFvCz/view?usp=sharing
names = ["name", "number", "date", "time","id","c.status","latitude","longitude","max windspeed[kt]","min pressure[mb]"] + list(range(12)) # refer to dok1 (HURDAT2 format description)
atlantic_upraveny = pd.read_csv("https://www.dropbox.com/s/e2wt2gvx8chw93n/Antlantic%20-%20upraveny.txt?dl=1", names=names)
atlantic_upraveny.set_index('number', inplace=True)
atlantic_upraveny = atlantic_upraveny.replace(-999,np.nan,).replace(-99,np.nan,)
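# Later cells use 'year', 'month' and 'day' columns that are not in `names`;
# the pre-processed "upraveny" file presumably already carries them. A minimal
# sketch (assuming a 'date' column holding YYYYMMDD integers) of deriving them:
if "year" not in atlantic_upraveny.columns:
    date_int = atlantic_upraveny["date"].astype(int)
    atlantic_upraveny["year"] = date_int // 10000
    atlantic_upraveny["month"] = date_int // 100 % 100
    atlantic_upraveny["day"] = date_int % 100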
display(atlantic_upraveny.query("name == 'KATRINA'"))
# hurricanes per year: count distinct storm numbers within each year
x = atlantic_upraveny.groupby(["year","number"]).size().reset_index().groupby("year").size().reset_index().rename(columns={0:"size"})
plot = sns.regplot(data=x,x="year",y="size",line_kws={"color":"red"})
plot.set_ylabel("number of hurricanes")
plot.set_title("Number of hurricanes over the years")
#display(atlantic_upraveny.tail(20))
x = atlantic_upraveny.groupby(["month","number"]).size().reset_index().groupby("month").size().reset_index().rename(columns={0:"count"})
#display(x)
grid = sns.catplot(kind="bar", data=x,
x='month', y='count', color="cyan", aspect=1.5)
plt.show()
# apparently not a very successful attempt
years_count = atlantic_upraveny.groupby("year").size().reset_index().rename(columns={0:"count"})
#display(years_count)
figure, axes = plt.subplots()
axes.plot(years_count['year'], years_count['count'])
fig,axes = plt.subplots(ncols=2,figsize=(20,4))
power = atlantic_upraveny.loc[:, ["year", "max windspeed[kt]"]].groupby("year").mean().reset_index()
#display(power)
plot = sns.regplot(data=power,x="year",y="max windspeed[kt]",line_kws={"color":"red"},ax=axes[1])
x = atlantic_upraveny.groupby(["year","number"]).size().reset_index().groupby("year").size().reset_index().rename(columns={0:"size"})
plot = sns.regplot(data=x,x="year",y="size",line_kws={"color":"red"},ax=axes[0])
axes[1].set_title('average windspeed of hurricanes')
axes[0].set_title('number of hurricanes')
axes[0].set_ylabel('number')
fig.subplots_adjust(hspace=0.45, wspace=0.45)
new = atlantic_upraveny.groupby("number").mean()  # per-storm means of the numeric columns (year, windspeed, pressure, ...)
pal = sns.color_palette("flare_r", as_cmap=True)
plot = sns.scatterplot(data=new,x="min pressure[mb]",y="max windspeed[kt]", hue='year', palette=pal)
plot.set_title('minimum pressure vs. maximum windspeed')
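# To put a number on the relationship plotted above, a quick check of the
# (Pearson, pandas' default) correlation between per-storm mean pressure and mean windspeed:
print(new["min pressure[mb]"].corr(new["max windspeed[kt]"]))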
table = atlantic_upraveny.reset_index()
def eastwest(ins):
    # "xx.xE"/"xx.xW" longitude strings to signed degrees (E positive, W negative)
    if "E" in ins:
        return float(ins.strip("E"))
    return -float(ins.strip("W"))
def northsouth(ins):
    # "xx.xN"/"xx.xS" latitude strings to signed degrees (N positive, S negative)
    if "S" in ins:
        return -float(ins.strip("S"))
    return float(ins.strip("N"))
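# quick sanity checks for the converters above (E/N positive, W/S negative);
# the sample coordinate strings are just illustrative values
assert eastwest("94.8W") == -94.8 and eastwest("10.0E") == 10.0
assert northsouth("28.0N") == 28.0 and northsouth("5.5S") == -5.5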
slim = table.loc[:,["number","name","time","latitude","longitude","max windspeed[kt]","year","month"]]
slim.loc[:,"longitude"] = slim.loc[:,"longitude"].apply(eastwest)
slim.loc[:,"latitude"] = slim.loc[:,"latitude"].apply(northsouth)
cols = pd.Series(("name","year","Category","Damages[adjusted]","Deathtoll"))
# the 15 most destructive hurricanes since 1950 (before that, hurricanes had no formal names)
a = pd.Series(("Floyd",1999,2,9.813,87))
b = pd.Series(("Betsy",1965,3,10.681,81))
c = pd.Series(("Agnes",1972,1,11.904,128))
d = pd.Series(("Frances",2004,2,12.335,50))
e = pd.Series(("Georges",1998,2,14.129,604))
f = pd.Series(("Rita",2005,3,14.559,125))
g = pd.Series(("Irene",2011,1,17.486,61))
h = pd.Series(("Hugo",1989,4,19.108,107))
i = pd.Series(("Charley",2004,4,20.446,35))
j = pd.Series(("Irvan",2004,3,29.226,124))
k = pd.Series(("Wilma",2005,3,35.548,87))
l = pd.Series(("Ike",2008,2,41.269,195))
m = pd.Series(("Andrew",1992,5,44.754,65))
n = pd.Series(("Sandy",2012,1,73.686,233))
o = pd.Series(("Katrina",2005,3,151.665,1836))
expensive = pd.DataFrame([a,b,c,d,e,f,g,h,i,j,k,l,m,n,o])  # all 15 storms, including Floyd
expensive[0] = expensive[0].str.upper()
expensive = expensive.rename(columns={0:"name",1:"year",2:"category",3:"damages[b$]",4:"DeathToll"})
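# sanity check (assumes all 15 named storms above made it into the table)
assert expensive.shape[0] == 15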
to_join = table.loc[:,["number","name","max windspeed[kt]","latitude","longitude","time","c.status","year","month","day"]].replace("UNNAMED",np.nan)
to_join.dropna(inplace=True)
new = pd.merge(expensive,to_join,left_on=["name","year"],right_on=["name","year"])
new.loc[:,"longitude"] = new.loc[:,"longitude"].apply(eastwest)
new.loc[:,"latitude"] = new.loc[:,"latitude"].apply(northsouth)
dt = new.loc[:, ["name","time","year","month","day","DeathToll","damages[b$]"]].copy(deep=True)
# earliest recorded day for each hurricane
mi = dt.groupby("name")["day"].min()
# keep only the rows from that first day, then take the earliest values on it
frames = [dt[(dt["name"] == i) & (dt["day"] == j)] for i, j in zip(mi.index, mi)]
min_hur = pd.concat(frames, ignore_index=True).groupby("name").min()
# last recorded day/time for each hurricane
ma = dt.groupby("name").max()
le = ma - min_hur
le.drop(["DeathToll","damages[b$]"],axis=1,inplace=True)
le = pd.merge(le,expensive,left_on="name",right_on="name")
# rough duration in hours: whole-day difference plus the HHMM time difference
# truncated to hours (assumes each track stays within a single calendar month)
lengths = []
for i in le.index:
    p = le.loc[i,["day","time"]]
    lengths.append(p["time"] // 100 + p["day"] * 24)
le["duration[h]"] = lengths
display(le)  # final duration table
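# The day-difference above breaks if a track crosses a month boundary. An
# alternative sketch using real timestamps (assumes integer year/month/day and
# HHMM-style time columns in dt) that avoids this limitation:
ts = pd.to_datetime(dict(year=dt["year"], month=dt["month"], day=dt["day"],
                         hour=(dt["time"] // 100).astype(int),
                         minute=(dt["time"] % 100).astype(int)))
duration_h = ts.groupby(dt["name"]).agg(lambda s: (s.max() - s.min()) / pd.Timedelta(hours=1))
#display(duration_h)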
gif = px.scatter(le,x="duration[h]",y="DeathToll",color="damages[b$]",text="name",color_continuous_scale="ylorrd",hover_name="name",hover_data=["category","DeathToll","damages[b$]","duration[h]"])
gif.update_traces(marker=dict(size=20))
display(gif)
display(new.loc[:,["name","max windspeed[kt]"]].groupby("name").max())
first = True
figs = {}
clrscale = 'ylorrd'
import plotly.graph_objects as go
for name in new.loc[:,"name"].drop_duplicates():
tp = new[new["name"] == name]
if first:
fig = go.Figure(px.scatter_mapbox(tp, lat="latitude", lon="longitude", hover_name="name", hover_data=["max windspeed[kt]", "damages[b$]","year"],
zoom=3, height=500,color="max windspeed[kt]",labels="hur",color_continuous_scale=clrscale))
first = False
else:
fig.add_trace(px.scatter_mapbox(tp, lat="latitude", lon="longitude", hover_name="name", hover_data=["max windspeed[kt]", "damages[b$]","year"],
zoom=3, height=500,color="max windspeed[kt]",labels="hur",color_continuous_scale=clrscale).data[0])
fig.update_traces(mode="lines+markers", selector=dict(type='scattermapbox'))
named_damage = new.loc[:,["name","damages[b$]"]].drop_duplicates("name")
named_damage.set_index("name",inplace=True)
import colorsys
def get_N_HexCol(N=5):
HSV_tuples = [(x * 1 / N, 1, 0.6) for x in range(N)]
hex_out = []
for rgb in HSV_tuples:
rgb = map(lambda x: int(x * 255), colorsys.hsv_to_rgb(*rgb))
hex_out.append('#%02x%02x%02x' % tuple(rgb))
return hex_out
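# e.g. get_N_HexCol(3) yields ['#990000', '#009900', '#000099'] (evenly spaced hues at 60% value)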
cmap = get_N_HexCol(named_damage.shape[0])
coldict = {}
for c,n in zip(cmap,named_damage.index):
coldict[str(n)] = c
def update_line_color(trace):
    # label each map trace with its hurricane name and give it a fixed colour from coldict
    p = trace.hovertext[0]
    trace.update(name=p)
    trace.update(marker_size=8)
    trace.update(line=dict(color=coldict[p]))
    trace.update(showlegend=True)
    return trace
fig.update_traces(mode="lines+markers", selector=dict(type='scattermapbox'))
fig.for_each_trace(lambda trace: update_line_color(trace))
fig.update_layout(legend=dict(title = "Hurricane"))
fig.add_annotation(x=0, y=0,
text="Single click to deselect hurricane <br>Double click to select specific hurricane",
bgcolor="#AAD3DF",
showarrow=False,
yshift=10)
fig.update_layout(mapbox_style = "open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.update_layout(legend=dict(x=0,y=0.25,traceorder="normal",bgcolor="#AAD3DF"))
fig.show()
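# If the interactive map needs to live outside the notebook, plotly figures can
# be exported to standalone HTML (the filename here is only an example):
#fig.write_html("hurricane_tracks.html")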
# this part looks non-functional even without this comment
long_w = atlantic_upraveny.groupby('longitude')['max windspeed[kt]'].mean()
lat_w = atlantic_upraveny.groupby('latitude')['max windspeed[kt]'].mean()
tab_lt = atlantic_upraveny.groupby(['latitude','longitude'])['max windspeed[kt]'].mean()
long_w = long_w.reset_index()
long_w['longitude'] = long_w.loc[:,'longitude'].apply(eastwest)
long_w = long_w.query('longitude > -200')
lat_w = lat_w.reset_index()
lat_w['latitude'] = lat_w.loc[:,'latitude'].apply(northsouth)
figure, axes = plt.subplots(1,3, figsize = (20,10))
tab_lt = tab_lt.reset_index()
#display(tab_lt)
tab_lt['longitude'] = tab_lt.loc[:,'longitude'].apply(eastwest)
tab_lt['latitude'] = tab_lt.loc[:,'latitude'].apply(northsouth)
#display(tab_lt)
sns.scatterplot(data = long_w, x = 'longitude', y = 'max windspeed[kt]',ax = axes[0])
sns.scatterplot(data = lat_w, x = 'max windspeed[kt]', y = 'latitude', ax = axes[1])
sns.histplot(data = tab_lt.query('longitude > -300'), x = 'latitude', y = 'longitude', ax = axes[2], cmap = 'viridis', bins = 25)
"""latlong = tab_lt.loc[:,["latitude","longitude"]]
heatmap, xedges, yedges = np.histogram2d(latlong["latitude"], latlong["longitude"], bins=50)
extent = [xedges[0], xedges[-1], yedges[1], -100]"""
#plt.clf()
#plt.imshow(heatmap.T, extent=extent, origin='lower')
axes[0].set_xlabel('longitude (E = +, W = -)')
axes[1].set_ylabel('latitude (N = +, S = -)')
axes[2].set_ylabel('longitude (E = +, W = -)')
axes[2].set_xlabel('latitude (N = +, S = -)')
global_temp = pd.read_csv('https://pkgstore.datahub.io/core/global-temp/annual_csv/data/a26b154688b061cdd04f1df36e4408be/annual_csv.csv').query("Source == 'GCAG'").reset_index(drop=True).loc[ :, ['Year', 'Mean']]
new2 = atlantic_upraveny.groupby(["year","number"]).size().reset_index().groupby("year").size().reset_index().rename(columns={0:"size", 'year':'Year'})
result = pd.merge(global_temp, new2, on=["Year"])
pal = sns.color_palette("viridis", as_cmap=True)
plot = sns.scatterplot(data=result,x="Year",y="Mean", hue='size', palette=pal)
plot.set_ylabel('Temperature anomalies')
plot.legend(title='count')
plot.set_title('Temperature change and its effect on hurricane count')
# temperature anomalies are calculated with respect to the 20th century average = 12 C
#display(result)
new3 = atlantic_upraveny.groupby(["year","number"]).mean().reset_index().groupby("year").mean().reset_index().rename(columns={'year':'Year'})
new3 = new3.loc[:, ['Year', 'max windspeed[kt]', 'min pressure[mb]']]
result2 = pd.merge(global_temp, new3, on=["Year"])
pal = sns.color_palette("rocket", as_cmap=True) # try coolwarm?
hn = max(abs(result2['Mean'].min()), result2['Mean'].max())
plot = sns.scatterplot(data=result2,x="Year",y="max windspeed[kt]", hue='Mean',
size='min pressure[mb]', palette=pal, hue_norm=(-hn, hn))
plot.legend(bbox_to_anchor=(1, 1))
plot.set_title('Temperature change and its effect on hurricane speed and pressure')
#sns.choose_diverging_palette()
plot = sns.regplot(data=result,x="size",y="Mean",line_kws={"color":"red"})
plot.set_xlabel('Hurricane count')
plot.set_ylabel('Temperature anomalies')
plot.set_title('Temperature vs hurricane count')
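# Companion number for the regression above: Pearson correlation between the
# yearly hurricane count and the temperature anomaly
print(result["size"].corr(result["Mean"]))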
times = atlantic_upraveny.copy(deep=True)
times['time'] = (times['time'] / 200 + 0.2).round() * 200  # bin observation times into 200 (HHMM) steps
# more detail would probably be better, but values are missing (600 is NaN-free)
times['time'] = times['time'].replace(2400.0, 0.0).astype(int)
times = times.reset_index().pivot(columns='time', values='max windspeed[kt]')
times['year'] = list(atlantic_upraveny['year'])
times = times.groupby((times.year//10)*10).mean()
del times['year']
#display(times)
months = atlantic_upraveny.reset_index().pivot(columns='month', values='max windspeed[kt]')
months['year'] = list(atlantic_upraveny['year'])
months = months.groupby((months.year//10)*10).mean()
del months['year']
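# times/months now hold, per decade, the mean max windspeed for each
# observation-time bin and for each month respectively (rows = decades)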
pal = sns.color_palette("magma", as_cmap=True)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 5), sharey=True)
fig.suptitle("Change of windspeed over years in different times and months")
sns.heatmap(times, cmap=pal, square=1, ax=axes[0])
sns.heatmap(months, cmap=pal, square=1, ax=axes[1])
fig.show()
# what I tried before:
#years = atlantic_upraveny.groupby((atlantic_upraveny.year//10)*10).agg({'time':'mean', 'max windspeed[kt]':'mean'})
#times = years.groupby((atlantic_upraveny.time//600)*600).mean()['max windspeed[kt]']
#result3 = result3.groupby((result.Year//10)*10).agg({'Mean':'mean', 'size':'sum', 'max windspeed[kt]':'mean', 'min pressure[mb]':'mean'})
#result3 = result.loc[:, ['Mean', 'size']].set_index('size').sort_values(by='size')