import pandas as pd
import plotly.express as px
import seaborn as sns
# Load the tab-separated export into a DataFrame.
df = pd.read_csv("/content/sample_data/0083021-210914110416597.csv", sep="\t")

# Quick look at the raw table: contents, summary statistics and column dtypes.
df
df.describe()
df.dtypes

# Parse both timestamp columns value by value into pandas datetimes.
for date_column in ("eventDate", "lastInterpreted"):
    df[date_column] = df[date_column].apply(pd.to_datetime)

# Rows whose country code is "AR"; the mask is built once and reused.
is_ar = df["countryCode"] == "AR"
df[is_ar]["gbifID"].count()
df_ar = df[is_ar]

# Number of rows per state/province, most frequent first.
df_ar["stateProvince"].value_counts().sort_values(ascending=False)
# Double-click on the right-hand side to show only the type of bird you are looking for.
# GitHub does not support the plotly library; to view and interact with the map, open it here:
# https://colab.research.google.com/drive/1tbHlglUFTnQj4ayVUqgFQVVqISP47KTB?usp=sharing
# NOTE(review): the Mapbox access token is hard-coded in the source; consider
# loading it from configuration instead.
mapbox_token = "pk.eyJ1IjoiemVkZW1lIiwiYSI6ImNreGppeWcwNzJ1c2cydXN0NnNlbHdmZGgifQ.Go70x85Lf2IcXHez423EhA"
px.set_mapbox_access_token(mapbox_token)

# One marker per row of df_ar at its latitude/longitude, coloured by family.
fig = px.scatter_mapbox(
    df_ar,
    lat="decimalLatitude",
    lon="decimalLongitude",
    color="family",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=100,
    zoom=7,
)
fig.show()
# Null count per column over the full table (displayed, then kept in `n`).
df.isnull().sum()
n = df.isnull().sum()

# Keep only the columns of df_ar that hold at least one non-null value.
# The emptiness check is derived from df_ar itself instead of comparing null
# counts against a hard-coded row count, so it stays correct when the input
# file (and therefore the number of rows) changes.
r = [column for column in df_ar.columns if df_ar[column].notnull().any()]

# True when no column had to be dropped.
len(r) == len(df_ar.columns)

# Take an explicit copy so that later column assignments on df_ar write to an
# independent frame rather than to a slice (avoids SettingWithCopyWarning).
df_ar = df_ar[r].copy()
df_ar
df_ar.isnull().sum()
# Rows of the full table whose "issue" column is not null.
df[df["issue"].notnull()]

# Bar chart of the number of non-null gbifID values per year.
df_ar.groupby("year")["gbifID"].count().plot.bar()

# Number of rows per family, most frequent first.
family_counts = df_ar["family"].value_counts().sort_values(ascending=False)
family_counts

# Share of each family as a percentage of all rows in df_ar. The denominator is
# taken from the frame itself rather than a hard-coded row count, so the
# percentages remain correct if the data set changes.
round((family_counts / len(df_ar)) * 100, 3)  # Percent
# Per-province summary table (pivot_table's default aggregation), indexed by stateProvince.
df_s_p = pd.pivot_table(df_ar, index="stateProvince")

# Number of non-null gbifID values for every province. The groupby result is
# indexed by stateProvince, so the assignment aligns each count with its own
# row. This replaces broadcasting one province's count to all rows and then
# patching individual provinces with boolean-mask assignments, which also
# overwrote every other column of the patched rows.
df_s_p["count"] = df_ar.groupby("stateProvince")["gbifID"].count()

# Bar chart of the per-province counts.
sns.barplot(data=df_s_p, x=df_s_p.index, y="count")
# Add the hour of the "lastInterpreted" timestamp as its own column.
# `assign` returns a new frame, so nothing is written into a slice of another
# frame (avoids SettingWithCopyWarning), and the `.dt` accessor extracts the
# hour for the whole column at once instead of calling a lambda per row.
# Requires "lastInterpreted" to already hold datetimes.
df_ar = df_ar.assign(hour=df_ar["lastInterpreted"].dt.hour)
df_ar

# Bar charts of the number of non-null gbifID values per hour, day and month.
df_ar.groupby("hour")["gbifID"].count().plot.bar()
df_ar.groupby("day")["gbifID"].count().plot.bar()
df_ar.groupby("month")["gbifID"].count().plot.bar()
def round_station(m):
    """Map a month number to a season label, bucketing months by quarter.

    Months 1-3 map to "Summer", 4-6 to "Fall", 7-9 to "Winter" and
    10-12 to "Spring".

    Args:
        m: Month number. Ints and floats are both accepted.

    Returns:
        The season label as a string, or None when ``m`` is NaN or lies
        outside the range 1..12.
    """
    # Reject out-of-range values up front so that 0 or negative numbers are
    # not silently labelled "Summer". NaN fails every comparison, so the
    # chained test is False for it and it is rejected here as well.
    if not 1 <= m <= 12:
        return None
    if m <= 3:
        return "Summer"
    if m <= 6:
        return "Fall"
    if m <= 9:
        return "Winter"
    return "Spring"
# Label every row with the season derived from its "month" value.
# `assign` builds a new frame instead of writing into a slice of another frame
# (avoids SettingWithCopyWarning); round_station is passed directly because
# wrapping it in a lambda adds nothing.
df_ar = df_ar.assign(roundState=df_ar["month"].apply(round_station))

# Bar chart of the number of non-null gbifID values per season.
df_ar.groupby("roundState")["gbifID"].count().plot.bar()

# Per-season summary table (pivot_table's default aggregation).
pd.pivot_table(df_ar, index="roundState")
# Double-click on the right-hand side to show only the type of bird you are looking for.
# GitHub does not support the plotly library; to view and interact with the map, open it here:
# https://colab.research.google.com/drive/1tbHlglUFTnQj4ayVUqgFQVVqISP47KTB?usp=sharing
# Sectors by bird family
# NOTE(review): the Mapbox access token is hard-coded in the source; consider
# loading it from configuration instead.
px.set_mapbox_access_token("pk.eyJ1IjoiemVkZW1lIiwiYSI6ImNreGppeWcwNzJ1c2cydXN0NnNlbHdmZGgifQ.Go70x85Lf2IcXHez423EhA")

# Round the coordinates to one decimal so nearby rows share the same point.
# The values are read from df_ar's own columns (not from the unfiltered df) and
# added with `assign`, which returns a new frame instead of writing into a
# slice of another frame (avoids SettingWithCopyWarning).
df_ar = df_ar.assign(
    roundLatitude=df_ar["decimalLatitude"].round(1),
    roundLongitude=df_ar["decimalLongitude"].round(1),
)

# One marker per row at its rounded coordinates, coloured by family.
fig = px.scatter_mapbox(
    df_ar,
    lat="roundLatitude",
    lon="roundLongitude",
    color="family",
    color_continuous_scale=px.colors.cyclical.IceFire,
    size_max=100,
    zoom=7,
)
fig.show()