import bitdotio
import datetime
import dotenv
from getpass import getpass
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pyarrow
if os.getenv("BITDOTIO_API_KEY"):
print("bit.io API Key Present in Global Environment")
else:
os.environ["BITDOTIO_API_KEY"] = getpass("Please enter your API Key")
# Check out https://github.com/bitdotioinc/python-bitdotio for more examples and documentation
client = bitdotio.bitdotio(os.environ.get("BITDOTIO_API_KEY"))
neiss = '''
SELECT *
FROM "bitdotio/neiss"."neiss_2016_2020"
WHERE DATE_PART('year',"Treatment_Date") = 2020
LIMIT 100000;
'''
def pandas_query_bitdotio(client, sql):
'''Query bit.io and return a pandas dataframe'''
try:
# Connect to bit.io
conn = client.get_connection()
# Execute sql
return pd.read_sql(sql, conn)
except (Exception) as e:
print(e)
finally:
if conn is not None:
conn.close()
df = pandas_query_bitdotio(client, neiss)
neiss = df.set_index("CPSC_Case_Number")
neiss.head()
prod1_dummies = pd.get_dummies(neiss.Product_1, prefix="prod")
prod2_dummies = pd.get_dummies(neiss.Product_2, prefix="prod")
prod3_dummies = pd.get_dummies(neiss.Product_3, prefix="prod")
# Add the three dummy-coded columns by ID
_ = prod1_dummies.add(prod2_dummies, axis=1, fill_value=0)
df_prod = _.add(prod3_dummies, axis=1, fill_value=0)
del(_)
df_prod.set_index(neiss.index, inplace=True)
# Combine dummy-coded columns with date and weight by ID
sub = neiss.iloc[:, [0, -1]]
analysis = pd.concat([sub, df_prod], axis=1)
del(sub, df_prod, prod1_dummies, prod2_dummies, prod3_dummies)
analysis.iloc[0:10, 600:610]
weighted_incidents = analysis.iloc[:,2:].values * np.expand_dims(analysis.iloc[:,1].values,1)
df_wt = pd.DataFrame(weighted_incidents, index = analysis.index, columns=analysis.iloc[:,2:].columns)
df_out = pd.concat([analysis.iloc[:,0:2], df_wt], axis=1)
del(analysis)
del(weighted_incidents)
del(df_wt)
df_out['date'] = pd.to_datetime(df_out['Treatment_Date'],format='%Y-%m-%d')
df_out['year'] = pd.DatetimeIndex(df_out['date']).year
df_out['month'] = pd.DatetimeIndex(df_out['date']).month
df_out['doy'] = pd.DatetimeIndex(df_out['date']).dayofyear
df_out.iloc[1:10, 600:610]
df_out['day'] = pd.DatetimeIndex(df_out['date']).day
byday = df_out.drop(["Weight", "Treatment_Date", "year", "doy", "date"], axis=1)
byday.iloc[1:10, 598:608]
byday_grouped = byday.groupby(['month','day']).agg('sum')
byday_grouped.iloc[1:10, 598:608]
from sklearn.neighbors import LocalOutlierFactor
clf = LocalOutlierFactor(n_neighbors=20) # Looking for about 10 days
X = byday_grouped.values
y_pred = clf.fit_predict(X)
byday_grouped.index[y_pred==-1]
import matplotlib.dates as mdates
import matplotlib.cbook as cbook
BLUE = '#0059ff'
RED = '#EE5149'
y_plot = y_pred
y_plot[y_pred==-1]=0
fig, ax = plt.subplots(figsize=(5.5,3), dpi=300)
dates = [datetime.date(2020, x, y) for x, y in byday_grouped.index.values]
scatter1 = ax.scatter(np.array(dates)[y_pred == 0], -1*clf.negative_outlier_factor_[y_pred == 0], c=RED,
alpha = 0.5, label="Outlier", s=20)
scatter2 = ax.scatter(np.array(dates)[y_pred == 1], -1*clf.negative_outlier_factor_[y_pred == 1], c=BLUE,
alpha = 0.5, label = "Not Outlier", s=5)
fmt_month = mdates.MonthLocator()
fmt_day = mdates.DayLocator(15)
ax.xaxis.set_major_locator(fmt_month)
ax.xaxis.set_minor_locator(fmt_day)
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b'))
# Legend
leg = plt.legend(loc="upper right", bbox_to_anchor=(1, 1), prop = {"family": "DejaVu Sans", "size":7}, ncol=1,
frameon=True)
# grid
ax.grid(b=True, color="grey", alpha=0.1, linewidth=1)
ax.grid(b=True, axis="y", color="grey", alpha=0.1, linewidth=1)
plt.xticks(fontfamily="DejaVu Sans", size=8)
plt.yticks(fontfamily="DejaVu Sans", size=8)
#ax.xaxis.set_minor_formatter(mdates.DateFormatter('%d'))
ax.set_xlim([datetime.date(2019, 12, 25), datetime.date(2020, 12, 31)])
#ax.set_xlabel("Month of Year", fontfamily="Roboto", color="black", alpha = 0.5, weight="bold")
ax.set_ylabel("Local Outlier Factor Score", fontfamily="DejaVu Sans", color="black", alpha = 0.5, weight="bold", size=8)
ax.set_title("Holiday Injury Outliers", fontdict = {'fontfamily':"DejaVu Sans", "weight":"bold"}, loc="left")
fig.autofmt_xdate()
fig.tight_layout()
fig.patch.set_facecolor('white')
plt.show()