import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
import statsmodels.api as sm
from skmisc import *
from plotnine import *
from plotnine.data import *
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
-- Shot-level extract from `data`: one row per SHOT / MISS / GOAL event of the
-- listed shot types taken from at most 60 distance units (pbp_distance).
SELECT
    game_date,
    event_type,
    event_detail,
    event_team,
    event_player_1,
    coords_x,
    coords_y,
    home_goalie,
    away_goalie,
    home_team,
    away_team,
    home_skaters,
    away_skaters,
    pbp_distance,
    event_angle,
    season,
    -- Binary response: 1 for a goal event, 0 for any other event.
    CASE WHEN event_type = 'GOAL' THEN 1 ELSE 0 END AS goal,
    -- Team name when that side's goalie column is NULL, otherwise 'NONE'
    -- (the aliases suggest a NULL goalie means the goalie was pulled).
    CASE WHEN home_goalie IS NULL THEN home_team ELSE 'NONE' END AS pulled_home_goal,
    CASE WHEN away_goalie IS NULL THEN away_team ELSE 'NONE' END AS pulled_away_goal
FROM data
WHERE event_type IN ('SHOT', 'MISS', 'GOAL')
  AND event_detail IN ('Wrist', 'Slap', 'Snap', 'Backhand', 'Wrap-around')
  AND pbp_distance <= 60
# Work on a private copy so the derived columns below do not touch the
# frame that was handed in.
df = df.copy()

# Row masks: which side took the shot, and which side's "pulled" column
# carries its own team name (the query sets it to the team name when that
# side's goalie is NULL, 'NONE' otherwise).
shot_by_home = df['event_team'] == df['home_team']
shot_by_away = df['event_team'] == df['away_team']
away_net_flagged = df['pulled_away_goal'] == df['away_team']
home_net_flagged = df['pulled_home_goal'] == df['home_team']

# 1 when the shooting team's opponent has its goalie flagged as pulled.
df['opp_goalie_pulled'] = np.where(
    (shot_by_home & away_net_flagged) | (shot_by_away & home_net_flagged),
    1,
    0,
)

# 1/0 indicator for "shot taken by the home team", stored as a categorical.
df['is_home'] = shot_by_home.astype(int).astype('category')

# Skater-count difference from the shooting team's point of view
# (positive = shooter's team has more skaters on the ice).
df['man_advantage'] = np.where(
    shot_by_home,
    df['home_skaters'] - df['away_skaters'],
    df['away_skaters'] - df['home_skaters'],
)

# Shot type is used as a categorical predictor.
df['event_detail'] = df['event_detail'].astype('category')
# Label every shot with the quadrant of its (coords_x, coords_y) location.
# Boundary convention: y == 0 belongs to the upper half and x == 0 to the
# left half, so the four masks are mutually exclusive and together cover
# every row with non-null coordinates (x == 0 with y < 0 is "III").
# Rows with a missing coordinate match no mask and get 'NA'.
upper_half = df['coords_y'] >= 0
lower_half = df['coords_y'] < 0
right_half = df['coords_x'] > 0
left_half = df['coords_x'] <= 0
df['Quadrant'] = np.select(
    [
        upper_half & right_half,
        upper_half & left_half,
        lower_half & left_half,
        lower_half & right_half,
    ],
    ["I", "II", "III", "IV"],
    default='NA',
)

# Signed angle: event_angle is negated for shots from quadrants II and IV
# and kept as-is everywhere else (including 'NA' rows).
flip_sign = df['Quadrant'].isin(["II", "IV"])
df['AdjAngle'] = np.where(flip_sign, -1 * df['event_angle'], df['event_angle'])
# Goalie facing the shot: the home goalie when the away team shoots,
# otherwise the away goalie. (A single assignment; an earlier mirror-image
# assignment was immediately overwritten and had no effect.)
df['OppGoalie'] = np.where(
    df['event_team'] == df['away_team'],
    df['home_goalie'],
    df['away_goalie'],
)

# Two-level labels singling out one goalie and one shooter; every other
# player (and any missing name) is labelled 'Other'.
df['IsGoalieOfInterest'] = np.where(
    df['OppGoalie'] == 'ANDREI.VASILEVSKIY', 'Andrei Vasilevskiy', 'Other'
)
df['IsShooterOfInterest'] = np.where(
    df['event_player_1'] == 'AUSTON.MATTHEWS', 'Auston Matthews', 'Other'
)
# Keep only rows whose skater differential lies in [-2, 2] (both ends
# inclusive); rows with a missing man_advantage are dropped as well.
df = df[df['man_advantage'].between(-2, 2)]
# Logit specification for P(goal): distance, signed angle and its square,
# angle-by-shot-type and distance-by-shot-type interactions, the
# goalie/shooter-of-interest flags, the pulled-goalie flag and the skater
# differential. The pieces below concatenate into one formula string.
MODEL = (
    'goal~pbp_distance+AdjAngle+I(AdjAngle**2)'
    '+AdjAngle*C(event_detail)+pbp_distance*C(event_detail)'
    '+C(IsGoalieOfInterest)+C(IsShooterOfInterest)'
    '+C(opp_goalie_pulled)+man_advantage'
)

# Fit the logistic regression on the filtered shots.
mod = smf.logit(
    formula=MODEL,
    data=df,
).fit()

# The summary object is neither bound nor printed here; it is only visible
# when an interactive front end echoes the expression.
mod.summary()

# Fitted goal probability for every row used in the fit.
df['y_pred_prob'] = mod.predict(df)
# Cut distance and signed angle into nine quantile-based bins each.
df['DistanceBin'] = pd.qcut(df['pbp_distance'], 9, precision=2)
df['AngleBin'] = pd.qcut(df['AdjAngle'], 9, precision=2)

# Mean predicted goal probability per (distance bin, angle bin) cell.
# observed=False is spelled out so that every bin combination appears in
# the result (empty cells as NaN) regardless of the pandas default.
heatmap = (
    df.groupby(['DistanceBin', 'AngleBin'], observed=False)['y_pred_prob']
    .mean()
    .reset_index()
)

# Reshape to a distance-by-angle grid. DataFrame.pivot must be called with
# keyword arguments: positional use raises TypeError on pandas >= 2.0.
heatmap = heatmap.pivot(index='DistanceBin', columns='AngleBin', values='y_pred_prob')

# Put the largest distance bin in the top row and show empty cells as 0.
heatmap = heatmap.sort_index(level=0, ascending=False)
heatmap = heatmap.fillna(0)

# Annotated heatmap of the grid.
f, ax = plt.subplots(figsize=(12, 9))
sns.heatmap(heatmap, annot=True, linewidths=.1, ax=ax)
# Random subset for the 3D plots. The sample size is capped at the number
# of available rows so a frame with fewer than 3000 rows does not raise.
# dropna() then removes every sampled row that has a missing value in any
# column.
d3_plots = df.sample(min(3000, len(df))).dropna()

sns.set(style="darkgrid")
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(111, projection='3d')

# x = shot distance, y = signed angle, z = predicted probability; the
# points are coloured by the same predicted probability.
x = d3_plots['pbp_distance']
y = d3_plots['AdjAngle']
z = d3_plots['y_pred_prob']
c = d3_plots['y_pred_prob']

# Axis labels follow the data actually plotted on each axis.
ax.set_xlabel("Distance")
ax.set_ylabel("Angle")
ax.set_zlabel("PredProb")
ax.scatter(x, y, z, c=c)
plt.show()
# Triangulated surface of predicted probability over (distance, angle),
# drawn from the same sampled rows as the scatter plot.
fig = plt.figure(figsize=(12, 12))
# Create the 3D axes with add_subplot: passing projection= to Figure.gca()
# is no longer accepted by Matplotlib >= 3.6.
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(
    d3_plots['pbp_distance'],
    d3_plots['AdjAngle'],
    d3_plots['y_pred_prob'],
    cmap=plt.cm.viridis,
    linewidth=0.2,
)
plt.show()
# Shared plotnine theme: white text on a uniform grey (#676a6e) background.
theme1 = theme(
    # Title, axis tick/title text and facet strip text all share one style;
    # each key gets its own element_text instance.
    **{
        text_part: element_text(size=12, color="white")
        for text_part in (
            "plot_title",
            "axis_text_x",
            "axis_text_y",
            "axis_title_y",
            "axis_title_x",
            "strip_text_x",
        )
    },
    # Grey fill behind the whole plot and behind each panel.
    plot_background=element_rect(fill="#676a6e"),
    panel_background=element_rect(fill="#676a6e"),
    # Legend: larger white text on the same grey, outlined in grey.
    legend_title=element_text(size=16, color="white"),
    legend_background=element_rect(fill="#676a6e", color="#676a6e", size=1),
    legend_text=element_text(size=14, color="white"),
    strip_background=element_rect(fill="#676a6e"),
    legend_key=element_rect(fill="#676a6e"),
    # Layout.
    panel_spacing=0.5,
    aspect_ratio=4 / 3,
)
# Random subset for the smoothed curves. The sample size is capped at the
# number of available rows so a frame with fewer than 40000 rows does not
# raise. dropna() removes every sampled row with a missing value in any
# column.
# NOTE(review): if home_goalie/away_goalie are null on pulled-goalie rows,
# those rows are dropped here too — confirm that is intended.
df_plots = df.sample(min(40000, len(df))).dropna()
df_plots = df_plots[(df_plots['man_advantage'] <= 2) & (df_plots['man_advantage'] >= -2)]

# Predicted probability vs. signed angle, one loess curve per goalie group.
# The colour legend is titled after the variable mapped to colour.
(
    ggplot(df_plots, aes(x='AdjAngle', y='y_pred_prob', color='factor(IsGoalieOfInterest)'))
    + geom_smooth(span=.7, method='loess')
    + labs(x='Angle', y='Predicted Probability', color='Goalie')
    + coord_cartesian(ylim=(0, .1))
    + theme1
)

# Same view, one loess curve per shooter group.
(
    ggplot(df_plots, aes(x='AdjAngle', y='y_pred_prob', color='factor(IsShooterOfInterest)'))
    + geom_smooth(span=.5, method='loess')
    + labs(x='Angle', y='Predicted Probability', color='Shooter')
    + coord_cartesian(ylim=(0, .20))
    + theme1
)

# Same view, one loess curve per shot type. df_plots has already had its
# missing rows dropped above, so no further dropna() is needed.
(
    ggplot(df_plots, aes(x='AdjAngle', y='y_pred_prob', color='factor(event_detail)'))
    + geom_smooth(span=.75, method='loess')
    + labs(x='Angle', y='Predicted Probability', color='Shot Type')
    + theme1
)