import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from footyscripts.footyviz import draw_events, draw_pitch, type_names
#plotting settings
%matplotlib inline
#pd.options.display.mpl_style = 'default'
# Load the match event log; the first CSV column is used as the row index.
df = pd.read_csv("../datasets/germany-vs-argentina-731830.csv", encoding='utf-8', index_col=0)

# Pitch and box dimensions reused throughout the notebook
# (presumably metres -- the notebook only calls them "standard dimensions").
x_size = 105.0
y_size = 68.0
box_height = 16.5 * 2 + 7.32
box_width = 16.5
y_box_start = y_size/2 - box_height/2
y_box_end = y_size/2 + box_height/2

# The dataset uses a 100 x 100 coordinate grid; rescale each coordinate
# column to the pitch dimensions above.
for _col, _extent in (('x', x_size), ('y', y_size), ('to_x', x_size), ('to_y', y_size)):
    df[_col] = df[_col] / 100 * _extent


def _inside_far_box(xs, ys):
    """Return a boolean mask of points strictly inside the box at the x = x_size end."""
    return (xs > x_size - box_width) & (y_box_start < ys) & (ys < y_box_end)


# Derived measures and classifiers computed from the raw columns.
df['count'] = 1                                  # constant 1 so events can be counted with a sum
df['dx'] = df['to_x'] - df['x']
df['dy'] = df['to_y'] - df['y']
df['distance'] = np.sqrt(df['dx']**2 + df['dy']**2)
df['fivemin'] = np.floor(df['min']/5)*5          # minute rounded down to a multiple of 5
df['type_name'] = df['type'].map(type_names.get)
df['to_box'] = _inside_far_box(df['to_x'], df['to_y'])
df['from_box'] = _inside_far_box(df['x'], df['y'])
df['on_offense'] = df['x'] > x_size/2            # event starts beyond the halfway line

# Pre-sliced frames reused by later cells: one per period, plus the rows of
# each whose type is in 13-16 (the notebook treats these as shots).
_shot_types = [13, 14, 15, 16]
_period = df['period']
dfPeriod1 = df[_period == 1]
dfP1Shots = dfPeriod1[dfPeriod1['type'].isin(_shot_types)]
dfPeriod2 = df[_period == 2]
dfP2Shots = dfPeriod2[dfPeriod2['type'].isin(_shot_types)]
dfExtraTime = df[_period > 2]
dfETShots = dfExtraTime[dfExtraTime['type'].isin(_shot_types)]
# First-half profile: per-minute difference between the teams' mean x position
# (Germany minus Argentina), with each first-half shot annotated on the curve.
fig = plt.figure(figsize=(12,4))
_germany_x = dfPeriod1[dfPeriod1['team_name']=='Germany'].groupby('min')['x'].mean()
_argentina_x = dfPeriod1[dfPeriod1['team_name']=='Argentina'].groupby('min')['x'].mean()
# The subtraction aligns on minute; a minute with events from only one team is NaN.
avg_x = _germany_x - _argentina_x
# Positive and negative parts are drawn as two separate series. NaN compares
# False in both conditions, so minutes without a value are drawn as 0.
plt.stackplot(avg_x.index.values, avg_x.where(avg_x > 0, 0).values)
plt.stackplot(avg_x.index.values, avg_x.where(avg_x < 0, 0).values)
for _, shot in dfP1Shots.iterrows():
    minute = shot['min']
    # avg_x is indexed by minute, so look the value up by label, not by position.
    # Shots in a minute without a profile value are skipped (best effort).
    if minute not in avg_x.index or pd.isna(avg_x.loc[minute]):
        continue
    y = avg_x.loc[minute]
    # Germany's labels go above the curve, Argentina's below.
    signal = 1 if shot['team_name']=='Germany' else -1
    # The label is passed positionally because its keyword name differs
    # between matplotlib versions.
    plt.annotate(f"{shot['type_name']} ({shot['team_name'][0]})", xy=(minute, y), xytext=(minute-5, y+30*signal), arrowprops=dict(facecolor='black'))
plt.gca().set_xlabel('minute')
plt.title("First Half Profile")
plt.show()
# Argentina's first-half type-1 events with outcome 1 (plotted as passes;
# outcome 1 presumably means the pass was completed).
_arg_passes = dfPeriod1[
    (dfPeriod1['type'] == 1)
    & (dfPeriod1['outcome'] == 1)
    & (dfPeriod1['team_name'] == 'Argentina')
]
draw_pitch()
draw_events(_arg_passes, mirror_away=True)
# Caption each half of the pitch just below the touchline (y = -3).
for _x_pos, _caption in ((x_size/4, "Germany's defense"), (x_size*3/4, "Argentina's defense")):
    plt.text(_x_pos, -3, _caption, color='black', bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')
plt.title("Argentina's passes during the first half")
plt.show()
# Per-team first-half averages: mean x position and the share of events that
# start beyond the halfway line (on_offense).
_p1_by_team = dfPeriod1.groupby('team_name')
_p1_by_team.agg({'x': 'mean', 'on_offense': 'mean'})
# Per-team mean outcome over first-half type-1 (pass) events.
_p1_passes = dfPeriod1[dfPeriod1['type'] == 1]
_p1_passes.groupby('team_name').agg({'outcome': 'mean'})
# First-half type-1 events (passes) that end inside the box but start outside
# it, split by outcome, overlaid with all first-half shots.
_p1_box_entry = df['to_box'] & ~df['from_box'] & (df['type'] == 1) & (df['period'] == 1)
draw_pitch()
draw_events(df[_p1_box_entry & (df['outcome'] == 1)], mirror_away=True)
# Outcome-0 entries are drawn faded.
draw_events(df[_p1_box_entry & (df['outcome'] == 0)], mirror_away=True, alpha=0.2)
draw_events(dfP1Shots, mirror_away=True, base_color='#a93e3e')
for _x_pos, _caption in ((x_size/4, "Germany's defense"), (x_size*3/4, "Argentina's defense")):
    plt.text(_x_pos, -3, _caption, color='black', bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')
plt.show()
# First-half passes into the box from outside it: mean outcome and number of
# attempts per team.
# Every mask is built from dfPeriod1 itself (not the full df), so the boolean
# key already matches the frame's index and pandas does not have to reindex it
# (this previously raised "Boolean Series key will be reindexed" UserWarning).
_p1_box_entries = dfPeriod1[
    dfPeriod1["to_box"] & ~dfPeriod1["from_box"] & (dfPeriod1["type"] == 1)
]
_p1_box_entries.groupby("team_name").agg({"outcome": "mean", "count": "sum"})
# All events attributed to Christoph Kramer. An explicit copy is taken so the
# columns added below are written to an independent frame, not a slice of df
# (which is what raised SettingWithCopyWarning).
dfKramer = df[df['player_name']=='Christoph Kramer'].copy()
# Number of events per type (rows) and minute (columns).
pd.pivot_table(dfKramer, values='count', index='type_name', columns='min', aggfunc='sum', fill_value=0)
# Label every event "<outcome>-<type name>". Plain column assignment is
# required here: dfKramer.loc['action'] = ... would address a ROW labelled
# 'action', and the lookup on the next line would then raise KeyError.
dfKramer['action'] = dfKramer['outcome'].map(str) + '-' + dfKramer['type_name']
dfKramer['action'].unique()
# Hand-assigned value of each action label seen above.
score = {'1-LINEUP': 0, '1-RUN WITH BALL': 0.5, '1-RECEPTION': 0, '1-PASS': 1, '0-PASS': -1,
         '0-TACKLE (NO CONTROL)': 0, '1-CLEAR BALL (OUT OF PITCH)': 0.5,
         '0-LOST CONTROL OF BALL': -1, '1-SUBSTITUTION (OFF)': 0}
# Labels missing from the table map to NaN, which later sums skip.
dfKramer['score'] = dfKramer['action'].map(score)
# Bar chart of Kramer's summed score per minute for minutes 0-31; minutes
# without any event are filled with 0.
_score_by_minute = dfKramer.groupby('min')['score'].sum()
_score_by_minute = _score_by_minute.reindex(range(32), fill_value=0)
_score_by_minute.plot(kind='bar')
# Mark the two key moments with arrows: (text, point, text position).
for _label, _point, _text_pos in (
    ('Injury', (19,0.5), (14,1.1)),
    ('Substitution', (31,0), (22,1.6)),
):
    plt.annotate(_label, _point, _text_pos, arrowprops=dict(facecolor='black'))
_axes = plt.gca()
_axes.set_xlabel('minute')
_axes.set_ylabel('no. events')
plt.show()
# Second-half profile: per-minute difference between the teams' mean x position
# (Germany minus Argentina), with each second-half shot annotated on the curve.
fig = plt.figure(figsize=(12,4))
_germany_x = dfPeriod2[dfPeriod2['team_name']=='Germany'].groupby('min')['x'].mean()
_argentina_x = dfPeriod2[dfPeriod2['team_name']=='Argentina'].groupby('min')['x'].mean()
# The subtraction aligns on minute; a minute with events from only one team is NaN.
avg_x = _germany_x - _argentina_x
# The x-values must be the minute index (not the series values). Positive and
# negative parts are drawn as two separate series; NaN compares False in both
# conditions, so minutes without a value are drawn as 0.
plt.stackplot(avg_x.index.values, avg_x.where(avg_x > 0, 0).values)
plt.stackplot(avg_x.index.values, avg_x.where(avg_x < 0, 0).values)
for _, shot in dfP2Shots.iterrows():
    minute = shot['min']
    # avg_x is indexed by minute and the index does not start at 0 in this
    # period, so the lookup has to be by label; a positional lookup would miss.
    # Shots in a minute without a profile value are skipped (best effort).
    if minute not in avg_x.index or pd.isna(avg_x.loc[minute]):
        continue
    y = avg_x.loc[minute]
    # Germany's labels go above the curve, Argentina's below.
    signal = 1 if shot['team_name']=='Germany' else -1
    # The label is passed positionally because its keyword name differs
    # between matplotlib versions.
    plt.annotate(f"{shot['type_name']} ({shot['team_name'][0]})", xy=(minute, y), xytext=(minute-5, y+30*signal), arrowprops=dict(facecolor='black'))
plt.gca().set_xlabel('minute')
plt.title("Second Half Profile")
plt.show()
# Per-team second-half averages: mean x position and the share of events that
# start beyond the halfway line (on_offense).
_p2_by_team = dfPeriod2.groupby('team_name')
_p2_by_team.agg({'x': 'mean', 'on_offense': 'mean'})
# Per-team mean outcome over second-half type-1 (pass) events.
_p2_passes = dfPeriod2[dfPeriod2['type'] == 1]
_p2_passes.groupby('team_name').agg({'outcome': 'mean'})
# Second-half type-1 events (passes) that end inside the box but start outside
# it, split by outcome, overlaid with all second-half shots.
_p2_box_entry = df['to_box'] & ~df['from_box'] & (df['type'] == 1) & (df['period'] == 2)
draw_pitch()
draw_events(df[_p2_box_entry & (df['outcome'] == 1)], mirror_away=True)
# Outcome-0 entries are drawn faded.
draw_events(df[_p2_box_entry & (df['outcome'] == 0)], mirror_away=True, alpha=0.2)
draw_events(dfP2Shots, mirror_away=True, base_color='#a93e3e')
for _x_pos, _caption in ((x_size/4, "Germany's defense"), (x_size*3/4, "Argentina's defense")):
    plt.text(_x_pos, -3, _caption, color='black', bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')
# Finish the figure explicitly, as the other pitch plots in this notebook do,
# so it does not stay open when the next plot is drawn.
plt.show()
# Second-half passes into the box from outside it: mean outcome and number of
# attempts per team.
# Every mask is built from dfPeriod2 itself (not the full df), so the boolean
# key already matches the frame's index and pandas does not have to reindex it
# (this previously raised "Boolean Series key will be reindexed" UserWarning).
_p2_box_entries = dfPeriod2[
    dfPeriod2['to_box'] & ~dfPeriod2['from_box'] & (dfPeriod2['type'] == 1)
]
_p2_box_entries.groupby('team_name').agg({'outcome': 'mean', 'count': 'sum'})
# Event count, mean x position and share of events beyond the halfway line,
# per team and period, over the whole match.
df.groupby(['team_name', 'period']).agg({'count': 'sum', 'x': 'mean', 'on_offense': 'mean'})
# Row POSITION of the first type-16 event (the goal). It is used with .iloc
# below, so it must be a position; taking the index label only gives the same
# number when the index happens to be 0, 1, 2, ...
# Raises IndexError if there is no type-16 event, as the label lookup did.
goal_ix = int(np.flatnonzero((df['type'] == 16).values)[0])
# The same aggregates restricted to the events after the goal.
df.iloc[goal_ix+1:].groupby(['team_name', 'period']).agg({'count': 'sum', 'x': 'mean', 'on_offense': 'mean'})
# Passes into the box and shots after the goal.
# goal_ix is defined earlier in the notebook and is used here as a row position.
_after_goal = df.iloc[goal_ix+1:]
# The masks are built from the sliced frame itself so they share its index;
# masks taken from the full df had to be reindexed by pandas, which raised a
# "Boolean Series key will be reindexed" UserWarning on every call.
_ag_box_entry = _after_goal['to_box'] & ~_after_goal['from_box'] & (_after_goal['type'] == 1)
draw_pitch()
draw_events(_after_goal[_ag_box_entry & (_after_goal['outcome'] == 1)], mirror_away=True)
# Outcome-0 entries are drawn faded.
draw_events(_after_goal[_ag_box_entry & (_after_goal['outcome'] == 0)], mirror_away=True, alpha=0.2)
draw_events(_after_goal[_after_goal['type'].isin([13,14,15,16])], mirror_away=True, base_color='#a93e3e')
for _x_pos, _caption in ((x_size/4, "Germany's defense"), (x_size*3/4, "Argentina's defense")):
    plt.text(_x_pos, -3, _caption, color='black', bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')
plt.show()
# Shots (types 13-16) taken after the goal: minute, player, team and shot type.
_post_goal_rows = df.iloc[goal_ix+1:]
_post_goal_shots = _post_goal_rows[_post_goal_rows['type'].isin([13,14,15,16])]
_post_goal_shots[['min', 'player_name', 'team_name', 'type_name']]
# Row POSITION of the first type-16 event (the goal); it is used with .iloc, so
# an index label would only be correct for a 0, 1, 2, ... index.
goal = int(np.flatnonzero((df['type'] == 16).values)[0])
# The 30 events leading up to the goal plus the goal itself. The end bound is
# goal+1 because .iloc excludes it, and the table below filters on type 16,
# which only the goal row can match. max(..., 0) prevents a negative start from
# wrapping around to the end of the frame. The copy makes dfGoal an independent
# frame, so adding a column below does not raise SettingWithCopyWarning.
dfGoal = df.iloc[max(goal-30, 0):goal+1].copy()
draw_pitch()
draw_events(dfGoal[dfGoal.team_name=='Germany'], base_color='white')
draw_events(dfGoal[dfGoal.team_name=='Argentina'], base_color='cyan')
# Germany's players involved in the play.
# NOTE(review): no team filter is applied here -- confirm that event types
# 1, 101 and 16 in this window belong to Germany only.
dfGoal['progression'] = dfGoal['to_x'] - dfGoal['x']  # distance gained along the pitch length
dfGoal[dfGoal['type'].isin([1, 101, 16])][['player_name', 'type_name', 'progression']]
# passing accuracy
# Per player: number of events and mean outcome, busiest players first.
# NOTE(review): this aggregates every event type, not only passes (type == 1),
# so 'outcome' is not strictly a pass-completion rate -- confirm the intent.
_per_player = df.groupby(['player_name', 'team_name']).agg({'count': 'sum', 'outcome': 'mean'})
_per_player.sort_values('count', ascending=False)
#shots
# Number of events of each shot type (13-16) per player and team.
_shot_events = df[df['type'].isin([13, 14, 15, 16])]
pd.pivot_table(
    _shot_events,
    index=['player_name', 'team_name'],
    columns='type_name',
    values='count',
    aggfunc='sum',
    fill_value=0,
)
#defensive play
# Number of events of types 7, 8 and 49 per player and team.
_defensive_events = df[df['type'].isin([7, 8, 49])]
pd.pivot_table(
    _defensive_events,
    index=['player_name', 'team_name'],
    columns='type_name',
    values='count',
    aggfunc='sum',
    fill_value=0,
)
from IPython.display import Image

# Closing GIF, embedded by URL as the cell's output.
_gif_url = 'https://media.giphy.com/media/eiXrTRHk9WCDYjv54z/giphy.gif?cid=790b761149e9afcbe91dba898852cc8f84ad3a19b19ab9b5&rid=giphy.gif&ct=g'
Image(url=_gif_url)