# Virou passeio

Based on: https://github.com/rjtavares/football-crunching

### Some preprocessing

Module imports and data preparation. You can ignore this section unless you want to play with the data yourself.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from footyscripts.footyviz import draw_events, draw_pitch, type_names

#plotting settings
%matplotlib inline
pd.options.display.mpl_style = 'default'

In [None]:
import pickle

# Load the pickled World Cup data and keep the Brazil vs Germany match record.
# NOTE: pickle can execute arbitrary code while loading; only use a trusted data file.
# The context manager closes the file handle as soon as the data is read.
with open("data/wc2014.p", "rb") as data_file:
    df_records = pickle.load(data_file)['brazil-vs-germany-731827']

In [None]:
import pickle

# Team id that marks a Germany event; every other id is labelled as Brazil.
GERMANY_TEAM_ID = 357

# Read the event list of the Brazil vs Germany match.
# NOTE: pickle can execute arbitrary code while loading; only use a trusted data file.
with open("data/wc2014.p", "rb") as data_file:
    df_records = pickle.load(data_file)['brazil-vs-germany-731827']['data']['events']

for record in df_records:
    record['team_name'] = 'Germany' if int(record['team']) == GERMANY_TEAM_ID else 'Brazil'
    # Best-effort conversion of every field to int; values that cannot be
    # converted (e.g. the team name just added) are left untouched.
    # Only conversion errors are ignored, so interrupts and unrelated failures
    # are no longer silently swallowed.
    # NOTE(review): int() truncates float values (e.g. 45.7 -> 45); confirm the
    # source coordinates are not stored as fractional floats.
    for key in record:
        try:
            record[key] = int(record[key])
        except (TypeError, ValueError, OverflowError):
            pass

df = pd.DataFrame.from_records(df_records)

# Standard pitch dimensions (same units as the rescaled coordinates below)
x_size = 105.0
y_size = 68.0
box_height = 16.5*2 + 7.32
box_width = 16.5
y_box_start = y_size/2-box_height/2
y_box_end = y_size/2+box_height/2

# The dataset uses a 100 by 100 scale; rescale to the standard pitch size
df['x']=df['x']/100*x_size 
df['y']=df['y']/100*y_size
df['to_x']=df['to_x']/100*x_size
df['to_y']=df['to_y']/100*y_size

# Derived measures and classifiers
df['count'] = 1                                        # lets sums act as event counts
df['dx'] = df['to_x'] - df['x']
df['dy'] = df['to_y'] - df['y']
df['distance'] = np.sqrt(df['dx']**2+df['dy']**2)      # straight-line length of the event
df['fivemin'] = np.floor(df['min']/5)*5                # start of the 5-minute bucket
df['type_name'] = df['type'].map(type_names.get)       # label for the numeric event type
# An event ends in (to_box) / starts from (from_box) the box at the far end of the x axis
df['to_box'] = (df['to_x'] > x_size - box_width) & (y_box_start < df['to_y']) & (df['to_y'] < y_box_end)
df['from_box'] = (df['x'] > x_size - box_width) & (y_box_start < df['y']) & (df['y'] < y_box_end)
# True when the event starts beyond the halfway line
df['on_offense'] = df['x']>x_size/2

In [None]:
# Slices of the main DataFrame reused throughout the notebook:
# one frame per period (1, 2, and anything after 2) ...
dfPeriod1, dfPeriod2, dfExtraTime = (
    df.loc[df['period'] == 1],
    df.loc[df['period'] == 2],
    df.loc[df['period'] > 2],
)
# ... and, for each of them, only the events whose type is 13, 14, 15 or 16 (the shots).
dfP1Shots, dfP2Shots, dfETShots = (
    period_events.loc[period_events['type'].isin([13, 14, 15, 16])]
    for period_events in (dfPeriod1, dfPeriod2, dfExtraTime)
)

## The first half

Let's get a quick profile of the first half. The chart below shows where on the field most events took place (positive numbers correspond to Germany's offensive half, negative numbers to its defensive half), with each team's shots annotated.

In [None]:
fig = plt.figure(figsize=(12,4))

# Per-minute difference between Germany's and Brazil's mean event x-position.
# 'x' is selected before aggregating so that only that column is averaged;
# averaging whole groups fails on non-numeric columns in current pandas.
germany_x = dfPeriod1[dfPeriod1['team_name']=='Germany'].groupby('min')['x'].mean()
brazil_x = dfPeriod1[dfPeriod1['team_name']=='Brazil'].groupby('min')['x'].mean()
avg_x = germany_x - brazil_x

# Positive and negative parts are drawn as two separate areas
minutes = list(avg_x.index.values)
plt.stackplot(minutes, [x if x>0 else 0 for x in avg_x])
plt.stackplot(minutes, [x if x<0 else 0 for x in avg_x])

# Annotate every shot at its minute; Germany's labels go above the curve, Brazil's below
for i, shot in dfP1Shots.iterrows():
    x = shot['min']
    # label-based lookup (.ix has been removed from pandas)
    y = avg_x.loc[shot['min']]
    signal = 1 if shot['team_name']=='Germany' else -1
    # the label is passed positionally: the keyword was renamed from `s` to `text` in matplotlib
    plt.annotate(shot['type_name']+' ('+shot['team_name'][0]+")", xy=(x, y), xytext=(x-5,y+30*signal), arrowprops=dict(facecolor='black'))

plt.gca().set_xlabel('minute')
plt.title("First Half Profile")

In [None]:
# Draw the selected team's first-half passes (type 1) that have outcome 1.
team_name_passes = 'Brazil'
is_pass = dfPeriod1['type'] == 1
has_outcome_1 = dfPeriod1['outcome'] == 1
by_team = dfPeriod1['team_name'] == team_name_passes

draw_pitch()
draw_events(dfPeriod1[is_pass & has_outcome_1 & by_team], mirror_away=True)

# Caption each half of the pitch just outside the playing area
for caption_x, caption in ((x_size/4, "Brazil's defense"), (x_size*3/4, "Germany's defense")):
    plt.text(caption_x, -3, caption, color='black',
             bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')

plt.title("{}'s passes during the first half".format(team_name_passes))

In [None]:
# Per team, first half: mean x-position and mean of the on_offense flag
# (i.e. the share of events flagged on_offense).
first_half_by_team = dfPeriod1.groupby('team_name')
first_half_by_team[['x', 'on_offense']].agg(np.mean)

In [None]:
# Per team, first half: mean outcome over events of type 1 (passes).
first_half_passes = dfPeriod1.loc[dfPeriod1['type'] == 1]
first_half_passes.groupby('team_name')[['outcome']].agg(np.mean)

In [None]:
# First-half passes (type 1) that end inside the box but start outside it.
p1_box_entries = df[(df['period']==1) & (df['type']==1) & (df['to_box']==True) & (df['from_box']==False)]

draw_pitch()
# outcome 1 at full opacity, outcome 0 faded
draw_events(p1_box_entries[p1_box_entries['outcome']==1], mirror_away=True)
draw_events(p1_box_entries[p1_box_entries['outcome']==0], mirror_away=True, alpha=0.2)
# shots of the first half in a separate colour
draw_events(dfP1Shots, mirror_away=True, base_color='#a93e3e')

# Caption each half of the pitch just outside the playing area
for caption_x, caption in ((x_size/4, "Brazil's defense"), (x_size*3/4, "Germany's defense")):
    plt.text(caption_x, -3, caption, color='black',
             bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')

In [None]:
# Per team, first half: mean outcome and number of passes (type 1) played
# into the box from outside it.
# Every condition is taken from dfPeriod1 so the boolean mask shares the index
# of the frame it filters (the original mixed in a mask built from the full df).
p1_passes_into_box = dfPeriod1[(dfPeriod1['to_box']==True) & (dfPeriod1['from_box']==False) & (dfPeriod1['type']==1)]
p1_passes_into_box.groupby(['team_name']).agg({'outcome': np.mean,
                                               'count': np.sum})

## The second half

In [None]:
fig = plt.figure(figsize=(12,4))

# Per-minute difference between Germany's and Brazil's mean event x-position.
# 'x' is selected before aggregating so that only that column is averaged;
# averaging whole groups fails on non-numeric columns in current pandas.
germany_x = dfPeriod2[dfPeriod2['team_name']=='Germany'].groupby('min')['x'].mean()
brazil_x = dfPeriod2[dfPeriod2['team_name']=='Brazil'].groupby('min')['x'].mean()
avg_x = germany_x - brazil_x

# Positive and negative parts are drawn as two separate areas
minutes = list(avg_x.index.values)
plt.stackplot(minutes, [x if x>0 else 0 for x in avg_x])
plt.stackplot(minutes, [x if x<0 else 0 for x in avg_x])

# Annotate every shot at its minute; Germany's labels go above the curve, Brazil's below
for i, shot in dfP2Shots.iterrows():
    x = shot['min']
    # label-based lookup (.ix has been removed from pandas)
    y = avg_x.loc[shot['min']]
    signal = 1 if shot['team_name']=='Germany' else -1
    # the label is passed positionally: the keyword was renamed from `s` to `text` in matplotlib
    plt.annotate(shot['type_name']+' ('+shot['team_name'][0]+")", xy=(x, y), xytext=(x-5,y+30*signal), arrowprops=dict(facecolor='black'))

plt.gca().set_xlabel('minute')
plt.title("Second Half Profile")

In [None]:
# Per team, second half: mean x-position and mean of the on_offense flag
# (i.e. the share of events flagged on_offense).
second_half_by_team = dfPeriod2.groupby('team_name')
second_half_by_team[['x', 'on_offense']].agg(np.mean)

In [None]:
# Per team, second half: mean outcome over events of type 1 (passes).
second_half_passes = dfPeriod2.loc[dfPeriod2['type'] == 1]
second_half_passes.groupby('team_name')[['outcome']].agg(np.mean)

In [None]:
# Second-half passes (type 1) that end inside the box but start outside it.
p2_box_entries = df[(df['period']==2) & (df['type']==1) & (df['to_box']==True) & (df['from_box']==False)]

draw_pitch()
# outcome 1 at full opacity, outcome 0 faded
draw_events(p2_box_entries[p2_box_entries['outcome']==1], mirror_away=True)
draw_events(p2_box_entries[p2_box_entries['outcome']==0], mirror_away=True, alpha=0.2)
# shots of the second half in a separate colour
draw_events(dfP2Shots, mirror_away=True, base_color='#a93e3e')

# Caption each half of the pitch just outside the playing area
for caption_x, caption in ((x_size/4, "Brazil's defense"), (x_size*3/4, "Germany's defense")):
    plt.text(caption_x, -3, caption, color='black',
             bbox=dict(facecolor='white', alpha=0.5), horizontalalignment='center')

In [None]:
# Per team, second half: mean outcome and number of passes (type 1) played
# into the box from outside it.
# Every condition is taken from dfPeriod2 so the boolean mask shares the index
# of the frame it filters (the original mixed in a mask built from the full df).
p2_passes_into_box = dfPeriod2[(dfPeriod2['to_box']==True) & (dfPeriod2['from_box']==False) & (dfPeriod2['type']==1)]
p2_passes_into_box.groupby(['team_name']).agg({'outcome': np.mean,
                                               'count': np.sum})

# Fact: 7x1 foi pouco