
**Capstone Project Submission**

* Student Name: Wes Swager
* Student Pace: Full Time
* Instructor Name: Claude Fried
* Scheduled Project Review Date/Time
    * Friday, June 11, 2021, 2:30pm CST

# Feature Engineering Notebook

<a id = 'packages'></a>
# Packages

In [None]:
# Drive and IO to access saved data
from google.colab import drive, files
drive.mount('/content/drive')

import io

# Pandas for Dataframes
import pandas as pd

# Numpy and math for mathematical functions
import numpy as np

import math
from math import atan2

# Shapely for geometric functions
import shapely
from shapely import wkt
from shapely.geometry import Point, Polygon, LineString, GeometryCollection

import warnings
warnings.filterwarnings('ignore')

Mounted at /content/drive


<a id = 'data'></a>
# Data

Data sourced from [StatsBomb Open Data](https://github.com/statsbomb/open-data)

Data extracted in [expected_goals_data_extraction_notebook](https://github.com/wswager/expected_goals/blob/main/data_extraction/expected_goals_data_extraction_notebook.ipynb)

Data organized in [expected_goals_data_organization_notebook](https://github.com/wswager/expected_goals/blob/main/data_organization/expected_goals_data_organization_notebook.ipynb)

In [None]:
# Import organized_data produced by expected_goals_data_organization_notebook
# NOTE(review): the preview of this frame shows an 'Unnamed: 0' column, which
# looks like the index written by the previous notebook's to_csv call -
# confirm whether downstream notebooks rely on it before changing how the
# file is read.

organized_data_path = '/content/drive/MyDrive/flatiron/expected_goals/data_organization/organized_data.csv'
organized_data = pd.read_csv(organized_data_path)

In [None]:
# Preview the first rows of the loaded data to check which columns were read
organized_data.head()

Unnamed: 0,location_x,location_y,time,statsbomb_xg,outcome,player_shot,team,bodypart,technique,first_time,state_of_play,assist,assist2,assist3,assist_state_of_play,shot_distance,shot_angle
0,109.0,46.0,2021-06-11 00:04:38.609,0.266154,Blocked,Francesca Kirby,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,12.529964,118.61
1,113.0,35.0,2021-06-11 00:11:45.046,0.093521,Off T,Bethany England,Chelsea FCW,Head,Normal,False,Open Play,High Pass,,,From Free Kick,8.602325,54.46
2,94.0,43.0,2021-06-11 00:18:03.461,0.036171,Saved,Drew Spence,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,26.172505,96.58
3,86.0,34.0,2021-06-11 00:23:11.935,0.016625,Off T,Chloe Arthur,Birmingham City WFC,Left Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,34.525353,79.99
4,94.0,33.0,2021-06-11 00:23:45.810,0.030716,Off T,Bethany England,Chelsea FCW,Right Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,26.925824,74.93


## Distance from Goal

In [None]:
# Define goal center
# [x, y] reference point that the shot distance and shot angle features are
# measured against; it is compared directly with location_x / location_y, so
# it must be expressed in the same coordinate system as those columns.
# NOTE(review): presumably the centre of the goal mouth on the 120 x 80
# StatsBomb pitch - confirm against the StatsBomb data specification.

goal_center = [120, 40]

In [None]:
# Use location_x and location_y to define shot coordinates
#
# The two coordinate columns are paired directly instead of looking each row
# up with .iloc inside a range(len(...)) loop: every .iloc[i] call builds a
# complete row Series just to read one value, which is slow and harder to
# read than iterating the columns themselves.

shot_location_list = list(zip(organized_data['location_x'],
                              organized_data['location_y']))

# Calculate distance from shot location to goal_center
#
# The goal point never changes, so it is built once rather than once per
# shot. Each value is the shapely Point-to-Point distance between the shot
# location and goal_center, rounded to two decimal places.

goal_point = Point(goal_center)
shot_distance_list = [round(Point(shot_location).distance(goal_point), 2)
                      for shot_location in shot_location_list]

# Adds the column if it is missing, or overwrites it if the loaded data
# already contains a shot_distance column.
organized_data['shot_distance'] = shot_distance_list

In [None]:
# Preview the first rows to check the shot_distance column
organized_data.head()

Unnamed: 0,location_x,location_y,time,statsbomb_xg,outcome,player_shot,team,bodypart,technique,first_time,state_of_play,assist,assist2,assist3,assist_state_of_play,shot_distance,shot_angle
0,109.0,46.0,2021-06-11 00:04:38.609,0.266154,Blocked,Francesca Kirby,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,12.529964,118.61
1,113.0,35.0,2021-06-11 00:11:45.046,0.093521,Off T,Bethany England,Chelsea FCW,Head,Normal,False,Open Play,High Pass,,,From Free Kick,8.602325,54.46
2,94.0,43.0,2021-06-11 00:18:03.461,0.036171,Saved,Drew Spence,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,26.172505,96.58
3,86.0,34.0,2021-06-11 00:23:11.935,0.016625,Off T,Chloe Arthur,Birmingham City WFC,Left Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,34.525353,79.99
4,94.0,33.0,2021-06-11 00:23:45.810,0.030716,Off T,Bethany England,Chelsea FCW,Right Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,26.925824,74.93


## Shot Angle

In [None]:
# Calculate angle between the shot location and goal_center
#
# atan2 is called with the x offset to the goal first and the y offset
# second, so the angle (in degrees, rounded to two decimal places) is
# measured from the y direction rather than the x direction:
#   * a shot with the same y as goal_center (and a smaller x) gives 90
#   * a shot with a larger y than goal_center gives more than 90
#   * a shot with a smaller y than goal_center gives less than 90
#
# The coordinate columns are iterated together with zip instead of indexing
# every row with .iloc inside a range(len(...)) loop, which builds a full
# row Series for each single-value lookup.

goal_x = goal_center[0]
goal_y = goal_center[1]

shot_angle_list = [
    round(math.degrees(math.atan2(goal_x - location_x, goal_y - location_y)), 2)
    for location_x, location_y in zip(organized_data['location_x'],
                                      organized_data['location_y'])
]

# Adds the column if it is missing, or overwrites it if the loaded data
# already contains a shot_angle column.
organized_data['shot_angle'] = shot_angle_list

In [None]:
# Preview the first rows to check the shot_angle column
organized_data.head()

Unnamed: 0,location_x,location_y,time,statsbomb_xg,outcome,player_shot,team,bodypart,technique,first_time,state_of_play,assist,assist2,assist3,assist_state_of_play,shot_distance,shot_angle
0,109.0,46.0,2021-06-11 00:04:38.609,0.266154,Blocked,Francesca Kirby,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,12.529964,118.61
1,113.0,35.0,2021-06-11 00:11:45.046,0.093521,Off T,Bethany England,Chelsea FCW,Head,Normal,False,Open Play,High Pass,,,From Free Kick,8.602325,54.46
2,94.0,43.0,2021-06-11 00:18:03.461,0.036171,Saved,Drew Spence,Chelsea FCW,Left Foot,Normal,False,Open Play,Ground Pass,,,Regular Play,26.172505,96.58
3,86.0,34.0,2021-06-11 00:23:11.935,0.016625,Off T,Chloe Arthur,Birmingham City WFC,Left Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,34.525353,79.99
4,94.0,33.0,2021-06-11 00:23:45.810,0.030716,Off T,Bethany England,Chelsea FCW,Right Foot,Normal,False,Open Play,Ground Pass,,,From Goal Kick,26.925824,74.93


In [None]:
# Save the data, including the engineered shot_distance and shot_angle
# columns, to the feature_engineering folder on the mounted drive.
#
# data_with_engineered_features is a second name for the same DataFrame as
# organized_data, not a copy, so a change made through either name is
# visible through both.
# NOTE(review): to_csv is called without index=False, so the row index is
# written as an extra unnamed column - confirm whether the notebooks that
# read this file expect that column before changing it.
engineered_features_path = '/content/drive/MyDrive/flatiron/expected_goals/feature_engineering/data_with_engineered_features.csv'

data_with_engineered_features = organized_data
data_with_engineered_features.to_csv(engineered_features_path)

Continued in [expected_goals_data_cleaning_notebook](https://github.com/wswager/expected_goals/blob/main/data_cleaning/expected_goals_data_cleaning_notebook.ipynb)