# Data Transformation

## Importing Raw Dataset

In [1]:
import re

import pandas as pd
pd.set_option('display.max_columns', None)

import sys
import os
sys.path.append(os.path.abspath('../../..'))
from utils import pandas_column_utils

# Load the raw FIFA 21 export. `url` is a path relative to this notebook's
# working directory, not a remote address.
url = '../../../datasets/raw/FIFA21_official_data.csv'
raw_fifa_df = pd.read_csv(url, low_memory=False)

# Work on an independent copy. A plain assignment would only alias the raw
# frame, so the in-place column writes in later cells would silently modify
# `raw_fifa_df` as well and the untouched input could no longer be inspected.
transformed_fifa_df = raw_fifa_df.copy()

## Standardizing Format

### Checking Data

In [2]:
# Preview the first two rows to inspect column names and raw value formats
# before any cleaning is applied.
transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value,Wage,Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height,Weight,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause,DefensiveAwareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,€31.5M,€115K,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,"<span class=""pos pos24"">RS",9.0,"Sep 25, 2020",,2022,6'0,190lbs,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,€64.6M,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,€87M,€370K,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,"<span class=""pos pos13"">RCM",17.0,"Aug 30, 2015",,2023,5'11,154lbs,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,€161M,68.0


In [3]:
# Dump the distinct values of every column to its own CSV file so that
# inconsistent formats can be reviewed outside the notebook.
unique_values_dir = '../../../temp'

# The scratch directory may not exist on a fresh checkout; create it up front
# so the cell survives Restart & Run All instead of failing inside `to_csv`.
os.makedirs(unique_values_dir, exist_ok=True)

for column in transformed_fifa_df.columns:
    # Keep at most 20 characters of the header and collapse every run of
    # non-word characters into "_" to obtain a filesystem-safe file name.
    safe_column_name = re.sub(r'\W+', '_', column[:20])
    pd.Series(transformed_fifa_df[column].unique()).to_csv(f'{unique_values_dir}/unique_{safe_column_name}.csv')

NOTE:
1. Remove currency symbol (Value, Wage, Release Clause)
2. Remove unit of measure (UoM) and convert to metric units (Height, Weight)
3. Remove HTML tag (Position)
4. Change inconsistent header format
    
    (HeadingAccuracy, ShortPassing, FKAccuracy, LongPassing, BallControl, SprintSpeed, ShotPower, LongShots, StandingTackle, SlidingTackle, GKDiving, GKHandling, GKKicking, GKPositioning, GKReflexes, DefensiveAwareness)
5. Change inconsistent column data

    (Contract Valid Until, Joined)

### Removing Currency Symbol

In [4]:
# Turn the euro-denominated text columns into plain numbers and record the
# currency in the column header instead.
currency_symbol = '€'
currency_column_names = ['Value', 'Wage', 'Release Clause']

for currency_column in currency_column_names:
    raw_amounts = transformed_fifa_df[currency_column]
    # NOTE(review): in the preview output "€64.6M" comes back as 64599999.0,
    # which looks like a float truncation inside the helper - confirm there.
    transformed_fifa_df[currency_column] = pandas_column_utils.convert_currency_to_number(
        raw_amounts, currency_symbol
    )

# Append " (€)" to each converted header, e.g. 'Value' -> 'Value (€)'.
new_column_names = {
    name: f'{name} ({currency_symbol})'
    for name in currency_column_names
}
transformed_fifa_df = transformed_fifa_df.rename(columns=new_column_names)

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height,Weight,Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause (€),DefensiveAwareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,"<span class=""pos pos24"">RS",9.0,"Sep 25, 2020",,2022,6'0,190lbs,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,"<span class=""pos pos13"">RCM",17.0,"Aug 30, 2015",,2023,5'11,154lbs,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


### Removing UoM

In [5]:
# Height: the helper converts the feet/inches text to a number in metres.
height_text = transformed_fifa_df['Height']
transformed_fifa_df['Height'] = pandas_column_utils.convert_feet_to_number_in_meter(height_text)

# Weight: first drop the "lbs" suffix, then convert pounds to kilograms.
# The intermediate result is written back to the column between the two
# steps, exactly as the second helper call expects to read it.
transformed_fifa_df['Weight'] = pandas_column_utils.remove_character(
    transformed_fifa_df['Weight'], 'lbs'
)
transformed_fifa_df['Weight'] = pandas_column_utils.convert_lbs_to_kg(
    transformed_fifa_df['Weight']
)

# Record the new unit in each header: 'Height (m)' and 'Weight (kg)'.
unit_by_column = {'Height': 'm', 'Weight': 'kg'}
new_column_names = {
    column: f'{column} ({unit})'
    for column, unit in unit_by_column.items()
}
transformed_fifa_df = transformed_fifa_df.rename(columns=new_column_names)

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause (€),DefensiveAwareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,"<span class=""pos pos24"">RS",9.0,"Sep 25, 2020",,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,"<span class=""pos pos13"">RCM",17.0,"Aug 30, 2015",,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


### Removing HTML Tag

In [6]:
# Position values arrive wrapped in an HTML tag (e.g. '<span class="pos pos24">RS').
# The non-greedy pattern removes every '<...>' fragment and keeps the text.
html_tag_pattern = r'<.*?>'
position_without_tags = transformed_fifa_df['Position'].str.replace(
    html_tag_pattern, '', regex=True
)
transformed_fifa_df['Position'] = position_without_tags

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,HeadingAccuracy,ShortPassing,Volleys,Dribbling,Curve,FKAccuracy,LongPassing,BallControl,Acceleration,SprintSpeed,Agility,Reactions,Balance,ShotPower,Jumping,Stamina,Strength,LongShots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Best Position,Best Overall Rating,Release Clause (€),DefensiveAwareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,"Sep 25, 2020",,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,"Aug 30, 2015",,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


### Changing Inconsistent Header Format

In [7]:
# Map the run-together headers onto the space-separated style used by the
# rest of the table (e.g. 'Skill Moves', 'Weak Foot').
new_column_names = {
    # outfield skills
    'HeadingAccuracy': 'Heading Accuracy', 'ShortPassing': 'Short Passing',
    'FKAccuracy': 'FK Accuracy', 'LongPassing': 'Long Passing',
    'BallControl': 'Ball Control', 'SprintSpeed': 'Sprint Speed',
    'ShotPower': 'Shot Power', 'LongShots': 'Long Shots',
    'StandingTackle': 'Standing Tackle', 'SlidingTackle': 'Sliding Tackle',
    # goalkeeper skills
    'GKDiving': 'GK Diving', 'GKHandling': 'GK Handling',
    'GKKicking': 'GK Kicking', 'GKPositioning': 'GK Positioning',
    'GKReflexes': 'GK Reflexes',
    # last column of the raw table
    'DefensiveAwareness': 'Defensive Awareness',
}
transformed_fifa_df = transformed_fifa_df.rename(new_column_names, axis='columns')

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,"Sep 25, 2020",,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,"Aug 30, 2015",,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


### Changing Inconsistent Column Data

In [8]:
# Reduce both date-like columns to a year with the shared helper. The preview
# output shows 'Joined' going from "Sep 25, 2020" to 2020.
for date_column in ('Contract Valid Until', 'Joined'):
    transformed_fifa_df[date_column] = pandas_column_utils.convert_date_to_year(
        transformed_fifa_df[date_column]
    )

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


## Encoding Categorical Data

### Checking Data

In [9]:
# Preview the first two rows again to see which columns still hold
# categorical text that needs encoding.
transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0


NOTE:
1. Encode Preferred Foot
2. Encode Offensive Work Rate
3. Encode Defensive Work Rate
4. Encode Real Face
5. Encode Position
6. Encode Best Position

### Encoding Preferred Foot

In [10]:
# Binary encoding of the preferred foot: Left -> 0, Right -> 1.
# Any label not listed here becomes NaN in the encoded column.
preferred_foot_mapping = dict(Left=0, Right=1)

encoded_preferred_foot = transformed_fifa_df['Preferred Foot'].map(preferred_foot_mapping)
transformed_fifa_df['Encoded Preferred Foot'] = encoded_preferred_foot

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1


### Encoding Offensive Work Rate

In [11]:
# 'Work Rate' holds "<offensive>/ <defensive>" (e.g. "High/ Medium").
# Take the part before the slash with the vectorised .str accessor: unlike a
# Python-level `value.split(...)` it propagates missing values as NaN instead
# of raising, and the strip keeps the label free of stray whitespace.
transformed_fifa_df['Offensive Work Rate'] = (
    transformed_fifa_df['Work Rate'].str.split('/').str[0].str.strip()
)

# Ordinal encoding; labels outside this mapping become NaN.
work_rate_mapping = {'High': 3, 'Medium': 2, 'Low': 1}

transformed_fifa_df['Encoded Offensive Work Rate'] = transformed_fifa_df['Offensive Work Rate'].map(work_rate_mapping)

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot,Offensive Work Rate,Encoded Offensive Work Rate
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1,High,3.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1,High,3.0


### Encoding Defensive Work Rate

In [12]:
# 'Work Rate' holds "<offensive>/ <defensive>" (e.g. "High/ Medium").
# Take the part after the slash and strip it right away: the raw split result
# carries a leading space (" Medium"), which would otherwise be stored in the
# column and end up in the exported CSV. The .str accessor also yields NaN for
# missing or slash-less values instead of raising.
transformed_fifa_df['Defensive Work Rate'] = (
    transformed_fifa_df['Work Rate'].str.split('/').str[1].str.strip()
)

# Ordinal encoding; labels outside this mapping become NaN.
work_rate_mapping = {'High': 3, 'Medium': 2, 'Low': 1}

transformed_fifa_df['Encoded Defensive Work Rate'] = transformed_fifa_df['Defensive Work Rate'].map(work_rate_mapping)

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot,Offensive Work Rate,Encoded Offensive Work Rate,Defensive Work Rate,Encoded Defensive Work Rate
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1,High,3.0,Medium,2.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1,High,3.0,High,3.0


### Encoding Real Face

In [13]:
# Binary encoding of the 'Real Face' flag: No -> 0, Yes -> 1.
# Any other value becomes NaN in the encoded column.
real_face_mapping = dict(No=0, Yes=1)

encoded_real_face = transformed_fifa_df['Real Face'].map(real_face_mapping)
transformed_fifa_df['Encoded Real Face'] = encoded_real_face

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot,Offensive Work Rate,Encoded Offensive Work Rate,Defensive Work Rate,Encoded Defensive Work Rate,Encoded Real Face
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1,High,3.0,Medium,2.0,1.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1,High,3.0,High,3.0,1.0


### Encoding Position

NOTE:
- 0: Goalkeeper (GK)
- 1: Defender (DEF)
- 2: Midfielder (MID)
- 3: Forward (FWD)
- 4: Substitute/Reserve (SUB/RES)

In [14]:
# Position labels grouped by the role code they are encoded as:
# 0 goalkeeper, 1 defender, 2 midfielder, 3 forward, 4 substitute/reserve.
position_groups = {
    0: ('GK',),
    1: ('LB', 'LWB', 'CB', 'RCB', 'LCB', 'RB', 'RWB'),
    2: ('CDM', 'LDM', 'RDM', 'CM', 'LCM', 'RCM', 'LM', 'RM', 'CAM', 'LAM', 'RAM'),
    3: ('ST', 'LS', 'RS', 'CF', 'LW', 'RW', 'LF', 'RF'),
    4: ('SUB', 'RES'),
}

# Flatten the groups into the label -> code lookup that `.map` needs.
position_mapping = {
    position: group_code
    for group_code, positions in position_groups.items()
    for position in positions
}

# Labels missing from the lookup (including missing values) become NaN.
encoded_position = transformed_fifa_df['Position'].map(position_mapping)
transformed_fifa_df['Encoded Position'] = encoded_position

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot,Offensive Work Rate,Encoded Offensive Work Rate,Defensive Work Rate,Encoded Defensive Work Rate,Encoded Real Face,Encoded Position
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1,High,3.0,Medium,2.0,1.0,3.0
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1,High,3.0,High,3.0,1.0,2.0


### Encoding Best Position

NOTE:
- 0: Goalkeeper (GK)
- 1: Defender (DEF)
- 2: Midfielder (MID)
- 3: Forward (FWD)
- 4: Substitute/Reserve (SUB/RES)

In [15]:
# Encode 'Best Position' with the same label -> code lookup that was built
# for 'Position', so both encoded columns share one scheme.
encoded_best_position = transformed_fifa_df['Best Position'].map(position_mapping)
transformed_fifa_df['Encoded Best Position'] = encoded_best_position

transformed_fifa_df.head(2)

Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,Club Logo,Value (€),Wage (€),Special,Preferred Foot,International Reputation,Weak Foot,Skill Moves,Work Rate,Body Type,Real Face,Position,Jersey Number,Joined,Loaned From,Contract Valid Until,Height (m),Weight (kg),Crossing,Finishing,Heading Accuracy,Short Passing,Volleys,Dribbling,Curve,FK Accuracy,Long Passing,Ball Control,Acceleration,Sprint Speed,Agility,Reactions,Balance,Shot Power,Jumping,Stamina,Strength,Long Shots,Aggression,Interceptions,Positioning,Vision,Penalties,Composure,Marking,Standing Tackle,Sliding Tackle,GK Diving,GK Handling,GK Kicking,GK Positioning,GK Reflexes,Best Position,Best Overall Rating,Release Clause (€),Defensive Awareness,Encoded Preferred Foot,Offensive Work Rate,Encoded Offensive Work Rate,Defensive Work Rate,Encoded Defensive Work Rate,Encoded Real Face,Encoded Position,Encoded Best Position
0,176580,L. Suárez,33,https://cdn.sofifa.com/players/176/580/20_60.png,Uruguay,https://cdn.sofifa.com/flags/uy.png,87,87,Atlético Madrid,https://cdn.sofifa.com/teams/240/light_30.png,31500000,115000,2316,Right,5.0,4.0,3.0,High/ Medium,PLAYER_BODY_TYPE_374,Yes,RS,9.0,2020,,2022,1.8288,86.18248,80.0,92.0,84.0,83.0,90.0,84.0,86.0,82.0,77.0,84.0,72.0,68.0,76.0,92.0,78.0,89.0,69.0,78.0,85.0,88.0,87.0,41.0,91.0,84.0,83.0,85.0,,45.0,38.0,27.0,25.0,31.0,33.0,37.0,ST,87.0,64599999.0,57.0,1,High,3.0,Medium,2.0,1.0,3.0,3
1,192985,K. De Bruyne,29,https://cdn.sofifa.com/players/192/985/20_60.png,Belgium,https://cdn.sofifa.com/flags/be.png,91,91,Manchester City,https://cdn.sofifa.com/teams/10/light_30.png,87000000,370000,2304,Right,4.0,5.0,4.0,High/ High,PLAYER_BODY_TYPE_321,Yes,RCM,17.0,2015,,2023,1.8034,69.853168,94.0,82.0,55.0,94.0,82.0,88.0,85.0,83.0,93.0,92.0,77.0,76.0,78.0,91.0,76.0,91.0,63.0,89.0,74.0,91.0,76.0,66.0,88.0,94.0,84.0,91.0,,65.0,53.0,15.0,13.0,5.0,10.0,13.0,CAM,91.0,161000000.0,68.0,1,High,3.0,High,3.0,1.0,2.0,2


## Exporting Transformed Dataset

In [16]:
# Write the same transformed frame to both dataset locations, without the
# pandas index column.
export_paths = (
    '../../../datasets/preprocessed/transformed.csv',
    '../../../datasets/processed/processed.csv',
)
for export_path in export_paths:
    transformed_fifa_df.to_csv(export_path, index=False)