# Playing with the FIFA19 dataset

### Imports and loading dataset

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Dataset is stored as a .csv file in the same directory
df = pd.read_csv('./data.csv')
df.head(3)

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Photo,Nationality,Flag,Overall,Potential,Club,...,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes,Release Clause
0,0,158023,L. Messi,31,https://cdn.sofifa.org/players/4/19/158023.png,Argentina,https://cdn.sofifa.org/flags/52.png,94,94,FC Barcelona,...,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0,€226.5M
1,1,20801,Cristiano Ronaldo,33,https://cdn.sofifa.org/players/4/19/20801.png,Portugal,https://cdn.sofifa.org/flags/38.png,94,94,Juventus,...,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0,€127.1M
2,2,190871,Neymar Jr,26,https://cdn.sofifa.org/players/4/19/190871.png,Brazil,https://cdn.sofifa.org/flags/54.png,92,93,Paris Saint-Germain,...,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0,€228.1M


### List of columns and general information of the dataset

In [2]:
df.columns.values

array(['Unnamed: 0', 'ID', 'Name', 'Age', 'Photo', 'Nationality', 'Flag',
       'Overall', 'Potential', 'Club', 'Club Logo', 'Value', 'Wage',
       'Special', 'Preferred Foot', 'International Reputation',
       'Weak Foot', 'Skill Moves', 'Work Rate', 'Body Type', 'Real Face',
       'Position', 'Jersey Number', 'Joined', 'Loaned From',
       'Contract Valid Until', 'Height', 'Weight', 'LS', 'ST', 'RS', 'LW',
       'LF', 'CF', 'RF', 'RW', 'LAM', 'CAM', 'RAM', 'LM', 'LCM', 'CM',
       'RCM', 'RM', 'LWB', 'LDM', 'CDM', 'RDM', 'RWB', 'LB', 'LCB', 'CB',
       'RCB', 'RB', 'Crossing', 'Finishing', 'HeadingAccuracy',
       'ShortPassing', 'Volleys', 'Dribbling', 'Curve', 'FKAccuracy',
       'LongPassing', 'BallControl', 'Acceleration', 'SprintSpeed',
       'Agility', 'Reactions', 'Balance', 'ShotPower', 'Jumping',
       'Stamina', 'Strength', 'LongShots', 'Aggression', 'Interceptions',
       'Positioning', 'Vision', 'Penalties', 'Composure', 'Marking',
       'StandingTackle', 'S

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18207 entries, 0 to 18206
Data columns (total 89 columns):
Unnamed: 0                  18207 non-null int64
ID                          18207 non-null int64
Name                        18207 non-null object
Age                         18207 non-null int64
Photo                       18207 non-null object
Nationality                 18207 non-null object
Flag                        18207 non-null object
Overall                     18207 non-null int64
Potential                   18207 non-null int64
Club                        17966 non-null object
Club Logo                   18207 non-null object
Value                       18207 non-null object
Wage                        18207 non-null object
Special                     18207 non-null int64
Preferred Foot              18159 non-null object
International Reputation    18159 non-null float64
Weak Foot                   18159 non-null float64
Skill Moves                 18159 non-null fl

In [4]:
df.describe

<bound method DataFrame.describe of        Unnamed: 0      ID                  Name  Age  \
0               0  158023              L. Messi   31   
1               1   20801     Cristiano Ronaldo   33   
2               2  190871             Neymar Jr   26   
3               3  193080                De Gea   27   
4               4  192985          K. De Bruyne   27   
5               5  183277             E. Hazard   27   
6               6  177003             L. Modrić   32   
7               7  176580             L. Suárez   31   
8               8  155862          Sergio Ramos   32   
9               9  200389              J. Oblak   25   
10             10  188545        R. Lewandowski   29   
11             11  182521              T. Kroos   28   
12             12  182493              D. Godín   32   
13             13  168542           David Silva   32   
14             14  215914              N. Kanté   27   
15             15  211110             P. Dybala   24   
16          

### We notice that this dataframe has around 18,000 rows and 89 columns. We will now decide what are the columns and rows we don't need

In [5]:
list_of_columns = df.columns.values
for i in list_of_columns:
    print(i)
    print(df[i].head(2))

Unnamed: 0
0    0
1    1
Name: Unnamed: 0, dtype: int64
ID
0    158023
1     20801
Name: ID, dtype: int64
Name
0             L. Messi
1    Cristiano Ronaldo
Name: Name, dtype: object
Age
0    31
1    33
Name: Age, dtype: int64
Photo
0    https://cdn.sofifa.org/players/4/19/158023.png
1     https://cdn.sofifa.org/players/4/19/20801.png
Name: Photo, dtype: object
Nationality
0    Argentina
1     Portugal
Name: Nationality, dtype: object
Flag
0    https://cdn.sofifa.org/flags/52.png
1    https://cdn.sofifa.org/flags/38.png
Name: Flag, dtype: object
Overall
0    94
1    94
Name: Overall, dtype: int64
Potential
0    94
1    94
Name: Potential, dtype: int64
Club
0    FC Barcelona
1        Juventus
Name: Club, dtype: object
Club Logo
0    https://cdn.sofifa.org/teams/2/light/241.png
1     https://cdn.sofifa.org/teams/2/light/45.png
Name: Club Logo, dtype: object
Value
0    €110.5M
1       €77M
Name: Value, dtype: object
Wage
0    €565K
1    €405K
Name: Wage, dtype: object
Special
0    2202
1 

In [6]:
unnecessary_columns = ['Photo','Flag','Club Logo','Special','International Reputation','Body Type','Real Face','Jersey Number','Joined','Loaned From','Contract Valid Until','Release Clause']
df.drop(unnecessary_columns, axis=1, inplace=True)

In [7]:
len(df.columns.values)

77

In [8]:
len(df)

18207

### We now remove all rows with any of its values as NULL

In [9]:
df.isna().sum()

Unnamed: 0           0
ID                   0
Name                 0
Age                  0
Nationality          0
Overall              0
Potential            0
Club               241
Value                0
Wage                 0
Preferred Foot      48
Weak Foot           48
Skill Moves         48
Work Rate           48
Position            60
Height              48
Weight              48
LS                2085
ST                2085
RS                2085
LW                2085
LF                2085
CF                2085
RF                2085
RW                2085
LAM               2085
CAM               2085
RAM               2085
LM                2085
LCM               2085
                  ... 
Volleys             48
Dribbling           48
Curve               48
FKAccuracy          48
LongPassing         48
BallControl         48
Acceleration        48
SprintSpeed         48
Agility             48
Reactions           48
Balance             48
ShotPower           48
Jumping    

In [10]:
for i in df.columns.values:
    print(i),
    print(df[i].isna().sum())

Unnamed: 0 0
ID 0
Name 0
Age 0
Nationality 0
Overall 0
Potential 0
Club 241
Value 0
Wage 0
Preferred Foot 48
Weak Foot 48
Skill Moves 48
Work Rate 48
Position 60
Height 48
Weight 48
LS 2085
ST 2085
RS 2085
LW 2085
LF 2085
CF 2085
RF 2085
RW 2085
LAM 2085
CAM 2085
RAM 2085
LM 2085
LCM 2085
CM 2085
RCM 2085
RM 2085
LWB 2085
LDM 2085
CDM 2085
RDM 2085
RWB 2085
LB 2085
LCB 2085
CB 2085
RCB 2085
RB 2085
Crossing 48
Finishing 48
HeadingAccuracy 48
ShortPassing 48
Volleys 48
Dribbling 48
Curve 48
FKAccuracy 48
LongPassing 48
BallControl 48
Acceleration 48
SprintSpeed 48
Agility 48
Reactions 48
Balance 48
ShotPower 48
Jumping 48
Stamina 48
Strength 48
LongShots 48
Aggression 48
Interceptions 48
Positioning 48
Vision 48
Penalties 48
Composure 48
Marking 48
StandingTackle 48
SlidingTackle 48
GKDiving 48
GKHandling 48
GKKicking 48
GKPositioning 48
GKReflexes 48


In [11]:
df.dropna(how='any',axis=0,inplace=True) 

In [12]:
for i in df.columns.values:
    print(i),
    print(df[i].isna().sum())

Unnamed: 0 0
ID 0
Name 0
Age 0
Nationality 0
Overall 0
Potential 0
Club 0
Value 0
Wage 0
Preferred Foot 0
Weak Foot 0
Skill Moves 0
Work Rate 0
Position 0
Height 0
Weight 0
LS 0
ST 0
RS 0
LW 0
LF 0
CF 0
RF 0
RW 0
LAM 0
CAM 0
RAM 0
LM 0
LCM 0
CM 0
RCM 0
RM 0
LWB 0
LDM 0
CDM 0
RDM 0
RWB 0
LB 0
LCB 0
CB 0
RCB 0
RB 0
Crossing 0
Finishing 0
HeadingAccuracy 0
ShortPassing 0
Volleys 0
Dribbling 0
Curve 0
FKAccuracy 0
LongPassing 0
BallControl 0
Acceleration 0
SprintSpeed 0
Agility 0
Reactions 0
Balance 0
ShotPower 0
Jumping 0
Stamina 0
Strength 0
LongShots 0
Aggression 0
Interceptions 0
Positioning 0
Vision 0
Penalties 0
Composure 0
Marking 0
StandingTackle 0
SlidingTackle 0
GKDiving 0
GKHandling 0
GKKicking 0
GKPositioning 0
GKReflexes 0


### Let's now get to processing the data.
### But, before that, let's analyse pitfalls that may occur during this process

In [14]:
for i in df.columns.values:
    print(i),
    print(df[i].dtype)

Unnamed: 0 int64
ID int64
Name object
Age int64
Nationality object
Overall int64
Potential int64
Club object
Value object
Wage object
Preferred Foot object
Weak Foot float64
Skill Moves float64
Work Rate object
Position object
Height object
Weight object
LS object
ST object
RS object
LW object
LF object
CF object
RF object
RW object
LAM object
CAM object
RAM object
LM object
LCM object
CM object
RCM object
RM object
LWB object
LDM object
CDM object
RDM object
RWB object
LB object
LCB object
CB object
RCB object
RB object
Crossing float64
Finishing float64
HeadingAccuracy float64
ShortPassing float64
Volleys float64
Dribbling float64
Curve float64
FKAccuracy float64
LongPassing float64
BallControl float64
Acceleration float64
SprintSpeed float64
Agility float64
Reactions float64
Balance float64
ShotPower float64
Jumping float64
Stamina float64
Strength float64
LongShots float64
Aggression float64
Interceptions float64
Positioning float64
Vision float64
Penalties float64
Composure float6

### ```Value``` and ```Wage``` columns are of type 'object'; we would like to make them into numeric types

In [15]:
df['Value'].head(3)

0    €110.5M
1       €77M
2    €118.5M
Name: Value, dtype: object

In [29]:
df['Wage'].head((3)

0    €565K
1    €405K
2    €290K
Name: Wage, dtype: object

In [30]:
million = df[df['Value'].str.contains("M")]
thousand = df[df['Value'].str.contains("K")]
print(len(million))
print(len(thousand))


6360
9558


In [31]:
million[:3]

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,€110.5M,€565K,...,75.0,96.0,33.0,28.0,26.0,6.0,11.0,15.0,14.0,8.0
1,1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,€77M,€405K,...,85.0,95.0,28.0,31.0,23.0,7.0,11.0,15.0,14.0,11.0
2,2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,€118.5M,€290K,...,81.0,94.0,27.0,24.0,33.0,9.0,9.0,15.0,15.0,11.0


In [32]:
thousand[-4:-2]

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
18203,18203,243165,N. Christoffersson,19,Sweden,47,63,Trelleborgs FF,€60K,€1K,...,43.0,42.0,22.0,15.0,19.0,10.0,9.0,9.0,5.0,12.0
18204,18204,241638,B. Worman,16,England,47,67,Cambridge United,€60K,€1K,...,55.0,41.0,32.0,13.0,11.0,6.0,5.0,10.0,6.0,13.0


### Simply converting a value/wage like ```100.5K``` into ```100.5``` will not be enough as we have both M and K in ```values```

In [57]:
def convert_value(s):
    s = s.replace('€','')    
    x = float(s[:-1])
    if(s[-1]=='K'):
        return x*1000
    else:
        return x*1000000

In [59]:
convert_value('€100.5M')

100500000.0

In [60]:
million_wage = df[df['Wage'].str.contains("M")]
thousand_wage = df[df['Wage'].str.contains("K")]
print(len(million_wage))
print(len(thousand_wage))

0
15926


In [61]:
df['Wage'].apply(lambda x: convert_value(x))

0        565000.0
1        405000.0
2        290000.0
4        355000.0
5        340000.0
6        420000.0
7        455000.0
8        380000.0
10       205000.0
11       355000.0
12       125000.0
13       285000.0
14       225000.0
15       205000.0
16       205000.0
17       145000.0
20       315000.0
21       200000.0
23       300000.0
24       215000.0
25       100000.0
26       255000.0
27       285000.0
28       315000.0
29       165000.0
30       315000.0
31       205000.0
32       340000.0
33       265000.0
34       160000.0
           ...   
18171      1000.0
18173      1000.0
18174      1000.0
18175      1000.0
18176      1000.0
18177      1000.0
18179      1000.0
18181      1000.0
18182      1000.0
18184      1000.0
18185      1000.0
18186      1000.0
18187      1000.0
18188      1000.0
18189      1000.0
18190      1000.0
18191      1000.0
18192      1000.0
18193      1000.0
18195      1000.0
18196      1000.0
18197      1000.0
18199      1000.0
18200      1000.0
18201     

In [62]:
df['Wage'].head()

0    €565K
1    €405K
2    €290K
4    €355K
5    €340K
Name: Wage, dtype: object

In [63]:
df['Wage'] = df['Wage'].apply(lambda x: convert_value(x))

In [64]:
df['Wage'].head()

0    565000.0
1    405000.0
2    290000.0
4    355000.0
5    340000.0
Name: Wage, dtype: float64

In [67]:
convert_value(df['Value'][0])

110500000.0

In [68]:
convert_value(df['Value'][1])

77000000.0

In [71]:
convert_value('€100K')

100000.0

### Now, notice that the positional attributes are of type 'float' instead of 'int64'. That won't mess with our analysis, so we will let that slide.

In [81]:
for i in df.columns.values:
    print(i),
    print(df[i].dtype)
    

Unnamed: 0 int64
ID int64
Name object
Age int64
Nationality object
Overall int64
Potential int64
Club object
Value object
Wage float64
Preferred Foot object
Weak Foot float64
Skill Moves float64
Work Rate object
Position object
Height object
Weight object
LS object
ST object
RS object
LW object
LF object
CF object
RF object
RW object
LAM object
CAM object
RAM object
LM object
LCM object
CM object
RCM object
RM object
LWB object
LDM object
CDM object
RDM object
RWB object
LB object
LCB object
CB object
RCB object
RB object
Crossing float64
Finishing float64
HeadingAccuracy float64
ShortPassing float64
Volleys float64
Dribbling float64
Curve float64
FKAccuracy float64
LongPassing float64
BallControl float64
Acceleration float64
SprintSpeed float64
Agility float64
Reactions float64
Balance float64
ShotPower float64
Jumping float64
Stamina float64
Strength float64
LongShots float64
Aggression float64
Interceptions float64
Positioning float64
Vision float64
Penalties float64
Composure float

In [82]:
plus_columns=[
'LS',
'ST',
'RS',
'LW',
'LF',
'CF',
'RF',
'RW',
'LAM',
'CAM',
'RAM',
'LM',
'LCM',
'CM',
'RCM',
'RM',
'LWB',
'LDM',
'CDM',
'RDM',
'RWB',
'LB',
'LCB',
'CB',
'RCB',
'RB'
]

In [83]:
for i in plus_columns:
    print(df[i].head(2))

0    88+2
1    91+3
Name: LS, dtype: object
0    88+2
1    91+3
Name: ST, dtype: object
0    88+2
1    91+3
Name: RS, dtype: object
0    92+2
1    89+3
Name: LW, dtype: object
0    93+2
1    90+3
Name: LF, dtype: object
0    93+2
1    90+3
Name: CF, dtype: object
0    93+2
1    90+3
Name: RF, dtype: object
0    92+2
1    89+3
Name: RW, dtype: object
0    93+2
1    88+3
Name: LAM, dtype: object
0    93+2
1    88+3
Name: CAM, dtype: object
0    93+2
1    88+3
Name: RAM, dtype: object
0    91+2
1    88+3
Name: LM, dtype: object
0    84+2
1    81+3
Name: LCM, dtype: object
0    84+2
1    81+3
Name: CM, dtype: object
0    84+2
1    81+3
Name: RCM, dtype: object
0    91+2
1    88+3
Name: RM, dtype: object
0    64+2
1    65+3
Name: LWB, dtype: object
0    61+2
1    61+3
Name: LDM, dtype: object
0    61+2
1    61+3
Name: CDM, dtype: object
0    61+2
1    61+3
Name: RDM, dtype: object
0    64+2
1    65+3
Name: RWB, dtype: object
0    59+2
1    61+3
Name: LB, dtype: object
0    47+2
1    53+3
Na

### We see that these values are of type ```object``` and we need to make them into ```int```

In [84]:
k=0
for i in df['LS']:
    if len(i)==4:
        k+=1
        
print(k-len(df['LS']))

0


In [85]:
for j in plus_columns:
    k=0
    for i in df[j]:
        if len(i)==4:
            k+=1

    print(k-len(df['LS']))

0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0


In [104]:
def convert_to_int(s):
    return (int(s[0:2])+int(s[-1]))

convert_to_int('14+3')

17

In [107]:
df['LS'].head(3)

0    90
1    94
2    87
Name: LS, dtype: int64

In [108]:
test = df['ST']
test = test.apply(lambda x: convert_to_int(x))

In [109]:
test

0        90
1        94
2        87
4        85
5        86
6        80
7        92
8        76
10       90
11       81
12       67
13       80
14       75
15       86
16       89
17       89
20       74
21       88
23       89
24       61
25       88
26       86
27       75
28       83
29       81
30       79
31       82
32       82
33       87
34       72
         ..
18171    42
18173    46
18174    48
18175    39
18176    47
18177    49
18179    36
18181    46
18182    42
18184    43
18185    45
18186    45
18187    48
18188    47
18189    47
18190    44
18191    41
18192    46
18193    37
18195    46
18196    42
18197    47
18199    44
18200    47
18201    34
18202    44
18203    47
18204    47
18205    49
18206    45
Name: ST, Length: 15926, dtype: int64

In [110]:
df['ST'] = df['ST'].apply(lambda x:convert_to_int(x))

In [111]:
df['ST'].head()

0    90
1    94
2    87
4    85
5    86
Name: ST, dtype: int64

In [113]:
plus_columns.pop(0)

'LS'

In [114]:
plus_columns.pop(0)

'ST'

In [115]:
plus_columns

['RS',
 'LW',
 'LF',
 'CF',
 'RF',
 'RW',
 'LAM',
 'CAM',
 'RAM',
 'LM',
 'LCM',
 'CM',
 'RCM',
 'RM',
 'LWB',
 'LDM',
 'CDM',
 'RDM',
 'RWB',
 'LB',
 'LCB',
 'CB',
 'RCB',
 'RB']

In [116]:
for i in plus_columns:
    df[i] = df[i].apply(lambda x:convert_to_int(x))

In [117]:
plus_columns_new = ['LS','ST']
plus_columns_new = plus_columns_new+plus_columns

for i in plus_columns_new:
    print(df[i].head(2))

0    90
1    94
Name: LS, dtype: int64
0    90
1    94
Name: ST, dtype: int64
0    90
1    94
Name: RS, dtype: int64
0    94
1    92
Name: LW, dtype: int64
0    95
1    93
Name: LF, dtype: int64
0    95
1    93
Name: CF, dtype: int64
0    95
1    93
Name: RF, dtype: int64
0    94
1    92
Name: RW, dtype: int64
0    95
1    91
Name: LAM, dtype: int64
0    95
1    91
Name: CAM, dtype: int64
0    95
1    91
Name: RAM, dtype: int64
0    93
1    91
Name: LM, dtype: int64
0    86
1    84
Name: LCM, dtype: int64
0    86
1    84
Name: CM, dtype: int64
0    86
1    84
Name: RCM, dtype: int64
0    93
1    91
Name: RM, dtype: int64
0    66
1    68
Name: LWB, dtype: int64
0    63
1    64
Name: LDM, dtype: int64
0    63
1    64
Name: CDM, dtype: int64
0    63
1    64
Name: RDM, dtype: int64
0    66
1    68
Name: RWB, dtype: int64
0    61
1    64
Name: LB, dtype: int64
0    49
1    56
Name: LCB, dtype: int64
0    49
1    56
Name: CB, dtype: int64
0    49
1    56
Name: RCB, dtype: int64
0    61
1    

### Let us take one more look at the data types and see if we need to make any changes to the columns

In [118]:
for i in df.columns.values:
    print(i),
    print(df[i].dtype)

Unnamed: 0 int64
ID int64
Name object
Age int64
Nationality object
Overall int64
Potential int64
Club object
Value object
Wage float64
Preferred Foot object
Weak Foot float64
Skill Moves float64
Work Rate object
Position object
Height object
Weight object
LS int64
ST int64
RS int64
LW int64
LF int64
CF int64
RF int64
RW int64
LAM int64
CAM int64
RAM int64
LM int64
LCM int64
CM int64
RCM int64
RM int64
LWB int64
LDM int64
CDM int64
RDM int64
RWB int64
LB int64
LCB int64
CB int64
RCB int64
RB int64
Crossing float64
Finishing float64
HeadingAccuracy float64
ShortPassing float64
Volleys float64
Dribbling float64
Curve float64
FKAccuracy float64
LongPassing float64
BallControl float64
Acceleration float64
SprintSpeed float64
Agility float64
Reactions float64
Balance float64
ShotPower float64
Jumping float64
Stamina float64
Strength float64
LongShots float64
Aggression float64
Interceptions float64
Positioning float64
Vision float64
Penalties float64
Composure float64
Marking float64
Standin

### Heights and weights are objects; time to change that

In [119]:
df['Height'].head()

0     5'7
1     6'2
2     5'9
4    5'11
5     5'8
Name: Height, dtype: object

In [120]:
df['Weight'].head()

0    159lbs
1    183lbs
2    150lbs
4    154lbs
5    163lbs
Name: Weight, dtype: object

In [124]:
def convert_weights(s):
    s=s.replace("lbs","")
    return int(s)

In [125]:
convert_weights('159lbs')

159

In [126]:
df['Weight'] = df['Weight'].apply(lambda x: convert_weights(x))

In [127]:
df['Weight'].head(3)

0    159
1    183
2    150
Name: Weight, dtype: int64

In [128]:
def convert_heights(s):
    l = s.split("'")
    x = l[0]
    y = l[1]
    return int(x)*12 + int(y)

In [129]:
convert_heights("5'7")

67

In [130]:
df['Height'] = df['Height'].apply(lambda x: convert_heights(x))

In [131]:
df['Height'].head(2)

0    67
1    74
Name: Height, dtype: int64

### Let's look at the data types again

In [132]:
for i in df.columns.values:
    print(i),
    print(df[i].dtype)

Unnamed: 0 int64
ID int64
Name object
Age int64
Nationality object
Overall int64
Potential int64
Club object
Value object
Wage float64
Preferred Foot object
Weak Foot float64
Skill Moves float64
Work Rate object
Position object
Height int64
Weight int64
LS int64
ST int64
RS int64
LW int64
LF int64
CF int64
RF int64
RW int64
LAM int64
CAM int64
RAM int64
LM int64
LCM int64
CM int64
RCM int64
RM int64
LWB int64
LDM int64
CDM int64
RDM int64
RWB int64
LB int64
LCB int64
CB int64
RCB int64
RB int64
Crossing float64
Finishing float64
HeadingAccuracy float64
ShortPassing float64
Volleys float64
Dribbling float64
Curve float64
FKAccuracy float64
LongPassing float64
BallControl float64
Acceleration float64
SprintSpeed float64
Agility float64
Reactions float64
Balance float64
ShotPower float64
Jumping float64
Stamina float64
Strength float64
LongShots float64
Aggression float64
Interceptions float64
Positioning float64
Vision float64
Penalties float64
Composure float64
Marking float64
StandingT

### Let's convert the ```float``` values to ```int```. We did mention previously that this was unnecessary, but let's do it 

In [133]:
float_columns=[]
for i in df.columns.values:
    if(df[i].dtype == 'float64'):
        float_columns.append(i)

In [139]:
float_columns

['Wage',
 'Weak Foot',
 'Skill Moves',
 'Crossing',
 'Finishing',
 'HeadingAccuracy',
 'ShortPassing',
 'Volleys',
 'Dribbling',
 'Curve',
 'FKAccuracy',
 'LongPassing',
 'BallControl',
 'Acceleration',
 'SprintSpeed',
 'Agility',
 'Reactions',
 'Balance',
 'ShotPower',
 'Jumping',
 'Stamina',
 'Strength',
 'LongShots',
 'Aggression',
 'Interceptions',
 'Positioning',
 'Vision',
 'Penalties',
 'Composure',
 'Marking',
 'StandingTackle',
 'SlidingTackle',
 'GKDiving',
 'GKHandling',
 'GKKicking',
 'GKPositioning',
 'GKReflexes']

In [140]:
float_columns.pop(0)

'Wage'

In [141]:
float_columns

['Weak Foot',
 'Skill Moves',
 'Crossing',
 'Finishing',
 'HeadingAccuracy',
 'ShortPassing',
 'Volleys',
 'Dribbling',
 'Curve',
 'FKAccuracy',
 'LongPassing',
 'BallControl',
 'Acceleration',
 'SprintSpeed',
 'Agility',
 'Reactions',
 'Balance',
 'ShotPower',
 'Jumping',
 'Stamina',
 'Strength',
 'LongShots',
 'Aggression',
 'Interceptions',
 'Positioning',
 'Vision',
 'Penalties',
 'Composure',
 'Marking',
 'StandingTackle',
 'SlidingTackle',
 'GKDiving',
 'GKHandling',
 'GKKicking',
 'GKPositioning',
 'GKReflexes']

In [142]:
for i in float_columns:
    print(df[i].head(2))

0    4.0
1    4.0
Name: Weak Foot, dtype: float64
0    4.0
1    5.0
Name: Skill Moves, dtype: float64
0    84.0
1    84.0
Name: Crossing, dtype: float64
0    95.0
1    94.0
Name: Finishing, dtype: float64
0    70.0
1    89.0
Name: HeadingAccuracy, dtype: float64
0    90.0
1    81.0
Name: ShortPassing, dtype: float64
0    86.0
1    87.0
Name: Volleys, dtype: float64
0    97.0
1    88.0
Name: Dribbling, dtype: float64
0    93.0
1    81.0
Name: Curve, dtype: float64
0    94.0
1    76.0
Name: FKAccuracy, dtype: float64
0    87.0
1    77.0
Name: LongPassing, dtype: float64
0    96.0
1    94.0
Name: BallControl, dtype: float64
0    91.0
1    89.0
Name: Acceleration, dtype: float64
0    86.0
1    91.0
Name: SprintSpeed, dtype: float64
0    91.0
1    87.0
Name: Agility, dtype: float64
0    95.0
1    96.0
Name: Reactions, dtype: float64
0    95.0
1    70.0
Name: Balance, dtype: float64
0    85.0
1    95.0
Name: ShotPower, dtype: float64
0    68.0
1    95.0
Name: Jumping, dtype: float64
0    72.

In [143]:
test2 = df['SlidingTackle']
test2 = test2.apply(lambda(x): int(x))

In [144]:
test2

0        26
1        23
2        33
4        51
5        22
6        73
7        38
8        91
10       19
11       69
12       89
13       29
14       85
15       20
16       38
17       48
20       80
21       39
23       12
24       90
25       32
26       41
27       87
28       44
29       22
30       51
31       22
32       47
33       36
34       88
         ..
18171    42
18173    46
18174    41
18175    49
18176    25
18177    11
18179    49
18181    45
18182    46
18184    53
18185    48
18186    51
18187    42
18188    41
18189    22
18190    47
18191    46
18192    44
18193    48
18195    48
18196    54
18197    38
18199    49
18200    14
18201    53
18202    47
18203    19
18204    11
18205    27
18206    50
Name: SlidingTackle, Length: 15926, dtype: int64

In [145]:
for i in float_columns:
    df[i] = df[i].apply(lambda(x): int(x))


In [146]:
for i in float_columns:
    print(df[i].head(2))

0    4
1    4
Name: Weak Foot, dtype: int64
0    4
1    5
Name: Skill Moves, dtype: int64
0    84
1    84
Name: Crossing, dtype: int64
0    95
1    94
Name: Finishing, dtype: int64
0    70
1    89
Name: HeadingAccuracy, dtype: int64
0    90
1    81
Name: ShortPassing, dtype: int64
0    86
1    87
Name: Volleys, dtype: int64
0    97
1    88
Name: Dribbling, dtype: int64
0    93
1    81
Name: Curve, dtype: int64
0    94
1    76
Name: FKAccuracy, dtype: int64
0    87
1    77
Name: LongPassing, dtype: int64
0    96
1    94
Name: BallControl, dtype: int64
0    91
1    89
Name: Acceleration, dtype: int64
0    86
1    91
Name: SprintSpeed, dtype: int64
0    91
1    87
Name: Agility, dtype: int64
0    95
1    96
Name: Reactions, dtype: int64
0    95
1    70
Name: Balance, dtype: int64
0    85
1    95
Name: ShotPower, dtype: int64
0    68
1    95
Name: Jumping, dtype: int64
0    72
1    88
Name: Stamina, dtype: int64
0    59
1    79
Name: Strength, dtype: int64
0    94
1    93
Name: LongShots, 

In [147]:
for i in df.columns.values:
    print(i),
    print(df[i].dtype)

Unnamed: 0 int64
ID int64
Name object
Age int64
Nationality object
Overall int64
Potential int64
Club object
Value object
Wage float64
Preferred Foot object
Weak Foot int64
Skill Moves int64
Work Rate object
Position object
Height int64
Weight int64
LS int64
ST int64
RS int64
LW int64
LF int64
CF int64
RF int64
RW int64
LAM int64
CAM int64
RAM int64
LM int64
LCM int64
CM int64
RCM int64
RM int64
LWB int64
LDM int64
CDM int64
RDM int64
RWB int64
LB int64
LCB int64
CB int64
RCB int64
RB int64
Crossing int64
Finishing int64
HeadingAccuracy int64
ShortPassing int64
Volleys int64
Dribbling int64
Curve int64
FKAccuracy int64
LongPassing int64
BallControl int64
Acceleration int64
SprintSpeed int64
Agility int64
Reactions int64
Balance int64
ShotPower int64
Jumping int64
Stamina int64
Strength int64
LongShots int64
Aggression int64
Interceptions int64
Positioning int64
Vision int64
Penalties int64
Composure int64
Marking int64
StandingTackle int64
SlidingTackle int64
GKDiving int64
GKHandling 

In [148]:
numerical_columns=[]
for i in df.columns.values:
    if df[i].dtype == 'int64':
        numerical_columns.append(i)

In [149]:
numerical_columns

['Unnamed: 0',
 'ID',
 'Age',
 'Overall',
 'Potential',
 'Weak Foot',
 'Skill Moves',
 'Height',
 'Weight',
 'LS',
 'ST',
 'RS',
 'LW',
 'LF',
 'CF',
 'RF',
 'RW',
 'LAM',
 'CAM',
 'RAM',
 'LM',
 'LCM',
 'CM',
 'RCM',
 'RM',
 'LWB',
 'LDM',
 'CDM',
 'RDM',
 'RWB',
 'LB',
 'LCB',
 'CB',
 'RCB',
 'RB',
 'Crossing',
 'Finishing',
 'HeadingAccuracy',
 'ShortPassing',
 'Volleys',
 'Dribbling',
 'Curve',
 'FKAccuracy',
 'LongPassing',
 'BallControl',
 'Acceleration',
 'SprintSpeed',
 'Agility',
 'Reactions',
 'Balance',
 'ShotPower',
 'Jumping',
 'Stamina',
 'Strength',
 'LongShots',
 'Aggression',
 'Interceptions',
 'Positioning',
 'Vision',
 'Penalties',
 'Composure',
 'Marking',
 'StandingTackle',
 'SlidingTackle',
 'GKDiving',
 'GKHandling',
 'GKKicking',
 'GKPositioning',
 'GKReflexes']

In [150]:
numerical_columns.pop(0)

'Unnamed: 0'

In [151]:
numerical_columns.pop(0)

'ID'

In [152]:
numerical_columns

['Age',
 'Overall',
 'Potential',
 'Weak Foot',
 'Skill Moves',
 'Height',
 'Weight',
 'LS',
 'ST',
 'RS',
 'LW',
 'LF',
 'CF',
 'RF',
 'RW',
 'LAM',
 'CAM',
 'RAM',
 'LM',
 'LCM',
 'CM',
 'RCM',
 'RM',
 'LWB',
 'LDM',
 'CDM',
 'RDM',
 'RWB',
 'LB',
 'LCB',
 'CB',
 'RCB',
 'RB',
 'Crossing',
 'Finishing',
 'HeadingAccuracy',
 'ShortPassing',
 'Volleys',
 'Dribbling',
 'Curve',
 'FKAccuracy',
 'LongPassing',
 'BallControl',
 'Acceleration',
 'SprintSpeed',
 'Agility',
 'Reactions',
 'Balance',
 'ShotPower',
 'Jumping',
 'Stamina',
 'Strength',
 'LongShots',
 'Aggression',
 'Interceptions',
 'Positioning',
 'Vision',
 'Penalties',
 'Composure',
 'Marking',
 'StandingTackle',
 'SlidingTackle',
 'GKDiving',
 'GKHandling',
 'GKKicking',
 'GKPositioning',
 'GKReflexes']

### Now, let us print out all the attributes of a given player

In [154]:
def print_attributes_of_a_player(playerName,attributes):
    try:
        player = df[df['Name']==playerName]
        for i in attributes:
            print(i)
            print(player[i][0])
    except:
        print('Player not found')


In [155]:
df['Name'].head()

0             L. Messi
1    Cristiano Ronaldo
2            Neymar Jr
4         K. De Bruyne
5            E. Hazard
Name: Name, dtype: object

In [157]:
print_attributes_of_a_player('L. Messi',numerical_columns)

Age
31
Overall
94
Potential
94
Weak Foot
4
Skill Moves
4
Height
67
Weight
159
LS
90
ST
90
RS
90
LW
94
LF
95
CF
95
RF
95
RW
94
LAM
95
CAM
95
RAM
95
LM
93
LCM
86
CM
86
RCM
86
RM
93
LWB
66
LDM
63
CDM
63
RDM
63
RWB
66
LB
61
LCB
49
CB
49
RCB
49
RB
61
Crossing
84
Finishing
95
HeadingAccuracy
70
ShortPassing
90
Volleys
86
Dribbling
97
Curve
93
FKAccuracy
94
LongPassing
87
BallControl
96
Acceleration
91
SprintSpeed
86
Agility
91
Reactions
95
Balance
95
ShotPower
85
Jumping
68
Stamina
72
Strength
59
LongShots
94
Aggression
48
Interceptions
22
Positioning
94
Vision
94
Penalties
75
Composure
96
Marking
33
StandingTackle
28
SlidingTackle
26
GKDiving
6
GKHandling
11
GKKicking
15
GKPositioning
14
GKReflexes
8


In [158]:
df.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Age,Nationality,Overall,Potential,Club,Value,Wage,...,Penalties,Composure,Marking,StandingTackle,SlidingTackle,GKDiving,GKHandling,GKKicking,GKPositioning,GKReflexes
0,0,158023,L. Messi,31,Argentina,94,94,FC Barcelona,€110.5M,565000.0,...,75,96,33,28,26,6,11,15,14,8
1,1,20801,Cristiano Ronaldo,33,Portugal,94,94,Juventus,€77M,405000.0,...,85,95,28,31,23,7,11,15,14,11
2,2,190871,Neymar Jr,26,Brazil,92,93,Paris Saint-Germain,€118.5M,290000.0,...,81,94,27,24,33,9,9,15,15,11
4,4,192985,K. De Bruyne,27,Belgium,91,92,Manchester City,€102M,355000.0,...,79,88,68,58,51,15,13,5,10,13
5,5,183277,E. Hazard,27,Belgium,91,91,Chelsea,€93M,340000.0,...,86,91,34,27,22,11,12,6,8,8
