In [2]:
import pandas as pd
import numpy as np

from ipyvizzu import Chart, Data, Config, Style
from ipyvizzustory import Story, Slide, Step

# Create the ipyvizzu Data container (the DataFrame is not attached to it in this cell).
data = Data()

# Load the raw Premier League transfers (keeping `year` as text) and clean
# them in a single chain so the cell can be re-run from scratch.
df = (
    pd.read_csv("./premier-league.csv", dtype={"year": str})
    # Keep the raw fee text as `fee_raw`; the cleaned fee column becomes `fee[m€]`.
    .rename(columns={"fee": "fee_raw", "fee_cleaned": "fee[m€]"})
    .assign(**{
        # Missing ages become 0 so the column can be cast to int.
        "age": lambda t: t["age"].fillna(0).astype(int),
        # Missing fees are treated as 0.
        "fee[m€]": lambda t: t["fee[m€]"].fillna(0),
        # Sign of the cash flow: -1 for players bought ("in"), +1 otherwise.
        "inorout": lambda t: np.where(t["transfer_movement"] == "in", -1, 1),
        # Signed fee: negative for players bought.
        "fee_real[m€]": lambda t: t["fee[m€]"] * t["inorout"],
    })
    # Not implemented yet: categorize transfers (purchase, loan, end of loan, other).
    # Order rows by year, then by club name; the original index labels are kept.
    .sort_values(by=["year", "club_name"])
)
df.head(10)

Unnamed: 0,club_name,player_name,age,position,club_involved_name,fee_raw,transfer_movement,transfer_period,fee[m€],league_name,year,season,country,inorout,fee_real[m€]
114,Arsenal FC,John Jensen,27,Central Midfield,Bröndby IF,€1.60m,in,Summer,1.6,Premier League,1992,1992/1993,England,-1,-1.6
115,Arsenal FC,Gary McKeown,21,midfield,Shrewsbury,"End of loanJun 30, 1992",in,Summer,0.0,Premier League,1992,1992/1993,England,-1,-0.0
116,Arsenal FC,Ian Selley,18,Central Midfield,Arsenal U18,-,in,Summer,0.0,Premier League,1992,1992/1993,England,-1,-0.0
117,Arsenal FC,Alan Miller,23,Goalkeeper,Birmingham,"End of loanMay 1, 1993",in,Summer,0.0,Premier League,1992,1992/1993,England,-1,-0.0
118,Arsenal FC,Steve Morrow,21,Right-Back,Barnet,"End of loanJun 30, 1992",in,Summer,0.0,Premier League,1992,1992/1993,England,-1,-0.0
119,Arsenal FC,John Bacon,19,Centre-Forward,Shamrock Rovers,"End of loanJun 1, 1992",in,Summer,0.0,Premier League,1992,1992/1993,England,-1,-0.0
120,Arsenal FC,David Rocastle,25,Attacking Midfield,Leeds,€2.40m,out,Summer,2.4,Premier League,1992,1992/1993,England,1,2.4
121,Arsenal FC,Perry Groves,27,Left Winger,Southampton,€850Th.,out,Summer,0.85,Premier League,1992,1992/1993,England,1,0.85
122,Arsenal FC,Ty Gooden,19,midfield,Wycombe,?,out,Summer,0.0,Premier League,1992,1992/1993,England,1,0.0
123,Arsenal FC,Gary McKeown,21,midfield,Dundee FC,?,out,Summer,0.0,Premier League,1992,1992/1993,England,1,0.0


In [2]:
# Show the dtype of every column in the cleaned transfers frame.
df.dtypes

club_name              object
player_name            object
age                     int32
position               object
club_involved_name     object
fee_raw                object
transfer_movement      object
transfer_period        object
fee[m€]               float64
league_name            object
year                   object
season                 object
country                object
inorout                 int32
fee_real[m€]          float64
dtype: object

In [None]:
# Print every distinct value of the raw fee column as a plain Python list.
print(df["fee_raw"].unique().tolist())

In [None]:
# Number of rows in the transfers frame.
df.shape[0]

In [4]:
# Sanity check against Transfermarkt data: Arsenal FC's incoming transfers for 2020.
# Select with a boolean mask instead of np.where(): np.where() returns
# positional offsets, whereas .loc looks rows up by index label. Because df
# was re-ordered by sort_values() without resetting its index, labels and
# positions need not agree, so feeding positions to .loc can pick wrong rows.
filtered_values = (
    (df['year'] == '2020')
    & (df['club_name'] == 'Arsenal FC')
    & (df['transfer_movement'] == 'in')
)
display(df.loc[filtered_values])

Unnamed: 0,club_name,player_name,age,position,club_involved_name,fee_raw,transfer_movement,transfer_period,fee[m€],league_name,year,season,country,inorout,fee_real[m€]
21598,Arsenal FC,Thomas Partey,27,Defensive Midfield,Atlético Madrid,€50.00m,in,Summer,50.0,Premier League,2020,2020/2021,England,-1,-50.0
21599,Arsenal FC,Gabriel Magalhães,22,Centre-Back,LOSC Lille,€26.00m,in,Summer,26.0,Premier League,2020,2020/2021,England,-1,-26.0
21600,Arsenal FC,Pablo Marí,26,Centre-Back,Flamengo,€6.00m,in,Summer,6.0,Premier League,2020,2020/2021,England,-1,-6.0
21601,Arsenal FC,Rúnar Alex Rúnarsson,25,Goalkeeper,Dijon,€2.00m,in,Summer,2.0,Premier League,2020,2020/2021,England,-1,-2.0
21602,Arsenal FC,Cédric Soares,28,Right-Back,Southampton,free transfer,in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0
21603,Arsenal FC,Willian,32,Left Winger,Chelsea,free transfer,in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0
21604,Arsenal FC,Emile Smith Rowe,20,Attacking Midfield,Arsenal U23,-,in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0
21605,Arsenal FC,Joe Willock,21,Central Midfield,Newcastle,"End of loanMay 31, 2021",in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0
21606,Arsenal FC,William Saliba,19,Centre-Back,Saint-Étienne,"End of loanJun 30, 2020",in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0
21607,Arsenal FC,Ainsley Maitland-Niles,23,Right Midfield,West Brom,"End of loanMay 31, 2021",in,Summer,0.0,Premier League,2020,2020/2021,England,-1,-0.0


In [None]:
# String-filter example: combine numeric comparisons with a .str predicate.
# NOTE(review): `dataFrame` is not defined in the visible cells, so this looks
# like a template snippet that raises NameError on a fresh run — confirm
# whether it should be adapted to `df` or removed.
filtered_values = np.where(
    (dataFrame['Salary'] >= 100000)
    & (dataFrame['Age'] < 40)
    & (dataFrame['JOB'].str.startswith('D'))
)
print(filtered_values)
# np.where() yields positional offsets, so select with .iloc; .loc would
# interpret the same numbers as index labels.
display(dataFrame.iloc[filtered_values[0]])

In [3]:
# Write the cleaned transfers frame to a CSV file in the working directory.
# NOTE(review): with the default index=True the index labels are written as an
# unnamed first column — confirm whether consumers of this file need it.
df.to_csv(path_or_buf="football_transfers_cleaned.csv")