In [1039]:
# Identify athletes meeting OCTC selection rules

#1. At least 1 athlete per event per gender
#2. Except for 100m, 400m, cap at 3 athletes per event. 100m, 400m capped at 6
#3. Where top athlete is >30 yrs old (except marathon), to include next athlete as well (below 30)
#4. Where althlete qualified in 2 events, to choose the better performing one
#5. For athletes looking to do full time, to write in to SAA for special consideration
#6. Exclude SPEX carded athletes
#7. Except for marathon, age threshold cut off of 40 yrs old for top athlete
#8. No double tapping of prog - potential names in red


%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [1138]:
# Import usual modules
import pandas as pd
import csv
import math
import os
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import openpyxl
import datetime
from scipy.stats import lognorm
import re
import string
from bs4 import BeautifulSoup
import requests
import unicodedata # for removing accented characters
import datetime
import icecream as ic



In [1756]:
# Extract timed event records

import pandas_gbq
from google.oauth2 import service_account

credentials = service_account.Credentials.from_service_account_file(
    '/Users/veesheenyuen/Desktop/DataScience/Keys/saa-analytics-7c8937b70609.json',
    
    
)

sql1="""
SELECT NAME, SEED,RESULT, TEAM, AGE, RANK AS COMPETITION_RANK, EVENT, DOB, COUNTRY, CATEGORY_EVENT, GENDER, COMPETITION, DATE
FROM `saa-analytics.results.saa_full` 
WHERE RESULT!='NM' AND RESULT!='-' AND RESULT!='FOUL' AND RANK!='DNS' AND RESULT!='DNS' AND RESULT!='DNF' AND RESULT!='DNQ' AND RESULT!='DQ' AND RESULT IS NOT NULL
"""

athletes = pandas_gbq.read_gbq(sql1, project_id="saa-analytics", credentials=credentials)




Downloading: 100%|[32m███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m


In [1757]:
athletes.to_csv('athletes.csv', sep=',', encoding='utf-8-sig', index=False)

In [1758]:
athletes

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37340,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37341,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37342,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023


In [1759]:
athletes_2023 = athletes[athletes['DATE']=='2023']

In [1760]:
athletes_2023

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37340,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37341,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37342,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023


In [1884]:
athletes_2023[athletes_2023['NAME']=='Chua, Clara']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
13714,"Chua, Clara",2:30.00,2:32.82,Wings Athletics Club,16.0,2,Women 800 Meter Run Open,2007-04-23,,Mid,Female,SA Allcomers 3,2023,800m
13825,"Chua, Clara",5:07.00,5:11.00,Wings Athletic Club,16.0,1,Women 1500 Meter Run OPEN,2007-04-23,,Mid,Female,SA Allcomers 4,2023,1500m
26119,"Chua, Clara",8:05.00,8:17.87,St Marg Secondary School,16.0,3,Women 2400 Meter Run W Elite,2007-04-23 00:00:00.000,,Sprint,Female,Pocari Sweat Run 2.4km 2023,2023,
28656,"Chua, Clara",,19:52.40,Wings Athletics Club,16.0,1,Women 5000 Meter Run Open,2007-04-22,,Long,Female,52nd SA Inter Club Championships 2023,2023,5000m
35216,"Chua, Clara",5:24.00,5:29.78,Wings Athletics Club,16.0,2,Women 16-99 1 Mile Run Open,2007-04-23,,Mid,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,1 mile
35217,"Chua, Clara",5:24.00,5:29.78,Wings Athletics Club,16.0,2,Women 16-99 1 Mile Run Open,2007-04-23,,Mid,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,1 mile
35221,"Chua, Clara",5:24.00,5:29.78,Wings Athletics Club,16.0,2,Women 16-99 1 Mile Run Open,2007-04-23 00:00:00.000,,,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,1 mile
35222,"Chua, Clara",5:24.00,5:29.78,Wings Athletics Club,16.0,2,Women 16-99 1 Mile Run Open,2007-04-23,,Mid,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,1 mile
36597,"Chua, Clara",2:27.00,2:25.99,Wings Athletics Club,16.0,1,Women 16-99 800 Meter Run Open,2007-04-23,,Mid,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,800m


In [1883]:
athletes_2023[athletes_2023['COMPETITION']=='52nd SA Inter Club Championships 2023']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
28397,"Loh, Anson",5.00m,15.71m,Wings Athletics Club,15,1,Men Shot Put U18,2008-04-28,,Throw,Male,52nd SA Inter Club Championships 2023,2023,Shot put
28398,"Tam, Joash",1.80m,1.80m,Wings Athletics Club,16,2,Men High Jump U18,2007-07-20,,Jump,Male,52nd SA Inter Club Championships 2023,2023,High jump
28399,"Tan, Rei",1.90m,1.91m,Raffles Institution,15,1,Men High Jump U18,2008-12-04,,Jump,Male,52nd SA Inter Club Championships 2023,2023,High jump
28400,"Lau, Jia Hern",1.82m,1.80m,Wings Athletics Club,15,2,Men High Jump U18,2008-09-03,,Jump,Male,52nd SA Inter Club Championships 2023,2023,High jump
28401,"ZHENGHONG, .",,1.60m,Erovra Club,23,5,Men High Jump Open,2000-01-15,,Jump,Male,52nd SA Inter Club Championships 2023,2023,High jump
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28699,"Phua, Jaydene",36.90m,30.74m,Hwa Chong Alumni Association,19,3,Women Javelin Throw (600g) Open,2004-05-07,,Throw,Female,52nd SA Inter Club Championships 2023,2023,Javelin throw
28700,"Evian, Chua",,30.33m,Hwa Chong Alumni Association,23,4,Women Javelin Throw (600g) Open,2000-01-18,,Throw,Female,52nd SA Inter Club Championships 2023,2023,Javelin throw
28701,"RA, YEOJIN",26.00m,28.56m,National University Singapore,21,6,Women Javelin Throw (600g) Open,2002-03-19,,Throw,Female,52nd SA Inter Club Championships 2023,2023,Javelin throw
28702,"Chong, Bernie",,21.49m,Temasek Polytechnic,19,8,Women Javelin Throw (600g) Open,2004-10-05,,Throw,Female,52nd SA Inter Club Championships 2023,2023,Javelin throw


In [1762]:
athletes_2023.to_csv('athletes_2023.csv', encoding='utf-8')

In [1763]:
# Choose 2023 only

athletes = athletes_2023

In [1764]:
athletes

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023
...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37340,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37341,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023
37342,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023


In [1765]:
athletes[athletes['NAME']=='Ho, Xander Ann Heng']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE
13760,"Ho, Xander Ann Heng",10.73,11.05,Wings Athletic Club,23.0,2,Men 100 Meter Dash OPEN,2000-05-19,,Sprint,Male,SA Allcomers 4,2023
16848,"Ho, Xander Ann Heng",10.73,10.89,Wings Athletics Club,23.0,4,Men 100 Meter Dash Open,2000-05-19 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023


In [1766]:
# Run events

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+200 Meter Dash.+', value='200m')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+100 Meter Dash.+', value='100m')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+400 Meter Dash.+', value='400m')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+800 Meter Run.+', value='800m')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+1500 Meter Run.+', value='1500m')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+3000 Meter Run.+', value='3000m')

# Hurdles events

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+110 Meter Hurdles.+', value='110m hurdles')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+110m Hurdles.+', value='110m hurdles')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+100 Meter Hurdles.+', value='100m hurdles')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+200 Meter Hurdles.+', value='200m hurdles')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+400 Meter Hurdles.+', value='400m hurdles')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+400m Hurdles.+', value='400m hurdles')


#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+4x100 Meter Relay.+', value='4 x 100m relay')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+4x400 Meter Relay.+', value='4 x 400m relay')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+3000 meter.+', value='3000m')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+1500 Meter Race Walk.+', value='1500m race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+3000m Race Walk.+', value='3000m race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+5000 Meter Race Walk.+', value='5000m race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+5000m Race Walk.+', value='5000m race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+10000 Meter Race Walk.+', value='10000m race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+5000 Meter Run.+', value='5000m run')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+10000 Meter Run.+', value='10000m run')


#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Race Walk.+', value='race walk')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Pole Vault.+', value='Pole vault')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Shot Put.+', value='Shot put')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Discus.+', value='Discus throw')

#mask = athletes['EVENT'].str.contains(r'Discus', na=True)
#athletes.loc[mask, 'EVENT'] = 'Discus throw'

#mask = athletes['EVENT'].str.contains(r'Shot', na=True)
#athletes.loc[mask, 'EVENT'] = 'Shot put'

#mask = athletes['EVENT'].str.contains(r'Javelin', na=True)
#athletes.loc[mask, 'EVENT'] = 'Javelin throw'



#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Triple Jump.+', value='Triple jump')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Javelin Throw.+', value='Javelin throw')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+Long Jump.+', value='Long jump')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'Long Jump', value='Long jump')


#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'High Jump', value='High jump')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+High Jump.+', value='High jump')

#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+S/C.+', value='steeplechase')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+3000 Meter Steeplechase.+', value='3000m steeplechase')
#athletes['EVENT'] = athletes['EVENT'].replace(regex=r'.+2000 Meter Steeplechase.+', value='2000m steeplechase')


#mask = athletes['EVENT'].str.contains(r'High', na=True)
#athletes.loc[mask, 'EVENT'] = 'High jump'

#mask = athletes['EVENT'].str.contains(r'110m hurdles', na=True)
#athletes.loc[mask, 'EVENT'] = '110m hurdles'

#mask = athletes['EVENT'].str.contains(r'400m hurdles', na=True)
#athletes.loc[mask, 'EVENT'] = '400m hurdles'

#mask = athletes['EVENT'].str.contains(r'200m Hurdles', na=True)
#athletes.loc[mask, 'EVENT'] = '200m hurdles'

#mask = athletes['EVENT'].str.contains(r'100m Hurdles', na=True)
#athletes.loc[mask, 'EVENT'] = '100m hurdles'

#mask = athletes['EVENT'].str.contains(r'4 X 100m relay', na=True)
#athletes.loc[mask, 'EVENT'] = '4 x 100m relay'

#mask = athletes['EVENT'].str.contains(r'4 X 400m relay', na=True)
#athletes.loc[mask, 'EVENT'] = '4 x 400m relay'

#mask = athletes['EVENT'].str.contains(r'2000 Meter Steeplechase', na=True)
#athletes.loc[mask, 'EVENT'] = '2000m steeplechase'

#mask = athletes['EVENT'].str.contains(r'Hammer Throw', na=True)
#athletes.loc[mask, 'EVENT'] = 'Hammer throw'

#mask = athletes['EVENT'].str.contains(r'3000m S/C', na=True)
#athletes.loc[mask, 'EVENT'] = '3000m steeplechase'

#mask = athletes['EVENT'].str.contains(r'2000m S/C', na=True)
#athletes.loc[mask, 'EVENT'] = '2000m steeplechase'


#mask = athletes['EVENT'].str.contains(r'4x100m Relay', na=True)
#athletes.loc[mask, 'EVENT'] = '4 x 100m relay'

#mask = athletes['EVENT'].str.contains(r'4x400m Relay', na=True)
#athletes.loc[mask, 'EVENT'] = '4 x 400m relay'


# correct javelin category
#mask = athletes['EVENT'].str.contains(r'Javelin', na=True)
#athletes.loc[mask, 'CATEGORY_EVENT'] = 'Throw'




In [1767]:
# Create temporary mapped event column

athletes['MAPPED_EVENT']=''

# Correct javelin category

mask = athletes['EVENT'].str.contains(r'Javelin', na=True)
athletes.loc[mask, 'CATEGORY_EVENT'] = 'Throw'


# Running

mask = athletes['EVENT'].str.contains(r'50 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '50m'
mask = athletes['EVENT'].str.contains(r'60 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '60m'
mask = athletes['EVENT'].str.contains(r'80 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '80m'
mask = athletes['EVENT'].str.contains(r'100 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '100m'
mask = athletes['EVENT'].str.contains(r'100 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '100m'
mask = athletes['EVENT'].str.contains(r'100m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '100m'
mask = athletes['EVENT'].str.contains(r'200 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '200m'
mask = athletes['EVENT'].str.contains(r'200m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '200m'
mask = athletes['EVENT'].str.contains(r'300 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '300m'
mask = athletes['EVENT'].str.contains(r'400 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '400m'
mask = athletes['EVENT'].str.contains(r'400m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '400m'
mask = athletes['EVENT'].str.contains(r'600 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '600m'
mask = athletes['EVENT'].str.contains(r'800 Meter Dash', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '800m'
mask = athletes['EVENT'].str.contains(r'800 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '800m'
mask = athletes['EVENT'].str.contains(r'800m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '800m'
mask = athletes['EVENT'].str.contains(r'1500 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '1500m'
mask = athletes['EVENT'].str.contains(r'1500m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '1500m'
mask = athletes['EVENT'].str.contains(r'3000 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m'
mask = athletes['EVENT'].str.contains(r'3000m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m'
mask = athletes['EVENT'].str.contains(r'5000 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '5000m'
mask = athletes['EVENT'].str.contains(r'5000m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '5000m'
mask = athletes['EVENT'].str.contains(r'10000 Meter Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '10000m'
mask = athletes['EVENT'].str.contains(r'10000m', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '10000m'
mask = athletes['EVENT'].str.contains(r'1 Mile Run', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '1 mile'


# Hurdles

mask = athletes['EVENT'].str.contains(r'80m Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '80m hurdles'
mask = athletes['EVENT'].str.contains(r'80m hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '80m hurdles'
mask = athletes['EVENT'].str.contains(r'80 Meter Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '80m hurdles'
mask = athletes['EVENT'].str.contains(r'100m Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '100m hurdles'
mask = athletes['EVENT'].str.contains(r'100 Meter Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '100m hurdles'
mask = athletes['EVENT'].str.contains(r'110m Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '110m hurdles'
mask = athletes['EVENT'].str.contains(r'110 Meter Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '110m hurdles'
mask = athletes['EVENT'].str.contains(r'200m Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '200m hurdles'
mask = athletes['EVENT'].str.contains(r'200 Meter Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '200m hurdles'
mask = athletes['EVENT'].str.contains(r'400m Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '400m hurdles'
mask = athletes['EVENT'].str.contains(r'400 Meter Hurdles', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '400m hurdles'


# Throws

mask = athletes['EVENT'].str.contains(r'Javelin', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Javelin throw'
mask = athletes['EVENT'].str.contains(r'Shot', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Shot put'
mask = athletes['EVENT'].str.contains(r'Hammer', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Hammer throw'
mask = athletes['EVENT'].str.contains(r'Discus', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Discus throw'

# Jumps

mask = athletes['EVENT'].str.contains(r'High Jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'High jump'
mask = athletes['EVENT'].str.contains(r'Long Jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Long jump'
mask = athletes['EVENT'].str.contains(r'Triple Jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Triple jump'
mask = athletes['EVENT'].str.contains(r'Pole Vault', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Pole vault'
mask = athletes['EVENT'].str.contains(r'High jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'High jump'
mask = athletes['EVENT'].str.contains(r'Long jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Long jump'
mask = athletes['EVENT'].str.contains(r'Triple jump', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Triple jump'
mask = athletes['EVENT'].str.contains(r'Pole vault', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = 'Pole vault'

# Steeplechase

mask = athletes['EVENT'].str.contains(r'2000m S/C', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '2000m steeplechase'
mask = athletes['EVENT'].str.contains(r'2000m steeplechase', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '2000m steeplechase'
mask = athletes['EVENT'].str.contains(r'2000 Meter Steeplechase', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '2000m steeplechase'
mask = athletes['EVENT'].str.contains(r'3000m S/C', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m steeplechase'
mask = athletes['EVENT'].str.contains(r'3000 Meter Steeplechase', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m steeplechase'


# Walk

mask = athletes['EVENT'].str.contains(r'1500 Meter Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '1500m race walk'
mask = athletes['EVENT'].str.contains(r'3000m Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m race walk'
mask = athletes['EVENT'].str.contains(r'3000 Meter Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '3000m race walk'
mask = athletes['EVENT'].str.contains(r'5000 Meter Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '5000m race walk'
mask = athletes['EVENT'].str.contains(r'5000m Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '5000m race walk'
mask = athletes['EVENT'].str.contains(r'10000 Meter Race Walk', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '10000m race walk'

# Relay

mask = athletes['EVENT'].str.contains(r'4x100m Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 100m relay'
mask = athletes['EVENT'].str.contains(r'4 X 100m Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 100m relay'
mask = athletes['EVENT'].str.contains(r'4x400m Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 400m relay'
mask = athletes['EVENT'].str.contains(r'4 X 400m Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 400m relay'
mask = athletes['EVENT'].str.contains(r'4x100 Meter Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 100m relay'
mask = athletes['EVENT'].str.contains(r'4x400 Meter Relay', na=True)
athletes.loc[mask, 'MAPPED_EVENT'] = '4 x 400m relay'


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  athletes['MAPPED_EVENT']=''


In [1768]:
athletes

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023,
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023,
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023,800m
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023,1500m
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023,800m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37340,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37341,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37342,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay


In [1769]:
athletes[athletes['NAME']=='Ho, Xander Ann Heng']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
13760,"Ho, Xander Ann Heng",10.73,11.05,Wings Athletic Club,23.0,2,Men 100 Meter Dash OPEN,2000-05-19,,Sprint,Male,SA Allcomers 4,2023,100m
16848,"Ho, Xander Ann Heng",10.73,10.89,Wings Athletics Club,23.0,4,Men 100 Meter Dash Open,2000-05-19 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m


In [1770]:
athletes.to_csv('athletes_post_map.csv', sep=',', encoding='utf-8-sig', index=False)


In [1771]:
credentials = service_account.Credentials.from_service_account_file(
    '/Users/veesheenyuen/Desktop/DataScience/Keys/saa-analytics-7c8937b70609.json',
)

sql="""
SELECT NAME, RESULT, RANK, EVENT, CATEGORY_EVENT, GENDER, COMPETITION, STAGE
FROM `saa-analytics.results.saa_full`
WHERE STAGE='Final' AND COMPETITION='SEA Games' AND RANK='3'
"""

benchmarks = pandas_gbq.read_gbq(sql, project_id="saa-analytics", credentials=credentials)




Downloading: 100%|[32m███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m


In [1772]:
benchmarks

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE
0,Trần Thị Nhi Yến,11.75,3,100m,Sprint,Female,SEA Games,Final
1,Muhammad Haiqal Hanafi,10.443,3,100m,Sprint,Male,SEA Games,Final
2,Lalu Muhammad Zohri,21.02,3,200m,Sprint,Male,SEA Games,Final
3,Zaidatul Husniah Zulkifli,23.6,3,200m,Sprint,Female,SEA Games,Final
4,Frederick Ramirez,46.63,3,400m,Sprint,Male,SEA Games,Final
5,Nguyễn Thị Hằng,53.84,3,400m,Sprint,Female,SEA Games,Final
6,Wan Muhammad Fazri Wan Zahari,1:53.86,3,800m,Mid,Male,SEA Games,Final
7,Goh Chui Ling,2:09.15,3,800m,Mid,Female,SEA Games,Final
8,Goh Chui Ling,4:26.33,3,1500m,Mid,Female,SEA Games,Final
9,Robi Syianturi,14:43.45,3,5000m,Long,Male,SEA Games,Final


In [1773]:
#benchmarks=SEAG[SEAG['RANK']=='3']

In [1774]:
benchmarks.rename(columns = {'RESULT':'BENCHMARK'}, inplace = True)


In [1775]:
benchmarks.drop(['NAME', 'RANK', 'CATEGORY_EVENT', 'COMPETITION', 'STAGE'], axis=1, inplace=True)


In [1776]:
benchmarks

Unnamed: 0,BENCHMARK,EVENT,GENDER
0,11.75,100m,Female
1,10.443,100m,Male
2,21.02,200m,Male
3,23.6,200m,Female
4,46.63,400m,Male
5,53.84,400m,Female
6,1:53.86,800m,Male
7,2:09.15,800m,Female
8,4:26.33,1500m,Female
9,14:43.45,5000m,Male


In [1777]:
# Converts any time format into seconds

def convert_time(i, string, metric):

    global output
    
    l=['discus', 'throw', 'jump', 'vault', 'shot']
        
    string=string.lower()    
    
    try:
    
        if any(s in string for s in l)==True:
        
            if 'm' in metric:
            
                metric=metric.replace('m', '')
                output=float(str(metric))
            
            elif 'GR' in metric:
            
                metric=metric.replace('GR', '')
                output=float(str(metric))


            else:
    
                output=float(str(metric))
        
        else:
        
            searchstring = ":"
            searchstring2 = "."
            substring=str(metric)
            count = substring.count(searchstring)
            count2 = substring.count(searchstring2)
            
            if count==0:
                output=float(substring)
                        
             
            elif (type(metric)==datetime.time or type(metric)==datetime.datetime):
                                                
                time=str(metric)
                h, m ,s = time.split(':')
                output = float(datetime.timedelta(hours=int(h),minutes=int(m),seconds=float(s)).total_seconds())
            
                                
            elif (count==1 and count2==1):
            
                m,s = metric.split(':')
                output = float(datetime.timedelta(minutes=int(m),seconds=float(s)).total_seconds())
                     
            elif (count==1 and count2==2):
            
                metric = metric.replace(".", ":", 1)
            
                h,m,s = metric.split(':')            
                output = float(datetime.timedelta(hours=int(h),minutes=int(m),seconds=float(s)).total_seconds())
                
        
            elif (count==2 and count2==0):
            
                h,m,s = metric.split(':')
                output = float(datetime.timedelta(hours=int(h),minutes=int(m),seconds=float(s)).total_seconds())
     
    except:
        
        pass
                
    return output

In [1778]:
for i in range(len(benchmarks)):
        
    rowIndex = benchmarks.index[i]

    input_string=benchmarks.iloc[rowIndex,1]
    
    metric=benchmarks.iloc[rowIndex,0]
    
    if metric==None:
        continue
        
    out = convert_time(i, input_string, metric)
     
    benchmarks.loc[rowIndex, 'Metric'] = out

In [1779]:
benchmarks

Unnamed: 0,BENCHMARK,EVENT,GENDER,Metric
0,11.75,100m,Female,11.75
1,10.443,100m,Male,10.443
2,21.02,200m,Male,21.02
3,23.6,200m,Female,23.6
4,46.63,400m,Male,46.63
5,53.84,400m,Female,53.84
6,1:53.86,800m,Male,113.86
7,2:09.15,800m,Female,129.15
8,4:26.33,1500m,Female,266.33
9,14:43.45,5000m,Male,883.45


In [1780]:
mask = benchmarks['EVENT'].str.contains(r'jump|throw|Pole|put', na=True)

benchmarks.loc[mask, '2%']=benchmarks['Metric']*0.98
benchmarks.loc[mask, '3.5%']=benchmarks['Metric']*0.965
benchmarks.loc[mask, '5%']=benchmarks['Metric']*0.95

benchmarks.loc[~mask, '2%']=benchmarks['Metric']*1.02
benchmarks.loc[~mask, '3.5%']=benchmarks['Metric']*1.035
benchmarks.loc[~mask, '5%']=benchmarks['Metric']*1.05


#benchmarks.iloc[5, [1]]='10000m run'
#benchmarks.iloc[28, [1]]='10000m run'
#benchmarks.iloc[26, [1]]='1500m'


In [1781]:
benchmarks['MAPPED_EVENT']=benchmarks['EVENT']

In [1782]:
benchmarks

Unnamed: 0,BENCHMARK,EVENT,GENDER,Metric,2%,3.5%,5%,MAPPED_EVENT
0,11.75,100m,Female,11.75,11.985,12.16125,12.3375,100m
1,10.443,100m,Male,10.443,10.65186,10.808505,10.96515,100m
2,21.02,200m,Male,21.02,21.4404,21.7557,22.071,200m
3,23.6,200m,Female,23.6,24.072,24.426,24.78,200m
4,46.63,400m,Male,46.63,47.5626,48.26205,48.9615,400m
5,53.84,400m,Female,53.84,54.9168,55.7244,56.532,400m
6,1:53.86,800m,Male,113.86,116.1372,117.8451,119.553,800m
7,2:09.15,800m,Female,129.15,131.733,133.67025,135.6075,800m
8,4:26.33,1500m,Female,266.33,271.6566,275.65155,279.6465,1500m
9,14:43.45,5000m,Male,883.45,901.119,914.37075,927.6225,5000m


In [1783]:
athletes

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023,
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023,
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023,800m
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023,1500m
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023,800m
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37339,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37340,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37341,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay
37342,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay


In [1784]:
athletes[athletes['NAME']=='Ho, Xander Ann Heng']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT
13760,"Ho, Xander Ann Heng",10.73,11.05,Wings Athletic Club,23.0,2,Men 100 Meter Dash OPEN,2000-05-19,,Sprint,Male,SA Allcomers 4,2023,100m
16848,"Ho, Xander Ann Heng",10.73,10.89,Wings Athletics Club,23.0,4,Men 100 Meter Dash Open,2000-05-19 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m


In [1785]:
# There is a problem with RESULTS column being changed after this statement

#df = athletes.reset_index().merge(benchmarks.reset_index(), on=['MAPPED_EVENT','GENDER'], how='left')
#df = athletes.merge(benchmarks, on=['EVENT','GENDER'], how='left')


In [2173]:
# Merge benchmarks onto athletes on MAPPED_EVENT and GENDER

df = pd.merge(
    left=athletes, 
    right=benchmarks,
    how='left',
    left_on=['MAPPED_EVENT', 'GENDER'],
    right_on=['MAPPED_EVENT', 'GENDER'],
)

In [2174]:
df

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023,,,,,,,
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023,,,,,,,
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023,1500m,4:26.33,1500m,266.33,271.6566,275.65155,279.6465
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,44.58,4 x 100m relay,44.58,45.4716,46.14030,46.8090


In [2175]:
df[df['NAME']=='RAPHAEL, RYAN']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%
4889,"RAPHAEL, RYAN",10.9,10.72,Singapore,20.0,2,Men 100 Meter Dash Open,2003-04-06,,Sprint,Male,18 Feb 2023 Trial,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515
6868,"RAPHAEL, RYAN",11.0,10.9,Singapore,20.0,3,Men 100 Meter Dash 1 Trial,2003-04-06 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515
6877,"RAPHAEL, RYAN",11.0,10.9,Singapore,20.0,4,Men 100 Meter Dash 2 Trial,2003-04-06 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515


In [2176]:
# replace '-' with NaN

df['RESULT'] = df['RESULT'].replace(regex=r'–', value=np.NaN)
df['SEED'] = df['SEED'].replace(regex=r'–', value=np.NaN)


In [2177]:
df

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%
0,Ethan Yan,,04:32.4,Singapore,,35,"Mile, Road",11-Jan-01,,Mid,Male,World Athletics Road Running Championships,2023,,,,,,,
1,Romaine Soh,,05:24.2,Singapore,,28,"Mile, Road",26-Nov-94,,Mid,Female,World Athletics Road Running Championships,2023,,,,,,,
2,Chui Ling Goh,,02:10.2,Singapore,,5,800m,27-Nov-92,,Mid,Female,14. Internationale Sparkassenmeeting,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075
3,Chui Ling Goh,,04:29.6,Singapore,,12,1500m,27-Nov-92,,Mid,Female,19th Asian Games,2023,1500m,4:26.33,1500m,266.33,271.6566,275.65155,279.6465
4,Chui Ling Goh,,02:09.8,Singapore,,5,800m,27-Nov-92,,Mid,Female,25th Asian Athletics Championships,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,Male,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,Female,10th Club ZOOM Kindred Spirit Series 2023,2023,4 x 100m relay,44.58,4 x 100m relay,44.58,45.4716,46.14030,46.8090


In [2178]:
df[df['NAME']=='QUEK, CALVIN']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,DATE,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%
4630,"QUEK, CALVIN",,11.01,Singapore,27.0,1,Men 100 Meter Dash Open,1996-09-26 00:00:00.000,,Sprint,Male,Trial #3,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515
6873,"QUEK, CALVIN",10.98,10.69,Singapore,27.0,1,Men 100 Meter Dash 1 Trial,1996-02-26 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515
6875,"QUEK, CALVIN",10.98,10.81,Singapore,27.0,1,Men 100 Meter Dash 2 Trial,1996-02-26 00:00:00.000,,Sprint,Male,SA Allcomers Meet 2,2023,100m,10.443,100m,10.443,10.65186,10.808505,10.96515
10534,"QUEK, CALVIN",47.4,48.04,Singapore,27.0,3,Men 400 Meter Dash Open,1996-02-26 00:00:00.000,,Sprint,Male,83rd Singapore Open Track & Field,2023,400m,46.63,400m,46.63,47.5626,48.26205,48.9615
10551,"QUEK, CALVIN",47.4,47.43,Singapore,27.0,1,Men 400 Meter Dash Open,1996-02-26 00:00:00.000,,Sprint,Male,83rd Singapore Open Track & Field,2023,400m,46.63,400m,46.63,47.5626,48.26205,48.9615
10657,"QUEK, CALVIN",51.85,52.82,Singapore,27.0,1,Men 400 Meter Hurdles Open,1996-02-26 00:00:00.000,,Hurdles,Male,83rd Singapore Open Track & Field,2023,400m hurdles,50.75,400m hurdles,50.75,51.765,52.52625,53.2875


In [2179]:
df.to_csv('df.csv', sep=',', encoding='utf-8-sig', index=False)


In [2180]:
# Convert results and seed into seconds format

for i in range(len(df)):
    
        
    rowIndex = df.index[i]

    input_string=df.iloc[rowIndex,6]    # event description
    
    metric=df.iloc[rowIndex,2] # result
    metric_seed=df.iloc[rowIndex, 1]  #seed
    
    if metric=='—' or metric=='DQ' or metric=='SCR' or metric=='FS' or metric=='DNQ' or metric==' DNS' or metric=='NH':
        continue
    
    print(i, input_string, metric)
    result_out = convert_time(i, input_string, metric)
    seed_out = convert_time(i, input_string, metric_seed)
         
    df.loc[rowIndex, 'RESULT_CONV'] = result_out
    df.loc[rowIndex, 'SEED_CONV'] = seed_out


0 Mile, Road     04:32.4
1 Mile, Road     05:24.2
2 800m     02:10.2
3 1500m     04:29.6
4 800m     02:09.8
5 1500m     04:31.3
6 1500m     04:40.6
7 800m     02:09.2
8 1500m     04:26.3
9 800m     02:07.8
10 800m     02:10.9
11 800m     02:13.8
12 1500m     04:44.1
13 800m     02:54.1
14 1500m     04:55.8
15 1500m     05:11.2
16 1500m     04:06.5
17 Pole Vault     4.8
18 Pole Vault     3.15
19 Pole Vault     4.6
20 High Jump     2.15
21 High Jump     2.15
22 High Jump     2
23 High Jump     2.07
24 High Jump     2.06
25 High Jump     2.06
26 High Jump     2.08
27 Triple Jump     11.93
28 High Jump     1.85
29 High Jump     1.87
30 Pole Vault     3.4
31 Long Jump     6.3
32 High Jump     1.9
33 Long Jump     5.73
34 Triple Jump     11.28
35 Triple Jump     12.67
36 Long Jump     5.52
37 Triple Jump     12.04
38 Triple Jump     12.62
39 Long Jump     5.63
40 Triple Jump     12.49
41 Long Jump     5.83w 
42 Triple Jump     12.92
43 Long Jump     5.91
44 Triple Jump     12.25
45 Triple Ju

919 200m 00:29.8
920 200m 00:31.6
921 200m 00:28.0
922 200m 00:26.9
923 200m 00:38.6
924 200m 00:30.6
925 200m 00:29.3
926 200m 00:28.6
927 200m 00:30.7
928 200m 00:27.8
929 200m 00:28.5
930 200m 00:26.8
931 200m 00:32.1
932 200m 00:27.6
933 200m 00:33.2
934 200m 00:27.4
935 200m 00:30.4
936 200m 00:26.2
937 200m 00:25.4
938 200m 00:37.8
939 200m 00:24.2
940 200m 00:24.3
941 200m 00:24.2
942 200m 00:31.0
943 200m 00:32.2
944 200m 00:28.6
945 200m 00:26.7
946 200m 00:31.0
947 200m 00:30.6
948 200m 00:25.6
949 200m 00:24.9
950 200m 00:32.4
951 200m 00:28.4
952 200m 00:27.1
953 200m 00:26.2
954 200m 00:26.9
955 200m 00:28.5
956 200m 00:40.7
957 200m 00:31.0
958 200m 00:28.7
959 200m 00:23.1
960 200m 00:32.2
961 200m 00:26.6
962 200m 00:24.4
963 200m 00:30.2
964 200m 00:29.6
965 200m 00:31.4
966 200m 00:30.5
967 200m 00:25.9
968 200m 00:26.9
969 200m 00:26.2
970 200m 00:29.3
971 200m 00:23.4
972 200m 00:23.1
973 200m 00:27.2
974 200m 00:31.2
975 200m 00:25.4
976 200m 00:24.4
977 200m 00:27

1821 400m 01:02.8
1822 400m 01:24.3
1823 400m 01:02.2
1824 400m 01:12.5
1825 400m 01:00.4
1826 400m 00:53.9
1827 400m 00:54.2
1828 400m 01:18.3
1829 400m 01:26.4
1830 400m 01:07.1
1831 400m 01:10.1
1832 400m 01:09.0
1833 400m 01:05.6
1834 400m 00:53.9
1835 400m 01:04.9
1836 400m 01:05.7
1837 400m 01:01.4
1838 400m 01:29.0
1839 400m 01:23.7
1840 400m 01:11.3
1841 400m 01:07.8
1842 400m 01:32.8
1843 400m 01:13.6
1844 400m 00:53.5
1845 400m 01:23.6
1846 400m 01:15.6
1847 400m 00:56.4
1848 400m 01:21.1
1849 400m 01:06.7
1850 400m 00:58.0
1851 400m 00:54.7
1852 400m 01:13.5
1853 400m 01:01.9
1854 400m 00:56.3
1855 400m 01:44.6
1856 400m 01:17.1
1857 400m 01:17.2
1858 400m 00:58.8
1859 400m 01:04.5
1860 400m 01:03.6
1861 400m 01:04.3
1862 400m 01:08.3
1863 400m 01:06.8
1864 400m 01:04.0
1865 400m 01:34.4
1866 400m 01:13.1
1867 400m 01:13.7
1868 400m 01:27.9
1869 400m 01:04.9
1870 400m 01:09.8
1871 400m 00:59.3
1872 400m 01:03.8
1873 400m 01:04.2
1874 400m 01:18.3
1875 400m 01:01.8
1876 400m 

2777 1500m 06:45.0
2778 1500m 04:31.3
2779 1500m 05:53.4
2780 1500m 06:36.2
2781 1500m 05:34.4
2782 1500m 06:01.3
2783 1500m 04:55.1
2784 1500m 06:24.0
2785 1500m 04:17.5
2786 1500m 04:44.7
2787 1500m 04:46.2
2788 1500m 05:57.1
2789 1500m 04:44.0
2790 1500m 06:23.7
2791 1500m 04:42.8
2792 1500m 05:36.0
2793 1500m 06:17.5
2794 1500m 05:18.7
2795 1500m 05:35.7
2796 1500m 06:03.6
2797 1500m 05:36.5
2798 1500m 05:20.3
2799 1500m 06:11.5
2800 1500m 04:52.2
2801 1500m 05:39.0
2802 1500m 04:30.4
2803 1500m 05:47.6
2804 1500m 05:36.1
2805 1500m 06:28.4
2806 1500m 06:18.5
2807 1500m 06:18.2
2808 1500m 05:18.1
2809 1500m 06:19.9
2810 1500m 05:23.6
2811 1500m 06:07.6
2812 1500m 04:29.9
2813 1500m 04:53.0
2814 1500m 05:45.9
2815 1500m 05:57.2
2816 1500m 06:01.0
2817 1500m 07:13.8
2818 1500m 07:36.6
2819 1500m 05:09.7
2820 1500m 04:49.5
2821 1500m 04:44.9
2822 1500m 04:21.5
2823 1500m 04:54.9
2824 1500m 06:33.0
2825 1500m 04:52.4
2826 1500m 07:40.0
2827 1500m 05:53.8
2828 1500m 06:19.5
2829 1500m 0

3758 4 X 100m Relay 00:58.3
3759 4 X 100m Relay 00:52.8
3760 4 X 100m Relay 01:00.5
3761 4 X 100m Relay 00:50.4
3762 4 X 100m Relay 00:47.1
3763 4 X 100m Relay 00:51.4
3764 4 X 100m Relay 00:57.3
3765 4 X 100m Relay 00:58.9
3766 4 X 100m Relay 00:53.9
3767 4 X 100m Relay 00:51.9
3768 4 X 100m Relay 00:59.3
3769 4 X 100m Relay 00:51.5
3770 4 X 100m Relay 00:54.6
3771 4 X 100m Relay 00:54.4
3772 4 X 100m Relay 00:48.0
3773 4 X 100m Relay 01:02.1
3774 4 X 100m Relay 00:47.7
3775 4 X 100m Relay 00:46.7
3776 4 X 100m Relay 01:01.5
3777 4 X 100m Relay 00:53.3
3778 4 X 100m Relay 01:01.4
3779 4 X 100m Relay 00:52.1
3780 4 X 100m Relay 00:53.9
3781 4 X 100m Relay 00:48.6
3782 4 X 100m Relay 00:51.3
3783 4 X 100m Relay 00:54.5
3784 4 X 100m Relay 00:50.1
3785 4 X 100m Relay 00:51.9
3786 4 X 100m Relay 00:53.3
3787 4 X 100m Relay 00:53.7
3788 4 X 100m Relay 00:53.8
3789 4 X 100m Relay 00:57.4
3790 4 X 100m Relay 00:44.1
3791 4 X 100m Relay 00:45.1
3792 4 X 100m Relay 00:54.8
3793 4 X 100m Relay 

4736 Men 400 Meter Dash Open 1:01.17
4737 Men 400 Meter Dash Open 57.26
4738 Men 400 Meter Dash Open 58.11
4739 Men 400 Meter Dash Open 55.99
4740 Men 400 Meter Dash Open 1:02.43
4741 Men 400 Meter Dash Open 53.61
4742 Men 400 Meter Dash Open 1:00.96
4743 Men 400 Meter Dash Open 57.78
4744 Men 400 Meter Dash Open 52.43
4745 Men 400 Meter Dash Open 50.29
4746 Men 400 Meter Dash Open 57.93
4747 Men 400 Meter Dash Open 52.58
4748 Women 800 Meter Run Open 3:36.35
4749 Women 800 Meter Run Open 2:29.38
4750 Women 800 Meter Run Open 2:32.82
4751 Women 800 Meter Run Open 2:44.45
4752 Women 800 Meter Run Open 2:45.31
4753 Women 100 Meter Dash Open 13.18
4754 Women 100 Meter Dash Open 13.46
4755 Women 100 Meter Dash Open 20.38
4756 Women 100 Meter Dash Open 13.87
4757 Women 100 Meter Dash Open 13.32
4758 Women 100 Meter Dash Open 13.11
4759 Women 100 Meter Dash Open 15.37
4760 Women 200 Meter Dash Open 27.22
4761 Women 200 Meter Dash Open 27.59
4762 Men 4x100 Meter Relay WUG Trial Open 41.82
476

5755 Girls Triple Jump A Div 9.73m
5756 Girls Triple Jump A Div 10.01m
5757 Girls Triple Jump A Div 9.21m
5758 Girls Triple Jump A Div 10.90m
5759 Girls Triple Jump A Div 9.66m
5760 Girls Triple Jump A Div 9.69m
5761 Girls Triple Jump A Div 10.14m
5762 Girls Triple Jump B Div 8.97m
5763 Girls Triple Jump B Div 8.92m
5764 Girls Triple Jump B Div 8.00m
5765 Girls Triple Jump B Div 9.73m
5766 Girls Triple Jump B Div 9.37m
5767 Girls Triple Jump B Div 8.82m
5768 Girls Triple Jump B Div 10.68m
5769 Girls Triple Jump B Div 10.39m
5770 Girls Triple Jump B Div 8.29m
5771 Girls Triple Jump B Div 9.55m
5772 Girls Triple Jump B Div 8.84m
5773 Girls Triple Jump B Div 9.40m
5774 Girls Triple Jump B Div 8.39m
5775 Girls Triple Jump B Div 9.95m
5776 Girls Triple Jump B Div 9.41m
5777 Girls Triple Jump B Div 8.54m
5778 Girls Triple Jump B Div 10.23m
5779 Girls Triple Jump B Div 9.86m
5780 Girls Triple Jump B Div 9.02m
5781 Girls Triple Jump C Div 8.26m
5782 Girls Triple Jump C Div 8.15m
5783 Girls Tri

6752 Girls 3000 Meter Run B Div 13:24.40
6753 Girls 3000 Meter Run B Div 14:37.09
6754 Girls 3000 Meter Run B Div 16:10.49
6755 Girls 3000 Meter Run B Div 13:21.59
6756 Girls 3000 Meter Run B Div 12:27.93
6757 Girls 3000 Meter Run B Div 13:45.50
6758 Girls 3000 Meter Run B Div 13:55.81
6759 Girls 3000 Meter Run B Div 13:36.65
6760 Girls 3000 Meter Run B Div 17:18.83
6761 Girls 3000 Meter Run B Div 13:17.63
6762 Girls 3000 Meter Run B Div 14:03.02
6763 Girls 3000 Meter Run C Div 13:30.15
6764 Girls 3000 Meter Run C Div 13:39.83
6765 Girls 3000 Meter Run C Div 13:14.05
6766 Girls 3000 Meter Run C Div 14:59.74
6767 Girls 3000 Meter Run C Div 12:51.85
6768 Girls 3000 Meter Run C Div 13:21.64
6769 Girls 400 Meter Dash A Div 1:13.54
6770 Girls 400 Meter Dash A Div 1:12.84
6771 Girls 400 Meter Dash A Div 1:13.49
6772 Girls 400 Meter Dash A Div 1:14.48
6773 Girls 400 Meter Dash A Div 1:06.16
6774 Girls 400 Meter Dash B Div 1:24.56
6775 Girls 400 Meter Dash B Div 1:08.19
6776 Girls 400 Meter Da

7741 Triathlon - 80.0 Dash 14.22
7742 Triathlon - 80.0 Dash 14.52
7743 Triathlon - 80.0 Dash 13.87
7744 Triathlon - 80.0 Dash 11.87
7745 Triathlon - 80.0 Dash 13.77
7746 Triathlon - 80.0 Dash 14.54
7747 Triathlon - 80.0 Dash 14.35
7748 Triathlon - 80.0 Dash 12.36
7749 Triathlon - 80.0 Dash 15.12
7750 Triathlon - 80.0 Dash 14.89
7751 Triathlon - 80.0 Dash 13.21
7752 Triathlon - 80.0 Dash 14.09
7753 Triathlon - 80.0 Dash 13.37
7754 Triathlon - 80.0 Dash 13.56
7755 Triathlon - 80.0 Dash 13.37
7756 Triathlon - 80.0 Dash 15.32
7757 Triathlon - 80.0 Dash 14.45
7758 Triathlon - 80.0 Dash 13.83
7759 Triathlon - 80.0 Dash 11.66
7760 Triathlon - 80.0 Dash 12.26
7761 Triathlon - 80.0 Dash 12.68
7762 Triathlon - 80.0 Dash 14.31
7763 Triathlon - 80.0 Dash 12.26
7764 Triathlon - 80.0 Dash 13.38
7765 Triathlon - 80.0 Dash 12.95
7766 Triathlon - 80.0 Dash 15.58
7767 Triathlon - 80.0 Dash 13.69
7768 Triathlon - 80.0 Dash 12.74
7769 Triathlon - 80.0 Dash 13.81
7770 Triathlon - 80.0 Dash 14.32
7771 Triat

8724 Triathlon - 0.0 Long Jump 2.22m
8725 Triathlon - 0.0 Long Jump 2.66m
8726 Triathlon - 0.0 Long Jump 4.11m
8727 Triathlon - 0.0 Long Jump 2.95m
8728 Triathlon - 0.0 Long Jump 2.22m
8729 Triathlon - 0.0 Long Jump 2.65m
8730 Triathlon - 0.0 Long Jump 3.67m
8731 Triathlon - 0.0 Long Jump 2.15m
8732 Triathlon - 0.0 Long Jump 2.53m
8733 Triathlon - 0.0 Long Jump 2.84m
8734 Triathlon - 0.0 Long Jump 2.50m
8735 Triathlon - 0.0 Long Jump 3.97m
8736 Triathlon - 0.0 Long Jump 3.26m
8737 Triathlon - 0.0 Long Jump 1.17m
8738 Triathlon - 0.0 Long Jump 2.59m
8739 Triathlon - 0.0 Long Jump 1.16m
8740 Triathlon - 0.0 Long Jump 1.46m
8741 Triathlon - 0.0 Long Jump 2.77m
8742 Triathlon - 0.0 Long Jump 3.30m
8743 Triathlon - 0.0 Long Jump 3.22m
8744 Triathlon - 0.0 Long Jump 1.60m
8745 Triathlon - 0.0 Long Jump 3.39m
8746 Triathlon - 0.0 Long Jump 1.41m
8747 Triathlon - 0.0 Long Jump 1.91m
8748 Triathlon - 0.0 Long Jump 2.62m
8749 Triathlon - 0.0 Long Jump 2.03m
8750 Triathlon - 0.0 Long Jump 1.93m
8

9705 Women 40-44 100 Meter Dash Masters 15.56
9706 Women 40-44 100 Meter Dash Masters 16.93
9707 Women 40-44 1500 Meter Run Masters 10:19.15
9708 Women 40-44 400 Meter Dash Masters 1:38.55
9709 Women 40-44 400 Meter Dash Masters 1:21.35
9710 Women 45-49 100 Meter Dash Masters 17.90
9711 Women 45-49 1500 Meter Run Masters 13:43.99
9712 Women 45-49 400 Meter Dash Masters 2:29.02
9713 Women 50-54 100 Meter Dash Masters 17.57
9714 Women 50-54 100 Meter Dash Masters 17.29
9715 Women 50-54 100 Meter Dash Masters 18.74
9716 Women 50-54 1500 Meter Run Masters 6:19.89
9717 Women 50-54 400 Meter Dash Masters 1:41.49
9718 Women 50-54 400 Meter Dash Masters 1:39.42
9719 Women 55-59 100 Meter Dash Masters 16.81
9720 Men 35-39 4x100 Meter Relay Masters 47.35
9721 Men 40-44 4x100 Meter Relay Masters 1:08.48
9722 Men 40-44 4x100 Meter Relay Masters 58.73
9723 Men 40-44 4x100 Meter Relay Masters 48.85
9724 Men 45-49 4x100 Meter Relay Masters 55.18
9725 Men 50-54 4x100 Meter Relay Masters 49.87
9726 Men

10689 Men 4x400 Meter Relay Open 3:29.05
10690 Women 10000 Meter Run Open 46:18.36
10691 Women 10000 Meter Run Open 39:46.88
10692 Women 10000 Meter Run Open 46:14.95
10693 Women 10000 Meter Run Open 37:24.85
10694 Mixed 4x100 Meter Relay Open 47.54
10695 Mixed 4x100 Meter Relay Open 47.54
10696 Mixed 4x100 Meter Relay Open 45.02
10697 Mixed 4x100 Meter Relay Open 45.02
10698 Women 100 Meter Hurdles Open 15.07
10699 Women 100 Meter Hurdles Open 14.64
10700 Women 100 Meter Hurdles Open 15.96
10701 Women 100 Meter Hurdles Open 14.89
10702 Women 100 Meter Hurdles Open 15.03
10703 Women 100 Meter Hurdles Open 14.79
10704 Women 100 Meter Hurdles Open 13.85
10705 Women 400 Meter Hurdles Open 1:15.00
10706 Women 400 Meter Hurdles Open 1:02.41
10707 Women 400 Meter Hurdles Open 1:05.05
10708 Women 400 Meter Hurdles Open 1:07.70
10709 Women 400 Meter Hurdles Open 1:09.84
10710 Women 4x100 Meter Relay Open 45.82
10711 Women 4x100 Meter Relay Open 45.82
10712 Women 4x100 Meter Relay Open 45.70
10

11669 Women 400 Meter Dash Open 1:02.98
11670 Women 400 Meter Dash Open 1:06.31
11671 Women 400 Meter Dash Open 1:12.57
11672 Women 400 Meter Dash Open 1:30.02
11673 Women 400 Meter Dash Open 1:07.98
11674 Women 400 Meter Dash Open 1:19.12
11675 Women 400 Meter Dash Open 1:17.54
11676 Women 400 Meter Dash Open 1:04.88
11677 Women 400 Meter Dash Open 1:13.43
11678 Women 400 Meter Dash Open 1:13.69
11679 Women 5000 Meter Run Open 20:00.20
11680 Women 5000 Meter Run Open 24:14.72
11681 Women 5000 Meter Run Open 22:32.01
11682 Women 5000 Meter Run Open 23:18.64
11683 Women 5000 Meter Run Open 23:14.01
11684 Women 5000 Meter Run Open 22:39.49
11685 Women 5000 Meter Run Open 25:06.08
11686 Women 5000 Meter Run Open 28:23.07
11687 Women 5000 Meter Run Open 20:48.39
11688 Women 5000 Meter Run Open 23:24.39
11689 Women 5000 Meter Run Open 22:11.36
11690 Women 5000 Meter Run Open 29:21.13
11691 Women 5000 Meter Run Open 27:07.11
11692 Women 5000 Meter Run Open 25:19.11
11693 Men 110 Meter Hurdle

12660 Boys 80 Meter Dash 9-10 13.14
12661 Boys 80 Meter Dash 9-10 13.07
12662 Boys 80 Meter Dash 9-10 14.75
12663 Boys 80 Meter Dash 9-10 12.96
12664 Boys 80 Meter Dash 9-10 12.58
12665 Boys 80 Meter Dash 9-10 14.78
12666 Boys 80 Meter Dash 9-10 15.66
12667 Boys 80 Meter Dash 9-10 15.66
12668 Boys 80 Meter Dash 9-10 11.86
12669 Boys 80 Meter Dash 9-10 13.61
12670 Boys 80 Meter Dash 9-10 13.41
12671 Boys 80 Meter Dash 9-10 12.89
12672 Boys 80 Meter Dash 9-10 13.66
12673 Boys 80 Meter Dash 9-10 12.17
12674 Boys 80 Meter Dash 9-10 12.67
12675 Boys 80 Meter Dash 9-10 15.15
12676 Boys 80 Meter Dash 9-10 13.29
12677 Boys 80 Meter Dash 9-10 13.07
12678 Boys 80 Meter Dash 9-10 12.09
12679 Boys 80 Meter Dash 9-10 12.17
12680 Boys 80 Meter Dash 9-10 13.07
12681 Boys 80 Meter Dash 9-10 13.56
12682 Boys 80 Meter Dash 9-10 12.58
12683 Boys 80 Meter Dash 9-10 13.07
12684 Boys 80 Meter Dash 9-10 12.87
12685 Boys 80 Meter Dash 9-10 16.22
12686 Boys 80 Meter Dash 9-10 12.99
12687 Boys 80 Meter Dash 9-1

13634 Girls 100 Meter Dash 13-14 15.91
13635 Girls 100 Meter Dash 13-14 14.81
13636 Girls 100 Meter Dash 13-14 15.91
13637 Girls 100 Meter Dash 13-14 14.81
13638 Girls 100 Meter Dash 13-14 14.46
13639 Girls 100 Meter Dash 13-14 14.03
13640 Girls 100 Meter Dash 13-14 15.78
13641 Girls 100 Meter Dash 15-16 14.33
13642 Girls 100 Meter Dash 15-16 13.30
13643 Girls 100 Meter Dash 15-16 13.02
13644 Girls 100 Meter Dash 15-16 14.33
13645 Girls 100 Meter Dash 15-16 13.30
13646 Girls 100 Meter Dash 15-16 13.02
13647 Girls 100 Meter Dash 15-16 14.33
13648 Girls 100 Meter Dash 15-16 13.02
13649 Girls 100 Meter Dash 15-16 13.30
13650 Girls 200 Meter Dash 11-12 29.36
13651 Girls 200 Meter Dash 11-12 29.99
13652 Girls 200 Meter Dash 11-12 45.09
13653 Girls 200 Meter Dash 11-12 37.17
13654 Girls 200 Meter Dash 11-12 31.74
13655 Girls 200 Meter Dash 11-12 30.11
13656 Girls 200 Meter Dash 11-12 29.36
13657 Girls 200 Meter Dash 11-12 39.51
13658 Girls 200 Meter Dash 11-12 35.90
13659 Girls 200 Meter Das

14594 Women 800 Meter Run Open 3:26.08
14595 Women 800 Meter Run Open 3:46.09
14596 Women 800 Meter Run Open 2:59.52
14597 Women 800 Meter Run Open 3:21.93
14598 Women 800 Meter Run Open 3:43.48
14599 Women 800 Meter Run Open 2:35.97
14600 Women 800 Meter Run Open 2:59.79
14601 Women 800 Meter Run Open 2:52.35
14602 Women 800 Meter Run Open 3:04.16
14603 Women 800 Meter Run Open 2:45.04
14604 Women 800 Meter Run Open 2:54.35
14605 Women 800 Meter Run Open 2:52.81
14606 Women 800 Meter Run Open 3:02.31
14607 Women 800 Meter Run Open 3:00.12
14608 Women 800 Meter Run Open 2:54.69
14609 Women 800 Meter Run Open 2:53.19
14610 Women 800 Meter Run Open 2:59.74
14611 Women 800 Meter Run Open 3:00.34
14612 Women 800 Meter Run Open 2:46.62
14613 Women 800 Meter Run Open 2:51.39
14614 Women 800 Meter Run Open 3:09.03
14615 Women 800 Meter Run Open 2:27.47
14616 Women 800 Meter Run Open 2:34.25
14617 Women 800 Meter Run Open 2:44.51
14618 Women 800 Meter Run Open 3:18.85
14619 Women 800 Meter Run

15586 Boys High Jump U15 1.45m
15589 Boys High Jump U15 1.55m
15591 Boys High Jump U15 1.40m
15593 Boys High Jump U15 1.45m
15594 Boys High Jump U18 1.65m
15596 Boys High Jump U18 1.60m
15597 Boys High Jump U18 1.85m
15598 Boys High Jump U18 1.87m
15599 Boys High Jump U18 1.60m
15601 Boys High Jump U18 1.65m
15602 Boys High Jump U18 1.60m
15603 Boys High Jump U18 1.75m
15604 Boys High Jump U18 1.55m
15605 Boys High Jump U18 1.45m
15606 Boys High Jump U18 1.50m
15607 Boys High Jump U18 1.60m
15609 Boys High Jump U18 1.69m
15610 Boys High Jump U18 1.71m
15611 Boys High Jump U18 1.55m
15612 Boys Long Jump U15 4.80m
15613 Boys Long Jump U15 5.63m
15614 Boys Long Jump U15 5.59m
15615 Boys Long Jump U15 4.28m
15616 Boys Long Jump U15 4.40m
15617 Boys Long Jump U15 4.16m
15618 Boys Long Jump U15 4.70m
15619 Boys Long Jump U15 3.99m
15620 Boys Long Jump U15 5.06m
15621 Boys Long Jump U15 4.76m
15622 Boys Long Jump U15 4.59m
15623 Boys Long Jump U15 4.38m
15624 Boys Long Jump U15 5.06m
15625 Bo

16500 Men 100 Meter Dash Open 11.98
16501 Men 100 Meter Dash Open 10.97
16502 Men 100 Meter Dash Open 12.44
16503 Men 100 Meter Dash Open 10.89
16504 Men 1500 Meter Run Open 4:15.81
16505 Men 1500 Meter Run Open 4:20.14
16506 Men 1500 Meter Run Open 4:26.76
16507 Men 1500 Meter Run Open 4:33.77
16508 Men 1500 Meter Run Open 4:21.84
16509 Men 1500 Meter Run Open 4:28.62
16510 Men 1500 Meter Run Open 4:43.09
16511 Men 1500 Meter Run Open 4:42.82
16512 Men 200 Meter Dash Open 23.51
16513 Men 200 Meter Dash Open 28.34
16514 Men 200 Meter Dash Open 24.20
16515 Men 200 Meter Dash Open 24.38
16516 Men 200 Meter Dash Open 24.36
16517 Men 200 Meter Dash Open 24.38
16518 Men 200 Meter Dash Open 24.77
16519 Men 200 Meter Dash Open 23.96
16520 Men 200 Meter Dash Open 23.76
16521 Men 200 Meter Dash Open 23.94
16522 Men 200 Meter Dash Open 25.31
16523 Men 200 Meter Dash Open 25.59
16524 Men 200 Meter Dash Open 22.12
16525 Men 200 Meter Dash Open 23.90
16526 Men 200 Meter Dash Open 22.47
16527 Men 40

17471 Boys 13-14 High Jump U15 1.50m
17472 Boys 13-14 High Jump U15 1.60m
17473 Boys 13-14 High Jump U15 1.50m
17474 Boys 13-14 High Jump U15 1.45m
17475 Boys 13-14 High Jump U15 1.50m
17476 Boys 13-14 Long Jump U15 4.72m
17477 Boys 13-14 Long Jump U15 3.88m
17478 Boys 13-14 Long Jump U15 4.62m
17479 Boys 13-14 Long Jump U15 5.43m
17480 Boys 13-14 Long Jump U15 4.44m
17481 Boys 13-14 Long Jump U15 3.67m
17482 Boys 13-14 Long Jump U15 4.51m
17483 Boys 13-14 Long Jump U15 4.66m
17484 Boys 13-14 Long Jump U15 3.12m
17485 Boys 13-14 Long Jump U15 4.72m
17486 Boys 13-14 Long Jump U15 4.55m
17487 Boys 13-14 Long Jump U15 4.44m
17488 Boys 13-14 Long Jump U15 5.44m
17489 Boys 13-14 Long Jump U15 4.87m
17490 Boys 13-14 Long Jump U15 4.51m
17491 Boys 13-14 Long Jump U15 5.26m
17492 Boys 13-14 Long Jump U15 4.33m
17493 Boys 13-14 Long Jump U15 4.77m
17494 Boys 13-14 Long Jump U15 5.00m
17495 Boys 13-14 Long Jump U15 5.60m
17496 Boys 13-14 Long Jump U15 4.52m
17497 Boys 13-14 Long Jump U15 5.88m
1

18450 Boys 13-14 Shot Put 4kg U15 9.76m
18451 Boys 13-14 Shot Put 4kg U15 10.47m
18452 Boys 13-14 Shot Put 4kg U15 3.09m
18453 Boys 13-14 Shot Put 4kg U15 3.09m
18454 Boys 15-16 Shot Put 5kg U17 9.16m
18455 Boys 15-16 Shot Put 5kg U17 9.13m
18456 Boys 15-16 Shot Put 5kg U17 9.92m
18457 Boys 15-16 Shot Put 5kg U17 9.92m
18458 Boys 15-16 Shot Put 5kg U17 9.92m
18459 Boys 15-16 Shot Put 5kg U17 9.13m
18460 Boys 15-16 Shot Put 5kg U17 9.13m
18461 Boys 15-16 Shot Put 5kg U17 9.92m
18462 Boys 15-16 Shot Put 5kg U17 9.16m
18463 Boys 15-16 Shot Put 5kg U17 9.16m
18464 Boys 15-16 Shot Put 5kg U17 9.13m
18465 Boys 15-16 Shot Put 5kg U17 9.16m
18466 Boys 7-10 600 Meter Run U11 2:00.79
18467 Boys 7-10 600 Meter Run U11 2:45.82
18468 Boys 7-10 600 Meter Run U11 2:36.40
18469 Boys 7-10 600 Meter Run U11 2:13.11
18470 Boys 7-10 600 Meter Run U11 2:21.63
18471 Boys 7-10 600 Meter Run U11 2:35.31
18472 Boys 7-10 600 Meter Run U11 2:14.14
18473 Boys 7-10 600 Meter Run U11 2:35.23
18474 Boys 7-10 600 Met

19433 Girls 11-12 600 Meter Run U13 2:09.68
19434 Girls 11-12 600 Meter Run U13 2:24.20
19435 Girls 11-12 600 Meter Run U13 2:10.28
19436 Girls 11-12 600 Meter Run U13 2:09.34
19437 Girls 11-12 600 Meter Run U13 2:32.09
19438 Girls 11-12 600 Meter Run U13 2:17.19
19439 Girls 11-12 600 Meter Run U13 2:17.79
19440 Girls 11-12 600 Meter Run U13 2:12.23
19441 Girls 11-12 600 Meter Run U13 2:13.11
19442 Girls 11-14 800 Meter Run U15 2:56.93
19443 Girls 11-14 800 Meter Run U15 3:28.47
19444 Girls 11-14 800 Meter Run U15 2:48.40
19445 Girls 11-14 800 Meter Run U15 2:41.95
19446 Girls 11-14 800 Meter Run U15 3:06.75
19447 Girls 11-14 800 Meter Run U15 3:15.67
19448 Girls 11-14 800 Meter Run U15 2:49.16
19449 Girls 11-14 800 Meter Run U15 3:01.96
19450 Girls 11-14 800 Meter Run U15 2:46.36
19451 Girls 11-14 800 Meter Run U15 2:37.19
19452 Girls 11-14 800 Meter Run U15 3:07.65
19453 Girls 11-14 800 Meter Run U15 2:43.06
19454 Girls 11-14 800 Meter Run U15 3:15.45
19455 Girls 11-14 800 Meter Run 

20426 Men 45-54 1 Mile Run (45-54) Masters 5:51.07
20427 Boys 11-12 Long Jump 50cm Takeoff U13 3.45m
20428 Boys 11-12 Long Jump 50cm Takeoff U13 3.32m
20429 Boys 11-12 Long Jump 50cm Takeoff U13 3.94m
20430 Boys 11-12 Long Jump 50cm Takeoff U13 4.50m
20431 Boys 11-12 Long Jump 50cm Takeoff U13 3.38m
20432 Boys 11-12 Long Jump 50cm Takeoff U13 3.46m
20433 Boys 11-12 Long Jump 50cm Takeoff U13 3.25m
20434 Boys 11-12 Long Jump 50cm Takeoff U13 4.66m
20435 Boys 11-12 Long Jump 50cm Takeoff U13 3.75m
20436 Boys 11-12 Long Jump 50cm Takeoff U13 4.40m
20437 Boys 11-12 Long Jump 50cm Takeoff U13 4.40m
20438 Boys 11-12 Long Jump 50cm Takeoff U13 4.32m
20439 Boys 11-12 Long Jump 50cm Takeoff U13 4.13m
20440 Boys 11-12 Long Jump 50cm Takeoff U13 3.00m
20441 Boys 11-12 Long Jump 50cm Takeoff U13 3.45m
20442 Boys 11-12 Long Jump 50cm Takeoff U13 4.62m
20443 Boys 11-12 Long Jump 50cm Takeoff U13 4.08m
20444 Boys 11-12 Long Jump 50cm Takeoff U13 3.51m
20445 Boys 11-12 Long Jump 50cm Takeoff U13 3.73m

In [2181]:
df["AGE"].fillna(0, inplace=True)
df['AGE'] = df['AGE'].astype('float')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["AGE"].fillna(0, inplace=True)


In [2182]:
df

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,DATE,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,SEED_CONV
0,Ethan Yan,,04:32.4,Singapore,0.0,35,"Mile, Road",11-Jan-01,,Mid,...,2023,,,,,,,,272.40,272.40
1,Romaine Soh,,05:24.2,Singapore,0.0,28,"Mile, Road",26-Nov-94,,Mid,...,2023,,,,,,,,324.20,324.20
2,Chui Ling Goh,,02:10.2,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075,130.20,130.20
3,Chui Ling Goh,,04:29.6,Singapore,0.0,12,1500m,27-Nov-92,,Mid,...,2023,1500m,4:26.33,1500m,266.33,271.6566,275.65155,279.6465,269.60,269.60
4,Chui Ling Goh,,02:09.8,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,2023,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075,129.80,129.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,0.0,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,...,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,48.33,48.33
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,0.0,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,49.84,49.84
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,0.0,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,2023,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,50.19,50.19
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,0.0,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,...,2023,4 x 100m relay,44.58,4 x 100m relay,44.58,45.4716,46.14030,46.8090,58.65,58.65


In [2183]:
# Choose SEED if better than RESULT

condition1=df['SEED_CONV']>df['RESULT_CONV']
condition2=((df['CATEGORY_EVENT']=='Jump')|(df['CATEGORY_EVENT']=='Throw'))
condition3=df['SEED_CONV']<df['RESULT_CONV']
condition4=~((df['CATEGORY_EVENT']=='Jump')|(df['CATEGORY_EVENT']=='Throw'))


df['RESULT_BEST']=df['SEED_CONV'].where((condition1 & condition2)|(condition3 & condition4), df['RESULT_CONV'].values)

In [2184]:
df

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST
0,Ethan Yan,,04:32.4,Singapore,0.0,35,"Mile, Road",11-Jan-01,,Mid,...,,,,,,,,272.40,272.40,272.40
1,Romaine Soh,,05:24.2,Singapore,0.0,28,"Mile, Road",26-Nov-94,,Mid,...,,,,,,,,324.20,324.20,324.20
2,Chui Ling Goh,,02:10.2,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075,130.20,130.20,130.20
3,Chui Ling Goh,,04:29.6,Singapore,0.0,12,1500m,27-Nov-92,,Mid,...,1500m,4:26.33,1500m,266.33,271.6566,275.65155,279.6465,269.60,269.60,269.60
4,Chui Ling Goh,,02:09.8,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,800m,2:09.15,800m,129.15,131.7330,133.67025,135.6075,129.80,129.80,129.80
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,0.0,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,...,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,48.33,48.33,48.33
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,0.0,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,49.84,49.84,49.84
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,0.0,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,4 x 100m relay,39.36,4 x 100m relay,39.36,40.1472,40.73760,41.3280,50.19,50.19,50.19
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,0.0,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,...,4 x 100m relay,44.58,4 x 100m relay,44.58,45.4716,46.14030,46.8090,58.65,58.65,58.65


In [2185]:
df[df['NAME']=='Lucas Le Cong Fun']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,MAPPED_EVENT,BENCHMARK,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST
15,Lucas Le Cong Fun,,05:11.2,Singapore,0.0,7,1500m,11-Jan-06,,Mid,...,1500m,,,,,,,311.2,311.2,311.2
29,Lucas Le Cong Fun,,1.87,Singapore,0.0,3,High Jump,11-Jan-06,,Jump,...,High jump,2.15 m,High jump,2.15,2.107,2.07475,2.0425,1.87,1.87,1.87
30,Lucas Le Cong Fun,,3.4,Singapore,0.0,3,Pole Vault,11-Jan-06,,Jump,...,Pole vault,5.20 m,Pole vault,5.2,5.096,5.018,4.94,3.4,3.4,3.4
31,Lucas Le Cong Fun,,6.3,Singapore,0.0,5,Long Jump,11-Jan-06,,Jump,...,Long jump,7.62,Long jump,7.62,7.4676,7.3533,7.239,6.3,6.3,6.3
32,Lucas Le Cong Fun,,1.9,Singapore,0.0,11,High Jump,11-Jan-06,,Jump,...,High jump,2.15 m,High jump,2.15,2.107,2.07475,2.0425,1.9,1.9,1.9
96,Lucas Le Cong Fun,,11.42,Singapore,0.0,6,Shot Put (5kg),11-Jan-06,,Throw,...,Shot put,17.3,Shot put,17.3,16.954,16.6945,16.435,11.42,11.42,11.42
97,Lucas Le Cong Fun,,39.67,Singapore,0.0,2,Discus Throw (1.5kg),11-Jan-06,,Throw,...,Discus throw,50.02,Discus throw,50.02,49.0196,48.2693,47.519,39.67,39.67,39.67
98,Lucas Le Cong Fun,,47.94,Singapore,0.0,1,Javelin Throw (700g),11-Jan-06,,Throw,...,Javelin throw,66.2,Javelin throw,66.2,64.876,63.883,62.89,47.94,47.94,47.94
147,Lucas Le Cong Fun,,11.83,Singapore,0.0,5,100m,11-Jan-06,,Sprint,...,100m,10.443,100m,10.443,10.65186,10.808505,10.96515,11.83,11.83,11.83
148,Lucas Le Cong Fun,,53.95,Singapore,0.0,7,400m,11-Jan-06,,Sprint,...,400m,46.63,400m,46.63,47.5626,48.26205,48.9615,53.95,53.95,53.95


In [2186]:
mask = df['CATEGORY_EVENT'].str.contains(r'Jump|Throw', na=True)

df.loc[mask, 'Delta2'] = df['RESULT_BEST']-df['2%']
df.loc[mask, 'Delta3.5'] = df['RESULT_BEST']-df['3.5%']
df.loc[mask, 'Delta5'] = df['RESULT_BEST']-df['5%']

df.loc[~mask, 'Delta2'] =  df['2%'] - df['RESULT_BEST']
df.loc[~mask, 'Delta3.5'] = df['3.5%'] - df['RESULT_BEST']
df.loc[~mask, 'Delta5'] = df['5%'] - df['RESULT_BEST']



#rslt_df['Delta2']=rslt_df['2pc']-rslt_df['RESULT_CONV']
#rslt_df['Delta35']=rslt_df['35pc']-rslt_df['RESULT_CONV']
#rslt_df['Delta5']=rslt_df['5pc']-rslt_df['RESULT_CONV']
df=df.loc[df['COMPETITION']!='SEA Games']

In [2187]:
# Performance metric to filter out athletes

df['PERF_SCALAR']=df['Delta5']/df['Metric']*100

In [2188]:
df.to_csv('df_test.csv', sep=',', encoding='utf-8-sig', index=False)


In [2189]:
# Correct some of the known name variations

df['NAME'] = df['NAME'].replace(regex=r'PRAHARSH, RYAN', value='S/O SUBASH SOMAN, PRAHARSH RYAN')
df['NAME'] = df['NAME'].replace(regex=r'Ryan, Praharsh', value='S/O SUBASH SOMAN, PRAHARSH RYAN')
df['NAME'] = df['NAME'].replace(regex=r'Ryan Praharsh', value='S/O SUBASH SOMAN, PRAHARSH RYAN')

df['NAME'] = df['NAME'].replace(regex=r'Ryan Praharsh', value='S/O SUBASH SOMAN, PRAHARSH RYAN')

df['NAME'] = df['NAME'].replace(regex=r'Norhisham, Jamie El Redha Ang El Redha Ang', value='Norhisham, Jamie El Redha Ang')



df['NAME'] = df['NAME'].replace(regex=r'TAN, ELIZABETH ANN SHEE R', value='TAN, ELIZABETH-ANN')
df['NAME'] = df['NAME'].replace(regex=r'Tan, Elizabeth Ann', value='TAN, ELIZABETH-ANN')
df['NAME'] = df['NAME'].replace(regex=r'TAN, SHEE RU, ELIZABETH-A', value='TAN, ELIZABETH-ANN')
df['NAME'] = df['NAME'].replace(regex=r'Elizabeth-Ann Tan', value='TAN, ELIZABETH-ANN')
df['NAME'] = df['NAME'].replace(regex=r'Tan, Elizabeth-Ann', value='TAN, ELIZABETH-ANN')



df['NAME'] = df['NAME'].replace(regex=r'LOUIS, MARC BRIAN', value='Louis, Marc Brian')
df['NAME'] = df['NAME'].replace(regex=r'LOUIS, MARC BRIAN BRIAN', value='Louis, Marc Brian')
df['NAME'] = df['NAME'].replace(regex=r'Louis, Marc Brian Brian', value='Louis, Marc Brian')
df['NAME'] = df['NAME'].replace(regex=r'Louis, Marc', value='Louis, Marc Brian')
df['NAME'] = df['NAME'].replace(regex=r'Marc Brian Louis', value='Louis, Marc Brian')
df['NAME'] = df['NAME'].replace(regex=r'Brian louis, Marc', value='Louis, Marc Brian')

df['NAME'] = df['NAME'].replace(regex=r'RAPHAEL, RYAN', value='Raphael, Ryan')
df['NAME'] = df['NAME'].replace(regex=r'NG, RYAN RAPHAEL', value='Raphael, Ryan')



df['NAME'] = df['NAME'].replace(regex=r'TAN JUN JIE', value='Tan, Jun Jie')
df['NAME'] = df['NAME'].replace(regex=r'Tan Jun Jie', value='Tan, Jun Jie')

df['NAME'] = df['NAME'].replace(regex=r'Shanti Veronica Pereira', value='PEREIRA, VERONICA SHANTI')
df['NAME'] = df['NAME'].replace(regex=r'LIEW, YEE LING, BERNICE', value='Liew, Bernice')
df['NAME'] = df['NAME'].replace(regex=r'LIEW, BERNICE YEE LING', value='Liew, Bernice')

df['NAME'] = df['NAME'].replace(regex=r'Mark Lee', value='LEE, MARK REN')




df['NAME'] = df['NAME'].replace(regex=r'SNG, MICHELLE', value='Sng, Michelle')
df['NAME'] = df['NAME'].replace(regex=r'SNG, SUAT LI, MICHELLE', value='Sng, Michelle')

df['NAME'] = df['NAME'].replace(regex=r'MUN, IVAN', value='Mun, Ivan')
df['NAME'] = df['NAME'].replace(regex=r'LOW, JUN YU', value='Low, Jun Yu')

df['NAME'] = df['NAME'].replace(regex=r'ANG, CHEN XIANG', value='Ang, Chen Xiang')
df['NAME'] = df['NAME'].replace(regex=r'LIM, OLIVER', value='Lim, Oliver')

df['NAME'] = df['NAME'].replace(regex=r'HO, XANDER, ANN HENG', value='Ho, Xander Ann Heng')
df['NAME'] = df['NAME'].replace(regex=r'XANDER, HO ANN HENG', value='Ho, Xander Ann Heng')
df['NAME'] = df['NAME'].replace(regex=r'Ho Ann Heng, Xander', value='Ho, Xander Ann Heng')


df['NAME'] = df['NAME'].replace(regex=r'CHUA, JOSHUA', value='Chua, Joshua')

df['NAME'] = df['NAME'].replace(regex=r'LEE YU FOONG (LI YUFENG)', value='Lee, Yu Foong')
df['NAME'] = df['NAME'].replace(regex=r'YAP, SEAN YEE', value='Yap, Sean Yee')
df['NAME'] = df['NAME'].replace(regex=r'KRISHNAN, HARIHARAN S/O', value='S/O Krishna, Hariharan')
df['NAME'] = df['NAME'].replace(regex=r'Norhisham, Jamie', value='Norhisham, Jamie El Redha Ang')

df['NAME'] = df['NAME'].replace(regex=r'ALEXIS TEO PEI QI', value='Teo, Alexis')
df['NAME'] = df['NAME'].replace(regex=r'HENG, FU HONG, BRANDON', value='Heng, Brandon')


df['NAME'] = df['NAME'].replace(regex=r'Shaun, Goh', value='GOH, SHAUN')

df['NAME'] = df['NAME'].replace(regex=r'Enriquez, Roxanne Rose Zulueta', value='ROSE ZULUETA ENRIQUE, ROXANNE')
df['NAME'] = df['NAME'].replace(regex=r'ENRIQUEZ, ROXANNE ROSE ZULUETA', value='ROSE ZULUETA ENRIQUE, ROXANNE')
df['NAME'] = df['NAME'].replace(regex=r'ENRIQUEZ, ROSE ZULUETA, ROXANN', value='ROSE ZULUETA ENRIQUE, ROXANNE')

df['NAME'] = df['NAME'].replace(regex=r'Goh, Clara', value='GOH, CLARA')

df['NAME'] = df['NAME'].replace(regex=r'WONG JIN ROU', value='Wong, Jin Rou')
df['NAME'] = df['NAME'].replace(regex=r'Michelle Sng Suat Li', value='Michelle Sng')
df['NAME'] = df['NAME'].replace(regex=r'Sng, Michelle', value='Michelle Sng')

df['NAME'] = df['NAME'].replace(regex=r'CHONG LE XUAN, CHLOE', value='Chong Le Xuan, Chloe')

df['NAME'] = df['NAME'].replace(regex=r'Shun Yi Audrey Koh', value='KOH SHUN YI AUDREY')

df['NAME'] = df['NAME'].replace(regex=r'THANA RAJAN, S/O, THIRUBEN', value='THANA RAJAN, THIRUBEN  S/O')
df['NAME'] = df['NAME'].replace(regex=r'Rajan Thiruben Thana', value='THANA RAJAN, THIRUBEN  S/O')

df['NAME'] = df['NAME'].replace(regex=r'Zubin Percy Muncherji', value='PERCY MUNCHERJI, ZUBIN')
df['NAME'] = df['NAME'].replace(regex=r'Muncherji, Zubin', value='PERCY MUNCHERJI, ZUBIN')
df['NAME'] = df['NAME'].replace(regex=r'MUNCHERJI, ZUBIN PERCY', value='PERCY MUNCHERJI, ZUBIN')

df['NAME'] = df['NAME'].replace(regex=r'Raam Kumar Muthukumaran', value='Muthukumaran, Raam Kumar')

df['NAME'] = df['NAME'].replace(regex=r'CHIA, WEI EN, SHAWN', value='Chia, Shawn')
df['NAME'] = df['NAME'].replace(regex=r'FAYIZ, MOHAMED HAJA', value='Fayiz, Mohamed Haja')
df['NAME'] = df['NAME'].replace(regex=r'., Hariharan', value='S/O Krishna, Hariharan')
df['NAME'] = df['NAME'].replace(regex=r'Chia Wei En, Shawn', value='Chia, Shawn')

df['NAME'] = df['NAME'].replace(regex=r'S/O KrishnS/O Krishna, Hariharan', value='S/O Krishna, Hariharan')

df['NAME'] = df['NAME'].replace(regex=r'Wong Yijie Lucas', value='Wong, Yijie Lucas')

df['NAME'] = df['NAME'].replace(regex=r'QUEK XIN YONG JEREMIAH', value='QUEK, JEREMIAH')
df['NAME'] = df['NAME'].replace(regex=r'quek xin yong jeremi', value='QUEK, JEREMIAH')

df['NAME'] = df['NAME'].replace(regex=r'Han Qi Feng', value='Feng, Han Qi')
df['NAME'] = df['NAME'].replace(regex=r'FENG HAN QI', value='Feng, Han Qi')

df['NAME'] = df['NAME'].replace(regex=r'Chiu, Jingwen', value='Chiu, Jing Wen')

df['NAME'] = df['NAME'].replace(regex=r'Brandon Heng Fu Hong', value='Heng, Brandon')
df['NAME'] = df['NAME'].replace(regex=r'LEE YU FOONG (LI YUFENG)', value='Lee, Yu Foong')

df['NAME'] = df['NAME'].replace(regex=r'YEE, CHUN WAI, ERIC', value='Yee Chun Wai, Eric')
df['NAME'] = df['NAME'].replace(regex=r'Yee, Chun Wai, Eric', value='Yee Chun Wai, Eric')

df['NAME'] = df['NAME'].replace(regex=r'LOW CHUAN YI', value='Low, Chuan Yi')

df['NAME'] = df['NAME'].replace(regex=r'ESTHER TAY SHEE WEI', value='Tay, Esther')
df['NAME'] = df['NAME'].replace(regex=r'Esther Tay', value='Tay, Esther')

df['NAME'] = df['NAME'].replace(regex=r'Rachel Yang Bing Jie', value='YANG, RACHEL')

df['NAME'] = df['NAME'].replace(regex=r'Jun Yu Low', value='Low, Jun Yu')
df['NAME'] = df['NAME'].replace(regex=r'Low Jun Yu', value='Low, Jun Yu')

df['NAME'] = df['NAME'].replace(regex=r'Dewey Ng', value='Ng, Dewey')
df['NAME'] = df['NAME'].replace(regex=r'NG, DEWEY', value='Ng, Dewey')

df['NAME'] = df['NAME'].replace(regex=r'., Brendon Ting Li King', value='Ting Li King, Brandon')
df['NAME'] = df['NAME'].replace(regex=r'TAN KIAN SWEE ISAAC', value='Tan, Isaac')
df['NAME'] = df['NAME'].replace(regex=r'TAN, KIAN SWEE ISAAC', value='Tan, Isaac')

df['NAME'] = df['NAME'].replace(regex=r'GAO SHENGWEI', value='Gao, Shengwei')
df['NAME'] = df['NAME'].replace(regex=r'NEO SHYH KAI', value='Neo, Shyh Kai')
df['NAME'] = df['NAME'].replace(regex=r'ANGEL LIM YU XIN', value='Lim, Yuxin Angel')
df['NAME'] = df['NAME'].replace(regex=r'LAUREL, LIM JIA EN', value='LAUREL LIM JIA EN')

df['NAME'] = df['NAME'].replace(regex=r'LOH DING RONG, ANSON', value='Loh, Anson')
df['NAME'] = df['NAME'].replace(regex=r'CHUA HSIN-WEN CLARA', value='Chua, Clara')

df['NAME'] = df['NAME'].replace(regex=r'SEAN, TEE YU XIANG', value='SEAN TEE YU XIANG')
df['NAME'] = df['NAME'].replace(regex=r'NG JUN HENG, RYAN', value='Ng, Ryan')
df['NAME'] = df['NAME'].replace(regex=r'LIM WEE ENG', value='Lim, Wee Eng')

df['NAME'] = df['NAME'].replace(regex=r'FAITH JIA YIN KOH', value='Koh, Faith')

df['NAME'] = df['NAME'].replace(regex=r'Chen Xiang Ang', value='Ang, Chen Xiang')
df['NAME'] = df['NAME'].replace(regex=r'CHUA JE-AN, GARRETT', value='Chua, Garrett')
df['NAME'] = df['NAME'].replace(regex=r'Looi, Yan Ning Samantha', value='LOOI YAN NING, SAMANTHA')
df['NAME'] = df['NAME'].replace(regex=r'Lim, Sheila Qi Le', value='Lim Qi Le, Sheila')
df['NAME'] = df['NAME'].replace(regex=r'FAITH JIA YIN KOH', value='Koh, Faith')
df['NAME'] = df['NAME'].replace(regex=r'NG, CHIN HUI', value='Ng, Chin Hui')
df['NAME'] = df['NAME'].replace(regex=r'SEAN, TEE YU XIANG', value='SEAN TEE YU XIANG')
df['NAME'] = df['NAME'].replace(regex=r'CHUA HSIN-WEN CLARA', value='Chua, Clara')

df['NAME'] = df['NAME'].replace(regex=r'Edsel Poh', value='Poh, Edsel')
df['NAME'] = df['NAME'].replace(regex=r'Gautam, Kulandaiveludayar Se', value='Kulandaiveludayar Se, Gautam')
df['NAME'] = df['NAME'].replace(regex=r'FAITH ZHEN FORD', value='Faith Zhen Ford')
df['NAME'] = df['NAME'].replace(regex=r'FAITH ZHEN FORD', value='Faith Zhen Ford')


In [2190]:
df

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST,Delta2,Delta3.5,Delta5,PERF_SCALAR
0,Ethan Yan,,04:32.4,Singapore,0.0,35,"Mile, Road",11-Jan-01,,Mid,...,,,,272.40,272.40,272.40,,,,
1,Romaine Soh,,05:24.2,Singapore,0.0,28,"Mile, Road",26-Nov-94,,Mid,...,,,,324.20,324.20,324.20,,,,
2,Chui Ling Goh,,02:10.2,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,131.7330,133.67025,135.6075,130.20,130.20,130.20,1.5330,3.47025,5.4075,4.186992
3,Chui Ling Goh,,04:29.6,Singapore,0.0,12,1500m,27-Nov-92,,Mid,...,271.6566,275.65155,279.6465,269.60,269.60,269.60,2.0566,6.05155,10.0465,3.772200
4,Chui Ling Goh,,02:09.8,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,131.7330,133.67025,135.6075,129.80,129.80,129.80,1.9330,3.87025,5.8075,4.496709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,0.0,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,...,40.1472,40.73760,41.3280,48.33,48.33,48.33,-8.1828,-7.59240,-7.0020,-17.789634
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,0.0,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,40.1472,40.73760,41.3280,49.84,49.84,49.84,-9.6928,-9.10240,-8.5120,-21.626016
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,0.0,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,40.1472,40.73760,41.3280,50.19,50.19,50.19,-10.0428,-9.45240,-8.8620,-22.515244
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,0.0,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,...,45.4716,46.14030,46.8090,58.65,58.65,58.65,-13.1784,-12.50970,-11.8410,-26.561238


In [2191]:
# Exclude foreigners from MALAYSIA, THAILAND etc.

df_select = df[(df['TEAM']!='Malaysia') & (df['TEAM']!='THAILAND') & (df['TEAM']!='China') & (df['TEAM']!='South Korea') & (df['TEAM']!='Laos') & (df['TEAM']!='Philippines') & (df['TEAM']!='Piboonbumpen Thailand') & (df['TEAM']!='Chinese Taipei') & (df['TEAM']!='Gurkha Contingent') & (df['TEAM']!='Australia') & (df['TEAM']!='Piboonbumpen Thailand')] 


In [2192]:
df_select

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST,Delta2,Delta3.5,Delta5,PERF_SCALAR
0,Ethan Yan,,04:32.4,Singapore,0.0,35,"Mile, Road",11-Jan-01,,Mid,...,,,,272.40,272.40,272.40,,,,
1,Romaine Soh,,05:24.2,Singapore,0.0,28,"Mile, Road",26-Nov-94,,Mid,...,,,,324.20,324.20,324.20,,,,
2,Chui Ling Goh,,02:10.2,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,131.7330,133.67025,135.6075,130.20,130.20,130.20,1.5330,3.47025,5.4075,4.186992
3,Chui Ling Goh,,04:29.6,Singapore,0.0,12,1500m,27-Nov-92,,Mid,...,271.6566,275.65155,279.6465,269.60,269.60,269.60,2.0566,6.05155,10.0465,3.772200
4,Chui Ling Goh,,02:09.8,Singapore,0.0,5,800m,27-Nov-92,,Mid,...,131.7330,133.67025,135.6075,129.80,129.80,129.80,1.9330,3.87025,5.8075,4.496709
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20747,"{677: 'Toh, Eddie', 1521: 'Tan, Kevin', 1122: ...",,48.33,Club ZOOM,0.0,1,Men 35-99 4x100 Meter Relay (160-199) To Masters,,,Relay,...,40.1472,40.73760,41.3280,48.33,48.33,48.33,-8.1828,-7.59240,-7.0020,-17.789634
20748,"{486: 'Richardson, James', 1388: 'Chin, Khen T...",,49.84,Singapore Masters Track & Fiel,0.0,1,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,40.1472,40.73760,41.3280,49.84,49.84,49.84,-9.6928,-9.10240,-8.5120,-21.626016
20749,"{1087: 'Adrian, Tan', 1084: 'Kok Keong, Lee', ...",,50.19,Erovra Club,0.0,2,Men 35-99 4x100 Meter Relay (200-239) To Masters,,,Relay,...,40.1472,40.73760,41.3280,50.19,50.19,50.19,-10.0428,-9.45240,-8.8620,-22.515244
20750,"{147: 'Poh, Eelyn', 145: 'Chew, Cassandra', 14...",,58.65,Singapore Masters Track & Fiel,0.0,1,Women 35-99 4x100 Meter Relay (160-199) To Mas...,,,Relay,...,45.4716,46.14030,46.8090,58.65,58.65,58.65,-13.1784,-12.50970,-11.8410,-26.561238


In [2193]:
df_select[df_select['NAME']=='Lim, Joshua']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST,Delta2,Delta3.5,Delta5,PERF_SCALAR
4744,"Lim, Joshua",52.47,52.43,Wings Athletics Club,29.0,2,Men 400 Meter Dash Open,1994-12-28,,Sprint,...,47.5626,48.26205,48.9615,52.43,52.47,52.43,-4.8674,-4.16795,-3.4685,-7.438344
5009,"Lim, Joshua",2:03.22,2:07.65,Wings Athletics Club,29.0,5,Men 800 Meter Run Open,1994-12-28,,Mid,...,116.1372,117.8451,119.553,127.65,123.22,123.22,-7.0828,-5.3749,-3.667,-3.220622
5731,"Lim, Joshua",2:03.22,2:04.77,Wings Athletics Club,29.0,7,Men 800 Meter Run Open,1994-12-28 00:00:00.000,,Mid,...,116.1372,117.8451,119.553,124.77,123.22,123.22,-7.0828,-5.3749,-3.667,-3.220622
5873,"Lim, Joshua",,54.14,Wings Athletics Club,29.0,7,Men 400 Meter Dash Open,1994-12-28 00:00:00.000,,Sprint,...,47.5626,48.26205,48.9615,54.14,54.14,54.14,-6.5774,-5.87795,-5.1785,-11.105511
10533,"Lim, Joshua",53.21,52.47,Wings Athletics Club,29.0,16,Men 400 Meter Dash Open,1994-12-28 00:00:00.000,,Sprint,...,47.5626,48.26205,48.9615,52.47,53.21,52.47,-4.9074,-4.20795,-3.5085,-7.524126
11966,"Lim, Joshua",52.43,53.39,Wings Athletics Club,29.0,11,Men 400 Meter Dash Open,1994-12-28,,Sprint,...,47.5626,48.26205,48.9615,53.39,52.43,52.43,-4.8674,-4.16795,-3.4685,-7.438344
16074,"Lim, Joshua",2:03.00,2:04.54,Wings Athletics Club,29.0,7,Men 800 Meter Run Open,1994-12-28 00:00:00.000,,Mid,...,116.1372,117.8451,119.553,124.54,123.0,123.0,-6.8628,-5.1549,-3.447,-3.027402
16081,"Lim, Joshua",2:03.00,2:09.06,Wings Athletics Club,29.0,9,Men 800 Meter Run Open,1994-12-28 00:00:00.000,,Mid,...,116.1372,117.8451,119.553,129.06,123.0,123.0,-6.8628,-5.1549,-3.447,-3.027402
16527,"Lim, Joshua",54.14,53.21,Wings Athletics Club,29.0,4,Men 400 Meter Dash Open,1994-12-28 00:00:00.000,,Sprint,...,47.5626,48.26205,48.9615,53.21,54.14,53.21,-5.6474,-4.94795,-4.2485,-9.111087
16530,"Lim, Joshua",54.14,52.63,Wings Athletics Club,29.0,2,Men 400 Meter Dash Open,1994-12-28 00:00:00.000,,Sprint,...,47.5626,48.26205,48.9615,52.63,54.14,52.63,-5.0674,-4.36795,-3.6685,-7.867253


In [2194]:
#df_calvin.to_csv("df_calvin.csv", encoding='utf-8')

In [2195]:
foreigners = pd.read_csv('/Users/veesheenyuen/Desktop/DataScience/SAA/MM/List of Foreigners.csv', encoding='latin-1')


In [2196]:
foreigners['V1'] = foreigners['LAST_NAME']+' '+foreigners['FIRST_NAME']
foreigners['V2'] = foreigners['FIRST_NAME']+' '+foreigners['LAST_NAME']
foreigners['V3'] = foreigners['LAST_NAME']+', '+foreigners['FIRST_NAME']
foreigners['V4'] = foreigners['FIRST_NAME']+' '+foreigners['LAST_NAME']

for1 = foreigners['V1'].dropna().tolist()
for2 = foreigners['V2'].dropna().tolist()
for3 = foreigners['V3'].dropna().tolist()
for4 = foreigners['V4'].dropna().tolist()

foreign_list = for1+for2+for3+for4 

foreign_list_casefold=[s.casefold() for s in foreign_list]

exclusions = foreign_list_casefold

excluded_list = df_select.loc[~df['NAME'].str.casefold().isin(exclusions)]  # ~ means NOT IN. DROP spex carded athletes

In [2197]:
#rslt_df = df.loc[(df['RESULT_CONV'] < df['2pc']) & (df['AGE']<40) & (df['EVENT']!='Marathon')]

In [2198]:
#rslt_df = df.loc[(((df['CATEGORY_EVENT']=='Jump')|(df['CATEGORY_EVENT']=='Throw'))&(df['RESULT_CONV'] >= df['5pc']) & (df['AGE']<40) & ((df['EVENT']!='Marathon')|(df['AGE']<60) & (df['EVENT']=='Marathon')))]

In [2199]:
# Choose the best performing event for each athlete

top_performers_clean = excluded_list.sort_values(['NAME','PERF_SCALAR'],ascending=False).groupby('NAME').head(1) # Choose top performing event per NAME


In [2200]:
top_performers_clean[top_performers_clean['NAME']=='Ng, Chin Hui']

Unnamed: 0,NAME,SEED,RESULT,TEAM,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,...,2%,3.5%,5%,RESULT_CONV,SEED_CONV,RESULT_BEST,Delta2,Delta3.5,Delta5,PERF_SCALAR
10539,"Ng, Chin Hui",49.81,49.14,Singapore,29.0,11,Men 400 Meter Dash Open,1994-12-01 00:00:00.000,,Sprint,...,47.5626,48.26205,48.9615,49.14,49.81,49.14,-1.5774,-0.87795,-0.1785,-0.382801


In [2201]:
top_performers_clean.to_csv('top_performers_clean.csv', sep=',', encoding='utf-8-sig', index=False)


In [2202]:
# Filter top 8 performances for each event and gender

top_performers = top_performers_clean.sort_values(by=['MAPPED_EVENT', 'GENDER', 'PERF_SCALAR'],ascending=[False, False, False]).groupby(['GENDER', 'MAPPED_EVENT']).head(8).reset_index(drop=True)




In [2203]:
top_performers.to_csv('top_performers_checkpoint.csv', sep=',', encoding='utf-8-sig', index=False)


In [1658]:
# Apply Rule 4 above...if an athlete is in more than one event, choose the best performing one


#top_performers_final = top_performers_sorted.sort_values(by=['MAPPED_EVENT', 'GENDER', 'PERF_SCALAR'], ascending=[False, False, False], na_position='last')


In [1659]:
top_performers_final.to_csv('top_performers_final.csv', sep=',', encoding='utf-8-sig', index=False)


In [1095]:
df_top = top_performers_final

In [1096]:
df_top

Unnamed: 0,NAME,RESULT,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,...,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,Delta2,Delta3.5,Delta5,PERF_SCALAR
0,"Ting Li King, Brandon",15.03m,21.0,1,Men Triple Jump Open,2002-01-09 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.7,15.386,15.1505,14.915,15.03,-0.356,-0.1205,0.115,0.732484
1,"ANURA, ANDRE",14.93m,24.0,2,Men Triple Jump Open,1999-06-12 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.7,15.386,15.1505,14.915,14.93,-0.456,-0.2205,0.015,0.095541
2,"Lee, Gabriel",14.88m,20.0,3,Men Triple Jump Open,2003-02-23 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.7,15.386,15.1505,14.915,14.88,-0.506,-0.2705,-0.035,-0.222930
3,"., Brendon Ting Li King",14.86m,21.0,1,Men 16-99 Triple Jump Open,2002-01-09,,Jump,Male,10th Club ZOOM Kindred Spirit Series 2023,...,Triple jump,15.7,15.386,15.1505,14.915,14.86,-0.526,-0.2905,-0.055,-0.350318
5,TAN KIAN SWEE ISAAC,14.31,0.0,1.0,Triple Jump,,,Jump,Male,NSG,...,Triple jump,15.7,15.386,15.1505,14.915,14.31,-1.076,-0.8405,-0.605,-3.853503
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597,"Ho, Kayden",53.73,6.0,8,Boys 4-6 200 Meter Run U7,2017-05-08,,Sprint,Male,10th Club ZOOM Kindred Spirit Series 2023,...,,,,,,53.73,,,,
595,"Gowreeson, Nolan",40.77,6.0,1,Boys 4-6 200 Meter Run U7,2017-01-24,,Sprint,Male,10th Club ZOOM Kindred Spirit Series 2023,...,,,,,,40.77,,,,
598,"Castelino, Ivaan",50.65,6.0,6,Boys 4-6 200 Meter Run U7,2017-08-25,,Sprint,Male,10th Club ZOOM Kindred Spirit Series 2023,...,,,,,,50.65,,,,
599,"Syed Ahmed Riadh, Sharifah Falisha",2:51.34,17.0,1,-,2006-05-20 00:00:00.000,,,Female,SA Allcomers Meet 2,...,,,,,,171.34,,,,


In [1019]:
# Apply Rule 3 & is within 5% performance band

octc_df = df_top.loc[(((df_top['CATEGORY_EVENT']=='Mid')|(df_top['CATEGORY_EVENT']=='Sprint')|(df_top['CATEGORY_EVENT']=='Long')|(df['CATEGORY_EVENT']=='Hurdles')|(df['CATEGORY_EVENT']=='Walk')|(df['CATEGORY_EVENT']=='Relay')|(df['CATEGORY_EVENT']=='Marathon')|(df_top['CATEGORY_EVENT']=='Steeple')|(df_top['CATEGORY_EVENT']=='Pentathlon')|(df_top['CATEGORY_EVENT']=='Heptathlon')|(df_top['CATEGORY_EVENT']=='Triathlon'))&(df_top['RESULT_CONV'] <= df_top['5%']) & (df_top['AGE']<40) & ((df_top['MAPPED_EVENT']!='Marathon')|(df_top['AGE']<60) & (df_top['MAPPED_EVENT']=='Marathon')))|(((df_top['CATEGORY_EVENT']=='Jump')|(df_top['CATEGORY_EVENT']=='Throw'))&(df_top['RESULT_CONV'] >= df_top['5%']) & (df_top['AGE']<40) & ((df_top['MAPPED_EVENT']!='Marathon')|(df_top['AGE']<60) & (df_top['MAPPED_EVENT']=='Marathon')))]

In [1020]:
octc_df

Unnamed: 0,NAME,RESULT,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,...,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,Delta2,Delta3.5,Delta5,PERF_SCALAR
0,"Ting Li King, Brandon",15.03m,21.0,1,Men Triple Jump Open,2002-01-09 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.70,15.3860,15.15050,14.9150,15.03,-0.3560,-0.12050,0.1150,0.732484
1,"ANURA, ANDRE",14.93m,24.0,2,Men Triple Jump Open,1999-06-12 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.70,15.3860,15.15050,14.9150,14.93,-0.4560,-0.22050,0.0150,0.095541
11,CHANDRU BHAVIKA,11.22,0.0,2.0,Triple Jump,,,Jump,Female,NSG,...,Triple jump,13.46,13.1908,12.98890,12.7870,11.22,-1.9708,-1.76890,-1.5670,-11.641902
12,SHARIFAH FALISHA BINTE SYED AHMED RIADH,11,0.0,3.0,Triple Jump,,,Jump,Female,NSG,...,Triple jump,13.46,13.1908,12.98890,12.7870,11.00,-2.1908,-1.98890,-1.7870,-13.276374
13,"Tan, Jurnus",10.99m,19.0,1,Women Triple Jump Open,2004-05-12,,Jump,Female,POLITE Track & Field Championships 2023,...,Triple jump,13.46,13.1908,12.98890,12.7870,10.99,-2.2008,-1.99890,-1.7970,-13.350669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,"PEREIRA, VERONICA SHANTI",11.43,27.0,1,Women 100 Meter Dash Open,1996-09-20 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.43,0.5550,0.73125,0.9075,7.723404
537,"HU, Chia-Chen",11.57,26.0,2,Women 100 Meter Dash Open,1997-03-28 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.57,0.4150,0.59125,0.7675,6.531915
540,"TAN, ELIZABETH-ANN",11.91,20.0,3,Women 100 Meter Dash Open,2003-09-23 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.91,0.0750,0.25125,0.4275,3.638298
555,"Gurung, Subas",31:44.38,26.0,1,Men 10000 Meter Run Open,1997-10-22 00:00:00.000,,Long,Male,SA Allcomers Meet 2,...,10000m,1885.55,1923.2610,1951.54425,1979.8275,1904.38,18.8810,47.16425,75.4475,4.001352


In [1021]:
octc_df

Unnamed: 0,NAME,RESULT,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,COMPETITION,...,EVENT_y,Metric,2%,3.5%,5%,RESULT_CONV,Delta2,Delta3.5,Delta5,PERF_SCALAR
0,"Ting Li King, Brandon",15.03m,21.0,1,Men Triple Jump Open,2002-01-09 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.70,15.3860,15.15050,14.9150,15.03,-0.3560,-0.12050,0.1150,0.732484
1,"ANURA, ANDRE",14.93m,24.0,2,Men Triple Jump Open,1999-06-12 00:00:00.000,,Jump,Male,83rd Singapore Open Track & Field,...,Triple jump,15.70,15.3860,15.15050,14.9150,14.93,-0.4560,-0.22050,0.0150,0.095541
11,CHANDRU BHAVIKA,11.22,0.0,2.0,Triple Jump,,,Jump,Female,NSG,...,Triple jump,13.46,13.1908,12.98890,12.7870,11.22,-1.9708,-1.76890,-1.5670,-11.641902
12,SHARIFAH FALISHA BINTE SYED AHMED RIADH,11,0.0,3.0,Triple Jump,,,Jump,Female,NSG,...,Triple jump,13.46,13.1908,12.98890,12.7870,11.00,-2.1908,-1.98890,-1.7870,-13.276374
13,"Tan, Jurnus",10.99m,19.0,1,Women Triple Jump Open,2004-05-12,,Jump,Female,POLITE Track & Field Championships 2023,...,Triple jump,13.46,13.1908,12.98890,12.7870,10.99,-2.2008,-1.99890,-1.7970,-13.350669
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
536,"PEREIRA, VERONICA SHANTI",11.43,27.0,1,Women 100 Meter Dash Open,1996-09-20 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.43,0.5550,0.73125,0.9075,7.723404
537,"HU, Chia-Chen",11.57,26.0,2,Women 100 Meter Dash Open,1997-03-28 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.57,0.4150,0.59125,0.7675,6.531915
540,"TAN, ELIZABETH-ANN",11.91,20.0,3,Women 100 Meter Dash Open,2003-09-23 00:00:00.000,,Sprint,Female,83rd Singapore Open Track & Field,...,100m,11.75,11.9850,12.16125,12.3375,11.91,0.0750,0.25125,0.4275,3.638298
555,"Gurung, Subas",31:44.38,26.0,1,Men 10000 Meter Run Open,1997-10-22 00:00:00.000,,Long,Male,SA Allcomers Meet 2,...,10000m,1885.55,1923.2610,1951.54425,1979.8275,1904.38,18.8810,47.16425,75.4475,4.001352


In [1022]:
octc_df.to_csv('octc_df.csv', sep=',', encoding='utf-8-sig', index=False)


In [364]:
#rslt_df['Rank'] = (rslt_df.sort_values(by=['EVENT', 'GENDER', 'Delta35'], ascending=[False, False, True])['Delta35']
#                .rank(method='first', ascending=False)
#             )


# Download list of foreigners and spex carded athletes

In [365]:
#spex_list=pd.read_csv('/Users/veesheenyuen/Desktop/DataScience/SAA/SPEX_CARDED_LIST.csv', encoding='latin-1')


In [431]:
foreigners = pd.read_csv('/Users/veesheenyuen/Desktop/DataScience/SAA/MM/List of Foreigners.csv', encoding='latin-1')


In [367]:
#spex_list

Unnamed: 0,V1,V2,V3,V4,V5,Level
0,Goh Chui Ling,"Goh, Chui Ling",,,,E3
1,Michelle Sng,"Sng, Michelle",,,,E3
2,Quek Jun Jie Calvin,"Quek, Jun Jie Calvin","QUEK, CALVIN JUN JIE",,,E3
3,"SOH RUI YONG, GUILLAUME","Soh Rui Yong, Guillaume",,,,E3
4,Aaron Justin tan wen jie,"Tan Wen Jie, Aaron Justin",,,,E3P
5,DANIEL LEOW SOON YEE,"Yee, Daniel Leow Soon",,,,E3P
6,Joshua Chua,"Chua, Joshua",,,,E3P
7,NG ZHI RONG RYAN RAPHAEL,"Ng Zhi Rong, Ryan Raphael","Raphael, Ryan",,,E3P
8,Wenli Rachel,"Wenli, Rachel",,,,E3P
9,Wong YaoHan Melvin,"Wong YaoHan, Melvin",,,,E3P


In [432]:
foreigners

Unnamed: 0,LAST_NAME,FIRST_NAME,Unnamed: 2,Unnamed: 3,Unnamed: 4
0,Aaryan,Greuter Christoph,,,
1,Akahodani,Takayuki,,,
2,Apondar,Audric,,,
3,Brooks,Ruby,,,
4,Brouwer,Cees,,,
...,...,...,...,...,...
219,CHO,CHIA-HSUAN,,,
220,NGUYEN,HOAI VAN,,,
221,NGUYEN,QUOC THINH,,,
222,PHAM,QUYNH GIANG,,,


In [433]:
foreigners['V1'] = foreigners['LAST_NAME']+' '+foreigners['FIRST_NAME']
foreigners['V2'] = foreigners['FIRST_NAME']+' '+foreigners['LAST_NAME']
foreigners['V3'] = foreigners['LAST_NAME']+', '+foreigners['FIRST_NAME']
foreigners['V4'] = foreigners['FIRST_NAME']+' '+foreigners['LAST_NAME']

In [434]:
foreigners

Unnamed: 0,LAST_NAME,FIRST_NAME,Unnamed: 2,Unnamed: 3,Unnamed: 4,V1,V2,V3,V4
0,Aaryan,Greuter Christoph,,,,Aaryan Greuter Christoph,Greuter Christoph Aaryan,"Aaryan, Greuter Christoph",Greuter Christoph Aaryan
1,Akahodani,Takayuki,,,,Akahodani Takayuki,Takayuki Akahodani,"Akahodani, Takayuki",Takayuki Akahodani
2,Apondar,Audric,,,,Apondar Audric,Audric Apondar,"Apondar, Audric",Audric Apondar
3,Brooks,Ruby,,,,Brooks Ruby,Ruby Brooks,"Brooks , Ruby",Ruby Brooks
4,Brouwer,Cees,,,,Brouwer Cees,Cees Brouwer,"Brouwer, Cees",Cees Brouwer
...,...,...,...,...,...,...,...,...,...
219,CHO,CHIA-HSUAN,,,,CHO CHIA-HSUAN,CHIA-HSUAN CHO,"CHO, CHIA-HSUAN",CHIA-HSUAN CHO
220,NGUYEN,HOAI VAN,,,,NGUYEN HOAI VAN,HOAI VAN NGUYEN,"NGUYEN , HOAI VAN",HOAI VAN NGUYEN
221,NGUYEN,QUOC THINH,,,,NGUYEN QUOC THINH,QUOC THINH NGUYEN,"NGUYEN , QUOC THINH",QUOC THINH NGUYEN
222,PHAM,QUYNH GIANG,,,,PHAM QUYNH GIANG,QUYNH GIANG PHAM,"PHAM , QUYNH GIANG",QUYNH GIANG PHAM


In [371]:
#spex1 = spex_list['V1'].dropna().tolist()
#spex2 = spex_list['V2'].dropna().tolist()
#spex3 = spex_list['V3'].dropna().tolist()
#spex4 = spex_list['V4'].dropna().tolist()
#spex5 = spex_list['V5'].dropna().tolist()


In [435]:
for1 = foreigners['V1'].dropna().tolist()
for2 = foreigners['V2'].dropna().tolist()
for3 = foreigners['V3'].dropna().tolist()
for4 = foreigners['V4'].dropna().tolist()


In [373]:
#spex_athletes=spex1+spex2+spex3+spex4+spex5

In [436]:
foreign_list = for1+for2+for3+for4 

In [437]:
#spex_athletes

In [438]:
foreign_list

['Aaryan Greuter Christoph',
 'Akahodani Takayuki',
 'Apondar Audric',
 'Brooks  Ruby',
 'Brouwer Cees',
 'CARLESATER ARMAAN',
 'Chary Mathangi ',
 'Cinthikael Angel',
 'Comia Tsang Hannah',
 'Comia Tsang Mhandy',
 'Donnelly Jordan ',
 'Dorai Trinity Sharilyn',
 'Freeman Alexander',
 'Gaume Constance',
 'Gregorio John Alexander',
 'HAGIWARA RYOTA ',
 'Indarto Gerard',
 'Joy Matthew',
 'JULURI ADITYA',
 'Kiet Tran',
 'Koduru Kalyani',
 'Kuchenbuch Natalya',
 'KUIJPERS KUIJPERS AYKO ELLIN',
 'Kumar Joseph Suraj',
 'KUPPUSAMY PRAKSHEETA',
 'Kusumo Naia',
 'Kuwalekar  Spruha ',
 'Lahey Kezia Stephanie ',
 'Liz Jerry',
 'Marican Raees Marican Bin Ayoob Marican',
 'MARIE BRUNO DE FERRIERES DE SAUVEBOEUF LOUIS ',
 'Maslov Timofei',
 'Masrezwan Maeva',
 'MENDOZA NICCOLO',
 'Mennella Leonardo',
 'Meyers Nikita Mae Jiny-Yu',
 'Mishra Sunay',
 'Mo Thant Cin',
 'Moon Daniel Youngjo',
 'OCAMPO ANIAG DWAYNE MICHAEL OCAMPO ANIAG',
 'Ortega Rafael Pedro',
 'Owen Elia',
 'owl emma',
 'Ozbudak Deniz',
 

In [439]:
#spex_athletes_casefold=[s.casefold() for s in spex_athletes]

In [440]:
foreign_list_casefold=[s.casefold() for s in foreign_list]

In [442]:
#spex_athletes_casefold

In [443]:
foreign_list_casefold

['aaryan greuter christoph',
 'akahodani takayuki',
 'apondar audric',
 'brooks  ruby',
 'brouwer cees',
 'carlesater armaan',
 'chary mathangi ',
 'cinthikael angel',
 'comia tsang hannah',
 'comia tsang mhandy',
 'donnelly jordan ',
 'dorai trinity sharilyn',
 'freeman alexander',
 'gaume constance',
 'gregorio john alexander',
 'hagiwara ryota ',
 'indarto gerard',
 'joy matthew',
 'juluri aditya',
 'kiet tran',
 'koduru kalyani',
 'kuchenbuch natalya',
 'kuijpers kuijpers ayko ellin',
 'kumar joseph suraj',
 'kuppusamy praksheeta',
 'kusumo naia',
 'kuwalekar  spruha ',
 'lahey kezia stephanie ',
 'liz jerry',
 'marican raees marican bin ayoob marican',
 'marie bruno de ferrieres de sauveboeuf louis ',
 'maslov timofei',
 'masrezwan maeva',
 'mendoza niccolo',
 'mennella leonardo',
 'meyers nikita mae jiny-yu',
 'mishra sunay',
 'mo thant cin',
 'moon daniel youngjo',
 'ocampo aniag dwayne michael ocampo aniag',
 'ortega rafael pedro',
 'owen elia',
 'owl emma',
 'ozbudak deniz',
 

In [444]:
# Exclusion list is foreigners + spex carded 

#exclusions = foreign_list_casefold + spex_athletes_casefold

exclusions = foreign_list_casefold

In [445]:
# Apply Rule 4 above...if an athlete is in more than one event, choose the best performing one

top_performers=rslt_df.sort_values(['NAME','PERF_SCALAR'],ascending=False).groupby('NAME').head(1) # Choose top performing event per NAME


In [446]:
top_performers

Unnamed: 0,index_x,NAME,RESULT,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,...,EVENT_y,Metric,2pc,35pc,5pc,RESULT_CONV,Delta2,Delta35,Delta5,PERF_SCALAR
8353,11670,{},12.99,8.0,2,Boys 4x100 Meter Relay 15-16,2015-01-01 00:00:00.000,,Relay,Male,...,4 x 100m relay,39.36,40.1472,40.73760,41.3280,12.99,27.1572,27.74760,28.3380,71.996951
12480,17949,"{9: 'Ng, Caitlin Shan Wen', 438: 'Seow, Kyra',...",22.17,0.0,1,Girls 13-14 4x100 Meter Relay U15,,,Relay,Female,...,4 x 100m relay,44.58,45.4716,46.14030,46.8090,22.17,23.3016,23.97030,24.6390,55.269179
9267,13141,"{995: 'Lee, Asher', 991: 'Yeo, Chee Hean Phili...",29.89,0.0,2,Boys 9-12 4x400 Meter Relay U13,,,Relay,Male,...,4 x 400m relay,188.82,192.5964,195.42870,198.2610,29.89,162.7064,165.53870,168.3710,89.170109
9588,13486,"{994: 'De Ming, Gilbert Lee', 988: 'Lee, Jerem...",38.89,0.0,14,Boys 10-12 4x100 Meter Relay U13,,,Relay,Male,...,4 x 100m relay,39.36,40.1472,40.73760,41.3280,38.89,1.2572,1.84760,2.4380,6.194106
12424,17876,"{991: 'Yeo, Chee Hean Philip', 992: 'Yeo, Chee...",12.92,0.0,16,Boys 10-12 4x100 Meter Relay U13,,,Relay,Male,...,4 x 100m relay,39.36,40.1472,40.73760,41.3280,12.92,27.2272,27.81760,28.4080,72.174797
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10975,15886,"., Khairulnazim",14.72,17.0,4,Men 400 Meter Dash Open,2006-01-27,,Sprint,Male,...,400m,46.63,47.5626,48.26205,48.9615,14.72,32.8426,33.54205,34.2415,73.432340
9918,13873,"., Kaarthika",12.53,27.0,9,Women 400 Meter Dash Women Junior,1996-06-13,,Sprint,Female,...,400m,53.84,54.9168,55.72440,56.5320,12.53,42.3868,43.19440,44.0020,81.727340
10701,15018,"., Jayashree",1:14.05,15.0,36,Girls Long Jump B Div,2008-01-29 00:00:00.000,,Jump,Female,...,Long jump,6.02,5.8996,5.80930,5.7190,10.97,5.0704,5.16070,5.2510,87.225914
12077,17484,"., Cheung Zheng",47.61,14.0,20,Boys 11-14 400 Meter Dash U15,2009-02-19,,Sprint,Male,...,400m,46.63,47.5626,48.26205,48.9615,47.61,-0.0474,0.65205,1.3515,2.898349


In [447]:
top_performers.to_csv('top_checkpoint.csv', sep=',', encoding='utf-8-sig', index=False)

In [448]:
excluded_list = top_performers.loc[~rslt_df['NAME'].str.casefold().isin(exclusions)]  # ~ means NOT IN. DROP spex carded athletes

In [449]:
#spexed_list=top_performers

In [450]:
excluded_list.sort_values(['MAPPED_EVENT', 'GENDER', 'PERF_SCALAR'], ascending=[True, True, False], inplace=True)
excluded_list['overall_rank'] = 1
excluded_list['overall_rank'] = excluded_list.groupby(['MAPPED_EVENT', 'GENDER'])['overall_rank'].cumsum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  excluded_list.sort_values(['MAPPED_EVENT', 'GENDER', 'PERF_SCALAR'], ascending=[True, True, False], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  excluded_list['overall_rank'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  excluded_list['overall_rank'] = excluded_list.groupby(['MAPPED_EVENT', 'GENDER'])['overall_rank'].cumsum()


In [451]:
#spexed_list=spexed_list[(((spexed_list['EVENT']=='400m')&(spexed_list['overall_rank']<7)))|(((spexed_list['EVENT']=='100m')&(spexed_list['overall_rank']<7)))]

In [452]:
#Apply OCTC selection rule: max 6 for 100m/400m and max 3 for all other events

excluded_list = excluded_list[(((excluded_list['MAPPED_EVENT']=='400m')|(excluded_list['MAPPED_EVENT']=='100m'))&(excluded_list['overall_rank']<7))|(~((excluded_list['MAPPED_EVENT']=='400m')|(excluded_list['MAPPED_EVENT']=='100m'))&(excluded_list['overall_rank']<4))]


In [453]:
excluded_list

Unnamed: 0,index_x,NAME,RESULT,AGE,COMPETITION_RANK,EVENT_x,DOB,COUNTRY,CATEGORY_EVENT,GENDER,...,Metric,2pc,35pc,5pc,RESULT_CONV,Delta2,Delta35,Delta5,PERF_SCALAR,overall_rank
8135,11374,"Tan, Bernice",2:16.97,23.0,3,Women 10000 Meter Run Open,2000-11-16,,Long,Female,...,2131.03,2173.6506,2205.61605,2237.5815,136.97,2036.6806,2068.64605,2100.6115,98.572592,1
8137,11380,"NicAmhlaoibh, Maire",2:16.97,35.0,2,Women 10000 Meter Run Open,1988-02-14,,Long,Female,...,2131.03,2173.6506,2205.61605,2237.5815,136.97,2036.6806,2068.64605,2100.6115,98.572592,2
8134,11372,"Ng, Yew Cheo",2:16.97,37.0,4,Women 10000 Meter Run Open,1986-11-30,,Long,Female,...,2131.03,2173.6506,2205.61605,2237.5815,136.97,2036.6806,2068.64605,2100.6115,98.572592,3
9739,13637,"PHAGAMI, KHUM BAHADUR",20.34m,39.0,2,Men 10000 Meter Run Men Senior A,1984-08-09,,Long,Male,...,1885.55,1923.2610,1951.54425,1979.8275,144.29,1778.9710,1807.25425,1835.5375,97.347591,1
9734,13632,"Mohd, Jamian",35.31m,38.0,6,Men 10000 Meter Run Men Senior A,1985-04-29,,Long,Male,...,1885.55,1923.2610,1951.54425,1979.8275,144.29,1778.9710,1807.25425,1835.5375,97.347591,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3249,3544,SOLANA LEANN REINKIRSTEN CANDA,00:22.7,0.0,9.0,Triple Jump,,,Jump,Female,...,13.46,13.1908,12.98890,12.7870,1150.60,1137.4092,1137.61110,1137.8130,8453.291233,2
3260,3555,SHERYL TOH PEIXUAN,00:23.4,0.0,6.0,Triple Jump,,,Jump,Female,...,13.46,13.1908,12.98890,12.7870,1150.60,1137.4092,1137.61110,1137.8130,8453.291233,3
3257,3552,RAO TIANYU,00:28.7,0.0,12.0,Triple Jump,,,Jump,Male,...,15.70,15.3860,15.15050,14.9150,1150.60,1135.2140,1135.44950,1135.6850,7233.662420,1
3246,3541,NG JUN JIE,00:15.7,0.0,11.0,Triple Jump,,,Jump,Male,...,15.70,15.3860,15.15050,14.9150,1150.60,1135.2140,1135.44950,1135.6850,7233.662420,2


In [454]:
excluded_list.to_csv('octc_all_events.csv', sep=',', encoding='utf-8-sig', index=False)


In [None]:
# 1. If top athlete>30 (and the only one >30), there are already 6 in list, and next one is already <30 then it means do nothing? 
# 2. If top athlete>30 and is the sole pick.  Next athlete is far beyond 5% band.  Do we add?
# 3. If top athlete>30 and there are less than 6 within 5% band of SEAG benchmark. Do we add that one more who is beyond 5%?
# 4. At least one per gender.  The only pick is far beyond 5%.  Do we add?

# Marinda's request for 6 and 8 place finish analysis

In [241]:
import pandas_gbq
from google.oauth2 import service_account


credentials = service_account.Credentials.from_service_account_file(
    '/Users/veesheenyuen/Desktop/DataScience/Keys/saa-analytics-7c8937b70609.json',
)

sql="""
SELECT NAME, RESULT, RANK, EVENT, CATEGORY_EVENT, GENDER, COMPETITION, STAGE
FROM `saa-analytics.results.saa_full`
WHERE STAGE='Final' AND COMPETITION='SEA Games' AND (RANK='3' OR RANK='6' OR RANK='8')
"""

SEAG = pandas_gbq.read_gbq(sql, project_id="saa-analytics", credentials=credentials)



Downloading: 100%|[32m███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m


In [246]:
SEAG.tail(60)

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE
52,Kobsit Sittichai,2.07 m,6,High jump,Jump,Male,SEA Games,Final
53,Farell Glen Felix Jerus,2.15 m,3,High jump,Jump,Male,SEA Games,Final
54,Michelle Sng Suat Li,1.73,3,High jump,Jump,Female,SEA Games,Final
55,Yan Chan,1.95 m,8,High jump,Jump,Male,SEA Games,Final
56,Suwandi Wijaya,7.47,6,Long jump,Jump,Male,SEA Games,Final
57,Nurul Ashikin Abas,5.67,6,Long jump,Jump,Female,SEA Games,Final
58,Bùi Thị Loan,6.02,3,Long jump,Jump,Female,SEA Games,Final
59,Pok Pisey,4.76,8,Long jump,Jump,Female,SEA Games,Final
60,Sapwaturrahman Sapwaturrahman,7.62,3,Long jump,Jump,Male,SEA Games,Final
61,John Marvin Aragon Rafols,7.19,8,Long jump,Jump,Male,SEA Games,Final


In [247]:
for i in range(len(SEAG)):
        
    rowIndex = SEAG.index[i]

    input_string=SEAG.iloc[rowIndex,3]  # event is column index 3
    
    metric=SEAG.iloc[rowIndex,1] # result is column index 1
    
    if (metric==None or metric=='NH' or metric=='NM'):  # skip those non-numeric values
        continue
        
   # elif metric=='NH':
    #    continue
        
    print(i, input_string, metric)
        
    out = convert_time(i, input_string, metric)
     
    SEAG.loc[rowIndex, 'Metric'] = out

0 1500m     03:59.40
1 1500m     04:04.3
2 1500m     04:05.4
3 Decathlon 6891
4 100m 10.78
5 100m 11.75
6 100m 11.96
7 100m 10.443
8 200m 24.09
9 200m 23.6
11 200m 21.02
12 200m 21.58
13 400m 46.63
14 400m 48.26
15 400m 57.54
16 400m 1:01.20
17 400m 53.84
18 800m 1:57.98
19 800m 1:53.86
20 800m 2:15.77
21 800m 2:18.25
22 800m 2:09.15
23 800m 1:55.39
24 1500m 4:26.33
25 1500m 4:38.64
26 1500m 5:00.090
27 5000m 17:13.63
28 5000m 18:41.76
29 5000m 14:43.45
30 5000m 15:01.79
31 5000m 18:02.52
32 5000m 15:09.65
33 10000m 33:00.23
34 10000m 32:36.32
35 10000m 38:11.04
36 10000m 37:29.58
37 10000m 35:31.03
38 10000m 31:25.55
39 Marathon 2:35:49
40 Marathon 3:17:36
41 Marathon 3:46:44
42 Marathon 2:52:41
43 Marathon 2:50:27
44 Marathon 2:41:36
45 Shot put 11.69
46 Shot put 12.12
47 Shot put 14.44
48 Shot put 14.65
49 Shot put 17.3
50 Shot put 16.06
52 High jump 2.07 m
53 High jump 2.15 m
54 High jump 1.73
55 High jump 1.95 m
56 Long jump 7.47
57 Long jump 5.67
58 Long jump 6.02
59 Long jump 4.

In [248]:
SEAG

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE,Metric
0,Wahyudi Putra,03:59.40,3,1500m,Mid,Male,SEA Games,Final,239.40
1,Edwin GIRON,04:04.3,6,1500m,Mid,Male,SEA Games,Final,244.30
2,Van Dung GIANG,04:05.4,8,1500m,Mid,Male,SEA Games,Final,245.40
3,Aries TOLEDO,6891,3,Decathlon,Decathlon,Male,SEA Games,Final,6891.00
4,Joshua Hanwei Chua,10.78,6,100m,Sprint,Male,SEA Games,Final,10.78
...,...,...,...,...,...,...,...,...,...
107,"Sukanya Janchaona, Benny Nontanam, Sasipim S...",3:39.29,3,4 x 400m relay,Relay,Female,SEA Games,Final,219.29
108,Nguyễn Thị Huong,11:00.85,3,3000m steeplechase,Steeple,Female,SEA Games,Final,660.85
109,Pandu Sukarya,8:55.05,3,3000m steeplechase,Steeple,Male,SEA Games,Final,535.05
110,Ri Udom,10:36.06,8,3000m steeplechase,Steeple,Male,SEA Games,Final,636.06


In [249]:
SEAG.to_csv('check_variation.csv', encoding='utf-8')

In [250]:
comps = SEAG[SEAG['RANK']=='3']

In [251]:
comps

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE,Metric
0,Wahyudi Putra,03:59.40,3,1500m,Mid,Male,SEA Games,Final,239.4
3,Aries TOLEDO,6891,3,Decathlon,Decathlon,Male,SEA Games,Final,6891.0
5,Trần Thị Nhi Yến,11.75,3,100m,Sprint,Female,SEA Games,Final,11.75
7,Muhammad Haiqal Hanafi,10.443,3,100m,Sprint,Male,SEA Games,Final,10.443
9,Zaidatul Husniah Zulkifli,23.6,3,200m,Sprint,Female,SEA Games,Final,23.6
11,Lalu Muhammad Zohri,21.02,3,200m,Sprint,Male,SEA Games,Final,21.02
13,Frederick Ramirez,46.63,3,400m,Sprint,Male,SEA Games,Final,46.63
17,Nguyễn Thị Hằng,53.84,3,400m,Sprint,Female,SEA Games,Final,53.84
19,Wan Muhammad Fazri Wan Zahari,1:53.86,3,800m,Mid,Male,SEA Games,Final,113.86
22,Goh Chui Ling,2:09.15,3,800m,Mid,Female,SEA Games,Final,129.15


In [252]:
new_comps = comps[['EVENT', 'GENDER', 'Metric']]

In [253]:
#comps=SEAG.groupby(['EVENT', 'GENDER'])['Metric'].min()


In [254]:
df2 = SEAG.merge(new_comps, on=['EVENT', 'GENDER'], how='left')


In [255]:
df2

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE,Metric_x,Metric_y
0,Wahyudi Putra,03:59.40,3,1500m,Mid,Male,SEA Games,Final,239.40,239.400
1,Edwin GIRON,04:04.3,6,1500m,Mid,Male,SEA Games,Final,244.30,239.400
2,Van Dung GIANG,04:05.4,8,1500m,Mid,Male,SEA Games,Final,245.40,239.400
3,Aries TOLEDO,6891,3,Decathlon,Decathlon,Male,SEA Games,Final,6891.00,6891.000
4,Joshua Hanwei Chua,10.78,6,100m,Sprint,Male,SEA Games,Final,10.78,10.443
...,...,...,...,...,...,...,...,...,...,...
107,"Sukanya Janchaona, Benny Nontanam, Sasipim S...",3:39.29,3,4 x 400m relay,Relay,Female,SEA Games,Final,219.29,219.290
108,Nguyễn Thị Huong,11:00.85,3,3000m steeplechase,Steeple,Female,SEA Games,Final,660.85,660.850
109,Pandu Sukarya,8:55.05,3,3000m steeplechase,Steeple,Male,SEA Games,Final,535.05,535.050
110,Ri Udom,10:36.06,8,3000m steeplechase,Steeple,Male,SEA Games,Final,636.06,535.050


In [256]:
df2.to_csv('check_metric.csv', encoding='utf-8')

In [257]:
df2['% VARIATION']=(df2['Metric_x']/df2['Metric_y']*100-100)

In [258]:
df2

Unnamed: 0,NAME,RESULT,RANK,EVENT,CATEGORY_EVENT,GENDER,COMPETITION,STAGE,Metric_x,Metric_y,% VARIATION
0,Wahyudi Putra,03:59.40,3,1500m,Mid,Male,SEA Games,Final,239.40,239.400,0.000000
1,Edwin GIRON,04:04.3,6,1500m,Mid,Male,SEA Games,Final,244.30,239.400,2.046784
2,Van Dung GIANG,04:05.4,8,1500m,Mid,Male,SEA Games,Final,245.40,239.400,2.506266
3,Aries TOLEDO,6891,3,Decathlon,Decathlon,Male,SEA Games,Final,6891.00,6891.000,0.000000
4,Joshua Hanwei Chua,10.78,6,100m,Sprint,Male,SEA Games,Final,10.78,10.443,3.227042
...,...,...,...,...,...,...,...,...,...,...,...
107,"Sukanya Janchaona, Benny Nontanam, Sasipim S...",3:39.29,3,4 x 400m relay,Relay,Female,SEA Games,Final,219.29,219.290,0.000000
108,Nguyễn Thị Huong,11:00.85,3,3000m steeplechase,Steeple,Female,SEA Games,Final,660.85,660.850,0.000000
109,Pandu Sukarya,8:55.05,3,3000m steeplechase,Steeple,Male,SEA Games,Final,535.05,535.050,0.000000
110,Ri Udom,10:36.06,8,3000m steeplechase,Steeple,Male,SEA Games,Final,636.06,535.050,18.878609


In [259]:
df2.to_csv('SEAG_variation.csv', sep=',', encoding='utf-8-sig', index=False)
