# Importing Libraries

In [1]:
import pandas as pd
from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
from geopy.extra.rate_limiter import RateLimiter

# Transforming the Data

In [2]:
# Load the raw race results from the CSV that sits next to this notebook.
df = pd.read_csv(filepath_or_buffer='./race.csv')

In [3]:
# Preview the first five rows of the raw data.
df.head(n=5)

Unnamed: 0,place,first,last,city,state,age,division,time
0,1,Daniel,Wilson,Tulsa,OK,35,M,8:23:01
1,2,Eric,Davis,Greenwood,IN,38,M,8:57:54
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,9:24:35
3,4,Ron,Hammett,Montverde,FL,53,M,9:24:36
4,5,Seth,Cain,Geneva,FL,44,M,9:42:17


In [4]:
# Combine first and last name into one display name, separated by a single space.
df['full_name'] = df['first'].add(' ').add(df['last'])

In [5]:
# Parse the 'H:MM:SS' finish-time strings into timedeltas so they can be used in arithmetic.
df = df.assign(time=pd.to_timedelta(df['time']))

In [6]:
# Finish time expressed as whole minutes (rounded to the nearest minute).
df['total_minutes'] = df['time'].dt.total_seconds().div(60).round().astype(int)

In [7]:
# The source file calls this column 'division', but the values shown above are gender codes.
df = df.rename(columns={'division': 'gender'})

In [8]:
def _default_geocode():
    """Return the shared, rate-limited Nominatim geocode callable, creating it on first use."""
    shared = getattr(_default_geocode, 'shared', None)
    if shared is None:
        geolocator = Nominatim(user_agent='running', timeout=10)
        # A single instance must serve every lookup: a RateLimiter created per
        # call has no memory of the previous request and so cannot space them.
        # Nominatim's public server asks for at most one request per second.
        shared = RateLimiter(geolocator.geocode, min_delay_seconds=1)
        _default_geocode.shared = shared
    return shared


def get_lat_long(city, state, geocode=None):
    """Geocode a "city, state" pair to a (latitude, longitude) tuple.

    Parameters
    ----------
    city, state : str
        Place name components; they are joined as ``'{city}, {state}'``.
    geocode : callable, optional
        Function taking the address string and returning an object with
        ``latitude``/``longitude`` attributes, or a falsy value when nothing
        matches. Defaults to a shared, rate-limited Nominatim geocoder.

    Returns
    -------
    tuple
        ``(latitude, longitude)``, or ``(None, None)`` when either input is
        missing, the address is not found, or the request times out.
    """
    # A missing value would otherwise be formatted as the text 'nan'/'None'
    # and sent to the service as if it were a real place name.
    if pd.isna(city) or pd.isna(state):
        return None, None

    address = f'{city}, {state}'
    if geocode is None:
        geocode = _default_geocode()

    try:
        location = geocode(address)
    except GeocoderTimedOut:
        # Best effort: one slow lookup should not abort the whole run.
        return None, None

    if location:
        return location.latitude, location.longitude
    return None, None

In [9]:
# Geocode each distinct (city, state) pair only once: runners often share a
# home town, and every lookup is a network request to a shared public service.
coords_by_place = {}
row_coords = []
for place in zip(df['city'], df['state']):
    if place not in coords_by_place:
        coords_by_place[place] = get_lat_long(*place)
    row_coords.append(coords_by_place[place])

# Building the columns from lists (rather than unpacking zip(*...)) also
# works when the frame has no rows.
df['latitude'] = [lat for lat, _ in row_coords]
df['longitude'] = [lon for _, lon in row_coords]

In [10]:
# Build a 'lat, lon' string per row. Rows whose geocoding failed are left
# missing instead of becoming the literal text 'nan, nan' / 'None, None'.
has_coords = df['latitude'].notna() & df['longitude'].notna()
df['coordinates'] = (
    df['latitude'].astype(str) + ', ' + df['longitude'].astype(str)
).where(has_coords)

In [11]:
# Preview the first five rows after the transformations.
df.head(n=5)

Unnamed: 0,place,first,last,city,state,age,gender,time,full_name,total_minutes,latitude,longitude,coordinates
0,1,Daniel,Wilson,Tulsa,OK,35,M,0 days 08:23:01,Daniel Wilson,503,36.156312,-95.992752,"36.1563122, -95.9927516"
1,2,Eric,Davis,Greenwood,IN,38,M,0 days 08:57:54,Eric Davis,538,39.613699,-86.109543,"39.6136987, -86.1095429"
2,3,Stewart,Edwards,New Smyrna Beach,FL,43,M,0 days 09:24:35,Stewart Edwards,565,29.025813,-80.927127,"29.0258132, -80.9271271"
3,4,Ron,Hammett,Montverde,FL,53,M,0 days 09:24:36,Ron Hammett,565,28.600277,-81.673964,"28.6002769, -81.673964"
4,5,Seth,Cain,Geneva,FL,44,M,0 days 09:42:17,Seth Cain,582,28.739716,-81.115062,"28.7397163, -81.1150616"


# Exporting the Transformed Data

In [12]:
# Write the transformed table next to the notebook, leaving out the row index.
df.to_csv(path_or_buf='./race_transformed.csv', index=False)