In [52]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import country_converter as coco
from selenium import webdriver as driver
from geopy.geocoders import Nominatim
import folium

In [53]:
#scraping data from a website
df = pd.read_html('https://f1.fandom.com/wiki/Circuits')[0]
df = df[df["Season(s)"].str.contains("present") == True]
df = df[df["Location"].str.contains("Miami Gardens, FL")==False]
tracks = df.drop(columns=["Type", "Race(s)", "Season(s)", "Races held"])
tracks

Unnamed: 0,Circuit,Location,Country
4,Albert Park,Melbourne,AUS
7,Bahrain International Circuit,Sakhir,BHR
9,Baku City Circuit,Baku,AZE
10,Circuit de Barcelona-Catalunya,Montmeló,ESP
19,Circuit of the Americas,"Austin, TX",USA
26,Autodromo Enzo e Dino Ferrari,Imola,ITA
30,Circuit Gilles Villeneuve,"Montréal, Quebec",CAN
33,Autódromo Hermanos Rodríguez,Mexico City,MEX
35,Hungaroring,Mogyoród,HUN
46,Autódromo José Carlos Pace,São Paulo,BRA


In [54]:
#generating the country names from their codes using country_converter
tracks['Country Name'] = tracks.Country.apply(lambda x: coco.convert(names=x, to='name_short', not_found=None))
tracks

SIN not found in ISO3
MON not found in ISO3
UAE not found in ISO3
NED not found in ISO3


Unnamed: 0,Circuit,Location,Country,Country Name
4,Albert Park,Melbourne,AUS,Australia
7,Bahrain International Circuit,Sakhir,BHR,Bahrain
9,Baku City Circuit,Baku,AZE,Azerbaijan
10,Circuit de Barcelona-Catalunya,Montmeló,ESP,Spain
19,Circuit of the Americas,"Austin, TX",USA,United States
26,Autodromo Enzo e Dino Ferrari,Imola,ITA,Italy
30,Circuit Gilles Villeneuve,"Montréal, Quebec",CAN,Canada
33,Autódromo Hermanos Rodríguez,Mexico City,MEX,Mexico
35,Hungaroring,Mogyoród,HUN,Hungary
46,Autódromo José Carlos Pace,São Paulo,BRA,Brazil


In [55]:
#as the original codes were not in ISO3 format, some codes needed to be converted manually
df = tracks.replace({'Country Name':{'SIN': 'Singapore','MON': 'Monaco', 'UAE': 'United Arab Emirates', 'NED': 'Netherlands'}})
df

Unnamed: 0,Circuit,Location,Country,Country Name
4,Albert Park,Melbourne,AUS,Australia
7,Bahrain International Circuit,Sakhir,BHR,Bahrain
9,Baku City Circuit,Baku,AZE,Azerbaijan
10,Circuit de Barcelona-Catalunya,Montmeló,ESP,Spain
19,Circuit of the Americas,"Austin, TX",USA,United States
26,Autodromo Enzo e Dino Ferrari,Imola,ITA,Italy
30,Circuit Gilles Villeneuve,"Montréal, Quebec",CAN,Canada
33,Autódromo Hermanos Rodríguez,Mexico City,MEX,Mexico
35,Hungaroring,Mogyoród,HUN,Hungary
46,Autódromo José Carlos Pace,São Paulo,BRA,Brazil


In [56]:
#getting attendance data
data = pd.read_html('https://f1destinations.com/2017-f1-attendance-figures/')[0]
data = data[data["Race"].str.contains("TOTAL")==False]
data = data.drop(columns=["% Change"])

columns_titles = ["Race","2016 Attendance","2017 Attendance"]

data = data.reindex(columns=columns_titles)
data["Growth"] = (((data["2017 Attendance"] / data["2016 Attendance"]) - 1) * 100).astype(int)
data['Race'] = data['Race'].str.replace('\d+', '', regex=True).str.replace('.', '', regex=True).str.replace(' ', '', regex=True)
data

Unnamed: 0,Race,2016 Attendance,2017 Attendance,Growth
0,Canada,300000,360000,19
1,GreatBritain,350000,344500,-1
2,Mexico,339967,337043,0
3,Australia,271800,296600,9
4,Belgium,233730,265000,13
5,Singapore,219000,260000,18
6,USA,269889,258000,-4
7,Monaco,200000,200000,0
8,Hungary,176000,199000,13
9,AbuDhabi,195000,195000,0


In [57]:
join = pd.merge(df, data, left_on='Country Name', right_on='Race', how='left')
join

Unnamed: 0,Circuit,Location,Country,Country Name,Race,2016 Attendance,2017 Attendance,Growth
0,Albert Park,Melbourne,AUS,Australia,Australia,271800.0,296600.0,9.0
1,Bahrain International Circuit,Sakhir,BHR,Bahrain,Bahrain,92000.0,93000.0,1.0
2,Baku City Circuit,Baku,AZE,Azerbaijan,Azerbaijan,30000.0,71541.0,138.0
3,Circuit de Barcelona-Catalunya,Montmeló,ESP,Spain,Spain,165029.0,177984.0,7.0
4,Circuit of the Americas,"Austin, TX",USA,United States,,,,
5,Autodromo Enzo e Dino Ferrari,Imola,ITA,Italy,Italy,147500.0,185000.0,25.0
6,Circuit Gilles Villeneuve,"Montréal, Quebec",CAN,Canada,Canada,300000.0,360000.0,19.0
7,Autódromo Hermanos Rodríguez,Mexico City,MEX,Mexico,Mexico,339967.0,337043.0,0.0
8,Hungaroring,Mogyoród,HUN,Hungary,Hungary,176000.0,199000.0,13.0
9,Autódromo José Carlos Pace,São Paulo,BRA,Brazil,Brazil,136410.0,141218.0,3.0


In [58]:
geolocator = Nominatim(user_agent='test')
join['gcode'] = join['Location'].apply(geolocator.geocode)
join['lat'] = [g.latitude for g in join.gcode]
join['long'] = [g.longitude for g in join.gcode]
final = join.drop(columns=["gcode"])
final

Unnamed: 0,Circuit,Location,Country,Country Name,Race,2016 Attendance,2017 Attendance,Growth,lat,long
0,Albert Park,Melbourne,AUS,Australia,Australia,271800.0,296600.0,9.0,-37.814218,144.963161
1,Bahrain International Circuit,Sakhir,BHR,Bahrain,Bahrain,92000.0,93000.0,1.0,32.953941,65.540016
2,Baku City Circuit,Baku,AZE,Azerbaijan,Azerbaijan,30000.0,71541.0,138.0,40.375443,49.832675
3,Circuit de Barcelona-Catalunya,Montmeló,ESP,Spain,Spain,165029.0,177984.0,7.0,41.551519,2.248081
4,Circuit of the Americas,"Austin, TX",USA,United States,,,,,30.271129,-97.7437
5,Autodromo Enzo e Dino Ferrari,Imola,ITA,Italy,Italy,147500.0,185000.0,25.0,44.353515,11.714123
6,Circuit Gilles Villeneuve,"Montréal, Quebec",CAN,Canada,Canada,300000.0,360000.0,19.0,45.503182,-73.569806
7,Autódromo Hermanos Rodríguez,Mexico City,MEX,Mexico,Mexico,339967.0,337043.0,0.0,19.43263,-99.133178
8,Hungaroring,Mogyoród,HUN,Hungary,Hungary,176000.0,199000.0,13.0,47.603742,19.247409
9,Autódromo José Carlos Pace,São Paulo,BRA,Brazil,Brazil,136410.0,141218.0,3.0,-23.550651,-46.633382


In [73]:
#building the map
def find_color(growth):
    if (growth > 20):
        return 'red'
    if (growth > 10):
        return 'orange'
    else:
        return 'yellow'

map_clusters = folium.Map(zoom_start=4)

for lat, long, att, growth in zip(final['lat'], final['long'], final['2017 Attendance'], final['Growth']):
    folium.CircleMarker(
    [lat, long],
    color=find_color(growth),
    weight=10,
    fill_opacity = 0.6,
    radius = att/2000000).add_to(map_clusters)
    
map_clusters

In [None]:
#finding the number of car dealerships in the circuit areas