# Let's do something with data from the Eurovision Song Contest

Because why not?

In [11]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import re

In [9]:
# preliminary test – are we getting data right away?
# The timeout keeps this cell from hanging forever on an unresponsive server,
# and raise_for_status() turns an HTTP error into an exception instead of
# letting an error page be parsed as if it were the participants table.
participants_page = requests.get(
  'https://eurovision.tv/event/lisbon-2018/grand-final'
  '/participants',
  timeout=10)
participants_page.raise_for_status()

# `response` holds the page's HTML text, not the Response object.
response = participants_page.text

soup = BeautifulSoup(response, 'lxml')

# Select the table rows by their generated class name.
table_rows = soup.select('.row__TableRow-an9049-0')

for row in table_rows:
  print(row.text)
  

R/OCountryArtistsongPtsPointsRankRanking
01UkraineMELOVINUnder The Ladder13017
02SpainAmaia y AlfredTu Canción6123
03SloveniaLea SirkHvala, ne!6422
04LithuaniaIeva ZasimauskaitėWhen We're Old18112
05AustriaCesár SampsonNobody But You3423
06EstoniaElina NechayevaLa Forza2458
07NorwayAlexander RybakThat's How You Write A Song14415
08PortugalCláudia PascoalO Jardim3926
09United KingdomSuRieStorm4824
10SerbiaSanja Ilić & BalkanikaNova Deca11319
11GermanyMichael SchulteYou Let Me Walk Alone3404
12AlbaniaEugent BushpepaMall18411
13FranceMadame MonsieurMercy17313
14Czech RepublicMikolas JosefLie To Me2816
15DenmarkRasmussenHigher Ground2269
16AustraliaJessica MauboyWe Got Love9920
17FinlandSaara AaltoMonsters4625
18BulgariaEQUINOXBones16614
19MoldovaDoReDoSMy Lucky Day20910
20SwedenBenjamin IngrossoDance You Off2747
21HungaryAWSViszlát Nyár9321
22IsraelNettaTOY5291
23The NetherlandsWaylonOutlaw In 'Em12118
24IrelandRyan O'ShaughnessyTogether13616
25CyprusEleni FoureiraFuego4362
26ItalyErmal M

In [10]:
# Cool, so this works, which is nice – it means that the data is actually
# prerendered on the server and not generated by JavaScript in the browser,
# even though the way the website behaves suggests client-side rendering.

## Contests

In [None]:
# Get all the contests from the entry page.
# The timeout prevents an indefinite hang; raise_for_status() makes a failed
# request fail here rather than silently producing an empty contest list.
events_page = requests.get('https://eurovision.tv/events', timeout=10)
events_page.raise_for_status()

# `contests_response` is the page's HTML text, `contests_soup` its parsed tree.
contests_response = events_page.text
contests_soup = BeautifulSoup(contests_response, 'lxml')


In [18]:

# Build one record per matching element of the events page.
contests_elements = contests_soup.select('.result__Wrapper-s9upcfm-1')

contests = []

for contest_element in contests_elements:
  # Join the title's text fragments with single spaces, e.g. "Lisbon 2018".
  # select_one() returns None when nothing matches, so an element without a
  # title gets an empty name instead of raising AttributeError.
  title_element = contest_element.select_one('.text__Text-x6y36f-0')
  if title_element is not None:
    name = ' '.join(title_element.stripped_strings)
  else:
    name = ''

  # re.search() returns None when the name does not look like
  # "<location> <4-digit year>"; record None for that field rather than
  # failing on `.group()`.
  location_match = re.search(r'^(.*) \d{4}', name)
  year_match = re.search(r'\d{4}', name)
  location = location_match.group(1) if location_match else None
  year = year_match.group(0) if year_match else None

  # Same None-safety for the image: a missing <img> or `src` yields None.
  image_element = contest_element.select_one('img')
  img_src = image_element.get('src') if image_element is not None else None

  contest = {
    'url': contest_element.get('href'),
    'name': name,
    # Let's get the image, too, just for kicks
    'img_src': img_src,
    'year': year,
    'location': location
  }

  contests.append(contest)

# One row per contest; built once from the list of dicts.
contests_df = pd.DataFrame(contests)


In [19]:
# Display the table of scraped contests.
contests_df

Unnamed: 0,img_src,location,name,url,year
0,https://apex.eurovision.tv/image/52219b044b79f...,Lisbon,Lisbon 2018,/event/lisbon-2018,2018
1,https://apex.eurovision.tv/image/7b0be5f440549...,Kyiv,Kyiv 2017,/event/kyiv-2017,2017
2,https://apex.eurovision.tv/image/9ef19ed95f0ce...,Stockholm,Stockholm 2016,/event/stockholm-2016,2016
3,https://apex.eurovision.tv/image/6b2a4289be78d...,Vienna,Vienna 2015,/event/vienna-2015,2015
4,https://apex.eurovision.tv/image/a7c9ca682b37e...,Copenhagen,Copenhagen 2014,/event/copenhagen-2014,2014
5,https://apex.eurovision.tv/image/acf1012f98180...,Malmö,Malmö 2013,/event/malmo-2013,2013
6,https://apex.eurovision.tv/image/de26c67acd021...,Baku,Baku 2012,/event/baku-2012,2012
7,https://apex.eurovision.tv/image/06cbdc0fccf0e...,Düsseldorf,Düsseldorf 2011,/event/dusseldorf-2011,2011
8,https://apex.eurovision.tv/image/2ef4dd5de1f60...,Oslo,Oslo 2010,/event/oslo-2010,2010
9,https://apex.eurovision.tv/image/a1834353e3c7e...,Moscow,Moscow 2009,/event/moscow-2009,2009


## Finalists

In [20]:
# Site root; contest paths such as '/event/bergen-1986' get appended to it.
BASE_URL = 'https://eurovision.tv'

# Challenges: at some point in the contest's history, the format changed from
# just having a final to having two semi-finals and a grand final –
# unfortunately, this also means that the URL structure changes there. This
# certainly does not make me that happy.



In [24]:
def get_finalists(contest_url_fragement, timeout=10):
  """Print the text of every finalist row found on a contest's final page.

  The page at ``BASE_URL + contest_url_fragement + '/final'`` is requested
  first. If that answers with a status code strictly between 200 and 500,
  the same contest is requested again under ``'/grand-final'``.

  Args:
    contest_url_fragement: Contest path relative to ``BASE_URL``, e.g.
      ``'/event/bergen-1986'``.
    timeout: Seconds passed to ``requests.get`` as its ``timeout`` for each
      request, so an unresponsive server cannot block the notebook
      indefinitely.

  Returns:
    None. The rows are printed, one per line.

  Raises:
    requests.exceptions.Timeout: If the server does not respond within
      ``timeout`` seconds.
  """
  contest_url = BASE_URL + contest_url_fragement

  response = requests.get(contest_url + '/final', timeout=timeout)

  # Whoops, nothing to see at "/final". Let's try "/grand-final" instead
  if 200 < response.status_code < 500:
    response = requests.get(contest_url + '/grand-final', timeout=timeout)

  # Whatever the last response was is parsed as-is; a page without the
  # expected table simply yields no rows.
  contest_soup = BeautifulSoup(response.text, 'lxml')

  finalists_rows = contest_soup.select('.body__TableBody-s18it6kf-0 tr')

  for finalist in finalists_rows:
    print(finalist.text)
    

In [25]:
# Try it on the Bergen 1986 contest – presumably from the single-final era,
# so the plain '/final' URL should apply (TODO confirm).
get_finalists('/event/bergen-1986')

01LuxembourgSherisse LaurenceL'amour De Ma Vie1173
02YugoslaviaDoris DragovicZeljo Moja4911
03FranceCocktail ChicEuropéennes1317
04NorwayKetil StokkanRomeo4412
05United KingdomRyderRunner In The Night727
06IcelandIcyGleðibankinn1916
07The NetherlandsFrizzle SizzleAlles Heeft Ritme4013
08TurkeyKlips ve OnlarHalley539
09SpainCadillacValentino5110
10SwitzerlandDaniela SimonsPas Pour Moi1402
11IsraelMoti Galadi and Sarai TzurielYavoh Yom719
12IrelandLuv BugYou Can Count On Me964
13BelgiumSandra KimJ'aime La Vie1761
14GermanyIngrid PetersÜber Die Brücke Geh'n628
15CyprusElpidaTora Zo420
16AustriaTimna BrauerDie Zeit Ist Einsam1218
17SwedenLasse Holm and Monica TörnellE' De' Det Här Du Kallar Kärlek785
18DenmarkLise Haavik and TraxDu Er Fuld Af Løgn776
19FinlandKari KuivalainenPäivä Kahden Ihmisen2215
20PortugalDoraNão Sejas Mau Para Mim2814
