# Let's do something with data from the Eurovision Song Contest

Because why not?

In [8]:
from bs4 import BeautifulSoup
import pandas as pd
import requests
import re
import uuid
import datetime

In [10]:
# CONSTANTS

# Root of the site; hrefs scraped from its pages are relative to this
BASE_URL = 'https://eurovision.tv'
# Local date as YYYYMMDD, used to stamp the names of exported files
TODAY = datetime.date.today().strftime('%Y%m%d')

In [4]:
# Smoke test: does a plain GET already contain the participants table?
response = requests.get(
    'https://eurovision.tv/event/lisbon-2018/grand-final/participants'
).text

soup = BeautifulSoup(response, features='lxml')

# Table rows are matched via the site's generated CSS class name
table_rows = soup.select('.row__TableRow-an9049-0')

# Print the concatenated text of every matched row, one row per line
for row in table_rows:
  print(row.get_text())
  

R/OCountryArtistsongPtsPointsRankRanking
01UkraineMELOVINUnder The Ladder13017
02SpainAmaia y AlfredTu Canción6123
03SloveniaLea SirkHvala, ne!6422
04LithuaniaIeva ZasimauskaitėWhen We're Old18112
05AustriaCesár SampsonNobody But You3423
06EstoniaElina NechayevaLa Forza2458
07NorwayAlexander RybakThat's How You Write A Song14415
08PortugalCláudia PascoalO Jardim3926
09United KingdomSuRieStorm4824
10SerbiaSanja Ilić & BalkanikaNova Deca11319
11GermanyMichael SchulteYou Let Me Walk Alone3404
12AlbaniaEugent BushpepaMall18411
13FranceMadame MonsieurMercy17313
14Czech RepublicMikolas JosefLie To Me2816
15DenmarkRasmussenHigher Ground2269
16AustraliaJessica MauboyWe Got Love9920
17FinlandSaara AaltoMonsters4625
18BulgariaEQUINOXBones16614
19MoldovaDoReDoSMy Lucky Day20910
20SwedenBenjamin IngrossoDance You Off2747
21HungaryAWSViszlát Nyár9321
22IsraelNettaTOY5291
23The NetherlandsWaylonOutlaw In 'Em12118
24IrelandRyan O'ShaughnessyTogether13616
25CyprusEleni FoureiraFuego4362
26ItalyErmal M

In [5]:
# Cool, so this works, which is nice – it means that the data is already
# rendered on the server and not generated by JavaScript in the browser,
# even though the way the website behaves certainly suggests the latter.

## Data Model

This should be the fully realised data model:

![Data Model](./super-extended-model.svg)

## Contests

In [4]:
# Download the events overview page, which links to every contest,
# and parse it so the next cell can query it with CSS selectors
contests_response = requests.get(url='https://eurovision.tv/events').text
contests_soup = BeautifulSoup(markup=contests_response, features='lxml')


In [5]:

# Every contest on the events page is one wrapper element carrying the href
contests_elements = contests_soup.select('.result__Wrapper-s9upcfm-1')

contests = []

for contest_element in contests_elements:
  # The title's text pieces joined by spaces (e.g. "Lisbon 2018" in the
  # output below)
  title_element = contest_element.select_one('.text__Text-x6y36f-0')
  name = ' '.join(title_element.stripped_strings)

  # Location is everything before the " <4 digits>" part; year is the first
  # 4-digit run found in the name
  location = re.search(r'^(.*) \d{4}', name).group(1)
  year = re.search(r'\d{4}', name).group(0)

  href = contest_element['href']

  contests.append({
    'url': href,
    'name': name,
    # Let's get the image, too, just for kicks
    'img_src': contest_element.select_one('img')['src'],
    'year': year,
    'location': location,
    # Name-based UUID of the full contest URL, so it can be re-derived later
    'id': uuid.uuid3(uuid.NAMESPACE_URL, BASE_URL + href)
  })

contests_df = pd.DataFrame(contests)



In [6]:
# Display the scraped contests table to eyeball the result
contests_df

Unnamed: 0,id,img_src,location,name,url,year
0,c7aae442-7c52-3a14-9bf5-2fb1880fbf58,https://apex.eurovision.tv/image/52219b044b79f...,Lisbon,Lisbon 2018,/event/lisbon-2018,2018
1,42e0141d-3b05-3776-ba39-ea69c28e8bdb,https://apex.eurovision.tv/image/7b0be5f440549...,Kyiv,Kyiv 2017,/event/kyiv-2017,2017
2,9b967659-2937-3ddc-97a8-7d4296ecdc2b,https://apex.eurovision.tv/image/9ef19ed95f0ce...,Stockholm,Stockholm 2016,/event/stockholm-2016,2016
3,dcd86fee-f06a-315a-abcf-372a8fba2e7f,https://apex.eurovision.tv/image/6b2a4289be78d...,Vienna,Vienna 2015,/event/vienna-2015,2015
4,65ff473b-91bc-335f-bed3-9052fc7cdeb5,https://apex.eurovision.tv/image/a7c9ca682b37e...,Copenhagen,Copenhagen 2014,/event/copenhagen-2014,2014
5,fc7505da-0220-34ba-82a1-3abc5e8b3e36,https://apex.eurovision.tv/image/acf1012f98180...,Malmö,Malmö 2013,/event/malmo-2013,2013
6,1e72efe5-71b9-3669-9b83-b0857549f15b,https://apex.eurovision.tv/image/de26c67acd021...,Baku,Baku 2012,/event/baku-2012,2012
7,888687e1-8da0-3d6c-ab5a-ded1f7e9ac21,https://apex.eurovision.tv/image/06cbdc0fccf0e...,Düsseldorf,Düsseldorf 2011,/event/dusseldorf-2011,2011
8,459089e7-dc8b-3af3-816c-e195d8a63533,https://apex.eurovision.tv/image/2ef4dd5de1f60...,Oslo,Oslo 2010,/event/oslo-2010,2010
9,fa0099a0-6a5e-3a2e-913f-5e19be93c801,https://apex.eurovision.tv/image/a1834353e3c7e...,Moscow,Moscow 2009,/event/moscow-2009,2009


In [12]:
# Write the contests table to a date-stamped CSV, leaving out the row index
contests_csv_path = 'project/contests-{}.csv'.format(TODAY)
contests_df.to_csv(contests_csv_path, index=False)

## Finalists

In [9]:


# Challenge: at some point in the contest's history, the format changed from
# having just a final to having two semi-finals and a grand final.
# Unfortunately, the URL structure changes along with it ("/final" vs.
# "/grand-final"), which certainly does not make me happy.



In [14]:
def get_finalists(contest_url_fragement):
  """Scrape the result table of a contest's final into a list of dicts.

  The page at BASE_URL + fragment + '/final' is requested first. If that
  answers with a status code strictly between 200 and 500, the request is
  repeated with '/grand-final' instead.

  Args:
    contest_url_fragement: Site-relative path of the contest, e.g.
      '/event/lisbon-2018'. It is appended to BASE_URL.

  Returns:
    A list with one dict per `table tbody tr` row of the page. Each dict
    holds 'contest_id' (the uuid3 of the contest's full URL) plus one key per
    table header, mapped to the text of the cell in the same column. Headers
    and cells are paired positionally; surplus headers or cells are ignored.
    The list is empty when the page contains no table rows.
  """
  contest_url = BASE_URL + contest_url_fragement

  response = requests.get(contest_url + '/final')

  # Whoops, nothing to see at "/final". Let's try "/grand-final" instead
  if 200 < response.status_code < 500:
    response = requests.get(contest_url + '/grand-final')

  contest_soup = BeautifulSoup(response.text, 'lxml')

  header_cells = contest_soup.select('table thead tr th '
                                     '.small-caps__SmallCaps-s1ooca2g-0')
  headers = [cell.text for cell in header_cells]

  # Identical for every row of this contest, so compute it once
  contest_id = uuid.uuid3(uuid.NAMESPACE_URL, contest_url)

  finalists = []

  for finalist_row in contest_soup.select('table tbody tr'):
    entry = {'contest_id': contest_id}

    # Pair each header with the <td> in the same position. The length of the
    # row tag itself must not be used as the cell count: it counts every
    # child node (whitespace text included), which can run past the cells.
    for header, cell in zip(headers, finalist_row.select('td')):
      entry[header] = cell.text

    finalists.append(entry)

  return finalists


In [15]:
# Spot-check the scraper on a single contest (Bergen 1986)
get_finalists('/event/bergen-1986')

[{'Artist': 'Sherisse Laurence',
  'Country': 'Luxembourg',
  'PtsPoints': '117',
  'R/O': '01',
  'RankRanking': '3',
  'contest_id': UUID('0ee2efac-f244-3b1d-894f-7d1ebfb873ec'),
  'song': "L'amour De Ma Vie"},
 {'Artist': 'Doris Dragovic',
  'Country': 'Yugoslavia',
  'PtsPoints': '49',
  'R/O': '02',
  'RankRanking': '11',
  'contest_id': UUID('0ee2efac-f244-3b1d-894f-7d1ebfb873ec'),
  'song': 'Zeljo Moja'},
 {'Artist': 'Cocktail Chic',
  'Country': 'France',
  'PtsPoints': '13',
  'R/O': '03',
  'RankRanking': '17',
  'contest_id': UUID('0ee2efac-f244-3b1d-894f-7d1ebfb873ec'),
  'song': 'Européennes'},
 {'Artist': 'Ketil Stokkan',
  'Country': 'Norway',
  'PtsPoints': '44',
  'R/O': '04',
  'RankRanking': '12',
  'contest_id': UUID('0ee2efac-f244-3b1d-894f-7d1ebfb873ec'),
  'song': 'Romeo'},
 {'Artist': 'Ryder',
  'Country': 'United Kingdom',
  'PtsPoints': '72',
  'R/O': '05',
  'RankRanking': '7',
  'contest_id': UUID('0ee2efac-f244-3b1d-894f-7d1ebfb873ec'),
  'song': 'Runner In

In [40]:
# Run the scraper for every contest URL fragment collected above and gather
# all entries that reached a final in one dataframe, one row per entry
finalists_list = []

for url in contests_df['url']:
  finalists_list.extend(get_finalists(url))

finalists_df = pd.DataFrame(data=finalists_list)
finalists_df

Unnamed: 0,Artist,Country,PtsPoints,R/O,RankRanking,contest_id,song
0,MELOVIN,Ukraine,130,01,17,/event/lisbon-2018,Under The Ladder
1,Amaia y Alfred,Spain,61,02,23,/event/lisbon-2018,Tu Canción
2,Lea Sirk,Slovenia,64,03,22,/event/lisbon-2018,"Hvala, ne!"
3,Ieva Zasimauskaitė,Lithuania,181,04,12,/event/lisbon-2018,When We're Old
4,Cesár Sampson,Austria,342,05,3,/event/lisbon-2018,Nobody But You
5,Elina Nechayeva,Estonia,245,06,8,/event/lisbon-2018,La Forza
6,Alexander Rybak,Norway,144,07,15,/event/lisbon-2018,That's How You Write A Song
7,Cláudia Pascoal,Portugal,39,08,26,/event/lisbon-2018,O Jardim
8,SuRie,United Kingdom,48,09,24,/event/lisbon-2018,Storm
9,Sanja Ilić & Balkanika,Serbia,113,10,19,/event/lisbon-2018,Nova Deca
