In [None]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
 
# use creds to create a client to interact with the Google Drive API
#scope = ['https://spreadsheets.google.com/feeds']
scope = ['https://spreadsheets.google.com/feeds',
'https://www.googleapis.com/auth/drive']

creds = ServiceAccountCredentials.from_json_keyfile_name('credenciales.json', scope)
client = gspread.authorize(creds)
 
# Find a workbook by name and open the first sheet
# Make sure you use the right name here.
doc = client.open("Prode AA Rusia 2018")
 
found=False
for sheet in doc.worksheets():
    if sheet.title == "Goles":
        print("Found it!")
        found=True
        break

In [1]:
from selenium import webdriver
from time import sleep

from datetime import datetime, timedelta
from geopy import geocoders
from tzwhere import tzwhere
import pytz

tz = tzwhere.tzwhere()

tz_arg = pytz.timezone("America/Argentina/Buenos_Aires")
# tz_rus = pytz.timezone("Europe/Volgograd")
tz_rus = {
    'Moscow': 'Europe/Moscow',
    'Ekaterinburg': 'Asia/Yekaterinburg',
    'St. Petersburg': 'Europe/Moscow',
    'Sochi': 'Europe/Moscow',
    'Kazan': 'Europe/Moscow',
    'Saransk': 'Europe/Moscow',
    'Kaliningrad': 'Europe/Kaliningrad',
    'Samara': 'Europe/Samara',
    'Rostov-On-Don': 'Europe/Moscow',
    'Nizhny Novgorod': 'Europe/Moscow',
    'Volgograd': 'Europe/Volgograd' }

geocoder = geocoders.GoogleV3()  # Para buscar si es que me falta un timezone

In [2]:
d = webdriver.Chrome()
sleep(2)
d.maximize_window()

In [None]:
class Partido:
    def __init__(self, html):
        
        self.link = html.get_attribute('href')
        self.pid = int(self.link.split('/')[-2:-1][0])
        
        self.grupo = html.find_element_by_class_name("fi__info__group").text
        self.equipos = [x.text for x in html.find_elements_by_class_name("fi-t__nText ")]
        self.ciudad = html.find_element_by_class_name("fi__info__venue").text
        
        # Paso la ciudad a timezone
        if not self.ciudad in tz_rus:
            sleep(2)  # Para no saturar la API
            place, (lat, lng) = geocoder.geocode(self.ciudad)
            tz_rus[self.ciudad] = tz.tzNameAt(lat, lng)
            
        self.timezone = pytz.timezone(tz_rus[self.ciudad])
        # print(self.ciudad, self.timezone)
        
        fecha_aux = html.find_element_by_class_name("fi-mu__info__datetime").text
        self.fecha_local = self.timezone.localize(datetime.strptime(fecha_aux, "%d %b %Y - %H:%M Local time"))
        self.fecha = self.fecha_local.astimezone(tz_arg)
        # print(self.fecha_local, self.fecha)
        
        # No dependo de que actualicen fi-s__scoreText
        if self.fecha + timedelta(minutes=90) <= tz_arg.localize(datetime.now()):
            self.pendiente = False
            try:
                self.marcador = [int(x) for x in html.find_element_by_class_name("fi-s__scoreText").text.split('-')]
            except:
                self.marcador = [ -1, -1 ]
        else:
            self.pendiente = True
            
    def actualizarGoles(self):
        d.get(self.link)
        sleep(5)
        
        self.goles = { self.equipos[0]: [], self.equipos[1]: [] }
        for lista in d.find_elements_by_class_name("fi-mh__scorers__home"):
            for html in lista.find_elements_by_class_name("fi-mh__scorer"):
                gol = Gol(html, self.pid)
                self.goles[self.equipos[0]].append(gol)
                contarGoles(gol)  # De paso lo cuento para el torneo
        for lista in d.find_elements_by_class_name("fi-mh__scorers__away"):
            for html in lista.find_elements_by_class_name("fi-mh__scorer"):
                gol = Gol(html, self.pid)
                self.goles[self.equipos[1]].append(gol)
                contarGoles(gol)  # De paso lo cuento para el torneo
    
    def __repr__(self):
        if self.pendiente:
            return "%s [%s] %s - %s" % (self.fecha, self.grupo,
                                          self.equipos[0], self.equipos[1])
        else:
            return "%s [%s] %s %d - %s %d" % (self.fecha, self.grupo,
                                          self.equipos[0], self.marcador[0],
                                          self.equipos[1], self.marcador[1])

In [None]:
class Gol:
    def __init__(self, html, pid):
        self.pid = pid
        self.jid = int(html.find_element_by_tag_name('a').get_attribute('href').split('/')[-1])
        self.jugador = html.find_element_by_class_name("fi-p__n").text
        
        self.minuto = html.find_element_by_class_name("fi-mh__scorer__minute").text
        # self.minuto = int(self.minuto[:-1])  # Que onda 90'+1
        
        self.etiqueta = html.find_element_by_class_name("fi-mh__scorer__label").text
        
    def __repr__(self):
        if self.etiqueta:
            return "%s %s (%s)" % (self.jugador, self.minuto, self.etiqueta)
        else:
            return "%s %s" % (self.jugador, self.minuto)

In [None]:
goles_torneo = {}
def contarGoles(g):
    if not g.jugador in goles_torneo:
            goles_torneo[g.jugador] = []
    goles_torneo[g.jugador].append(g)

In [None]:
d.get('https://www.fifa.com/worldcup/matches/')
fixture = {}

for link in d.find_elements_by_class_name('fi-mu__link'):
    partido = Partido(link)
       
    # Compruebo que el partido esté terminado o en marcha
    if partido.pendiente:
        continue
        
    print(partido)
    
    
    if not partido.grupo in fixture:
        fixture[partido.grupo] = []
        
    fixture[partido.grupo].append(partido)

In [None]:
# Busco los goles
for grupo in fixture.keys():
    for partido in fixture[grupo]:
        partido.actualizarGoles()
        print(partido.goles)

In [None]:
# Busco los equipos
teams_url = "https://www.fifa.com/worldcup/teams/"
d.get(teams_url)

teams = []
for t in d.find_elements_by_class_name('fi-team-card__team'):
    team = {
        'id': int(t.get_attribute('data-team')),
        'name': t.text, 
        'url': t.get_attribute('href'),
    }
    print(team)
    teams.append(team)
    db.teams.insert(team).execute()

In [None]:
# Busco los jugadores
for team in teams:
    d.get(team['url'])
    sleep(5)
    
    players=[]
    for p in d.find_elements_by_class_name('fi-p'):
        pos = p.find_element_by_class_name('fi-p__info--role').text
        if pos == "COACH":
            continue
            
        url = p.find_element_by_class_name('fi-p--link').get_attribute('href')
        player = {
            'id': int(url.split('/')[-2]),
            'team_id': team['id'],
            'num': int(p.find_element_by_class_name('fi-p__num').text),
            'name': p.find_element_by_class_name('fi-p__nShorter').text, 
            'pos': pos, 
            'url': url,
        }
        print(player)
        players.append(player)
        db.players.insert(player).execute()

In [None]:
from database import Database

db = Database()

In [9]:
# Busco los partidos
matches_url = "https://www.fifa.com/worldcup/matches/"
#d.get(matches_url)
#sleep(5)

matches = {}
for m in d.find_elements_by_class_name('fi-mu__link'):
    if not "FULL-TIME" in m.text:
        continue
    
    score = m.find_element_by_class_name('fi-s__scoreText').text.split('-')
    match = {
        'home': m.find_elements_by_class_name('fi-t__nText')[0].text,
        'home_score': int(score[0]),
        'away': m.find_elements_by_class_name('fi-t__nText')[1].text,
        'away_score': int(score[0]),
    }
    print(match)

{'home': 'Russia', 'home_score': 5, 'away': 'Saudi Arabia', 'away_score': 5}
{'home': 'Egypt', 'home_score': 0, 'away': 'Uruguay', 'away_score': 0}
{'home': 'Morocco', 'home_score': 0, 'away': 'IR Iran', 'away_score': 0}
{'home': 'Portugal', 'home_score': 3, 'away': 'Spain', 'away_score': 3}
{'home': 'France', 'home_score': 2, 'away': 'Australia', 'away_score': 2}
{'home': 'Argentina', 'home_score': 1, 'away': 'Iceland', 'away_score': 1}
{'home': 'Peru', 'home_score': 0, 'away': 'Denmark', 'away_score': 0}
{'home': 'Croatia', 'home_score': 2, 'away': 'Nigeria', 'away_score': 2}
{'home': 'Costa Rica', 'home_score': 0, 'away': 'Serbia', 'away_score': 0}
{'home': 'Germany', 'home_score': 0, 'away': 'Mexico', 'away_score': 0}
{'home': 'Brazil', 'home_score': 1, 'away': 'Switzerland', 'away_score': 1}
{'home': 'Sweden', 'home_score': 1, 'away': 'Korea Republic', 'away_score': 1}
{'home': 'Belgium', 'home_score': 3, 'away': 'Panama', 'away_score': 3}
{'home': 'Tunisia', 'home_score': 1, 'awa

In [None]:
# Terminados
for html_partido in d.find_elements_by_class_name('result'):
    partido = Partido(html_partido)
    print(partido)
    
# Ongoing
for html_partido in d.find_elements_by_class_name('live'):
    partido = Partido(html_partido)
    print(partido)