In [17]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from tqdm import tqdm
from datetime import date

In [18]:
# HLTV numeric map id -> map name. The ids are interpolated into the
# per-map stats URL; the names label every scraped match.
maps = {
    47: "Ancient",
    29: "Cache",
    39: "Cobblestone",
    31: "Dust2",
    33: "Inferno",
    32: "Mirage",
    34: "Nuke",
    40: "Overpass",
    30: "Seasons",
    35: "Train",
    46: "Vertigo",
}

In [19]:
# Base URL of the HLTV team statistics pages; the team list is read from it
# directly and the lineup / per-map URLs are built by appending to it.
main_url = "https://www.hltv.org/stats/teams"

In [20]:
def correct_format(string):
    """Return ``string`` as a lowercase slug with words joined by hyphens.

    Any run of whitespace (including leading/trailing) counts as a single
    word separator, so ``"  Natus   Vincere "`` becomes ``"natus-vincere"``.
    """
    words = string.split()
    return "-".join(words).lower()

In [21]:
# Three-letter English month abbreviation -> month number (1-12).
months = {
    abbreviation: number
    for number, abbreviation in enumerate(
        ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
        start=1,
    )
}

In [22]:
def convert_hltv_date(hltv_date):
    """Turn a ``"<Mon> <year>"`` string (e.g. ``"Jan 2020"``) into a date.

    The first whitespace-separated token is looked up in ``months`` and the
    second is parsed as the year; the day is always set to the 1st.
    """
    tokens = hltv_date.split()

    # Keyword arguments are evaluated left to right, so the month lookup
    # still happens before the year is parsed.
    return date(month=months[tokens[0]], year=int(tokens[1]), day=1)

In [23]:
class roster:
    """A team lineup together with the period it is recorded for."""

    def __init__(self, lineup, start_date, end_date):
        """Store the lineup and the two dates that bound it."""
        self._lineup = lineup
        self._start_date = start_date
        self._end_date = end_date

    def get_lineup(self):
        """Return the lineup exactly as it was passed to the constructor."""
        return self._lineup

    def get_dates(self):
        """Return the ``(start_date, end_date)`` pair of this roster."""
        period = (self._start_date, self._end_date)
        return period

In [24]:
def str_to_date(string):
    """Parse a ``day/month/year`` string with a two-digit year into a date.

    The year component is offset by 2000, so ``"5/3/21"`` is 5 March 2021.
    """
    day, month, short_year = (int(part) for part in string.split("/"))
    return date(2000 + short_year, month, day)

In [25]:
class match:
    """Plain record describing one played map between two teams."""

    def __init__(self, score_1, score_2, team_1, team_2, event, map_name, dates):
        # Scores and participants are stored pairwise, in the same order:
        # index 0 belongs to team_1, index 1 to team_2.
        self.score = [score_1, score_2]
        self.participants = [team_1, team_2]
        self.map = map_name
        self.event = event
        # Despite the plural parameter name this is stored as a single value.
        self.date = dates

In [26]:
class TableParser():
    """Downloads HLTV pages and converts their stats tables into ``match`` records."""

    def __init__(self):
        pass

    def get_table(self, url, timeout=30):
        """Download ``url`` and return the first ``<table>`` element of the page.

        Parameters
        ----------
        url : str
            Address of the page to download.
        timeout : float, optional
            Seconds to wait for the server before ``requests`` raises a
            timeout error; without it a stalled connection would hang the
            whole scraping loop.

        Returns
        -------
        The first ``table`` tag found by BeautifulSoup, or ``None`` when the
        page does not contain one.
        """
        html_text = requests.get(url, timeout=timeout).text
        soup = BeautifulSoup(html_text, 'html.parser')

        return soup.find("table")

    def get_stats_from_table(self, table, map_title, team_name=None):
        """Build one ``match`` per data row of ``table``.

        Each row is read positionally as date, opponent, event, result.

        Parameters
        ----------
        table
            Object exposing ``get_rows()``, whose rows expose ``findAll("td")``.
        map_title : str
            Map name stored on every returned match.
        team_name : str, optional
            Name of the team the table belongs to (stored as the first
            participant). When omitted, the notebook-level variable
            ``team_name`` is used, which is how this method originally
            obtained it.

        Returns
        -------
        list
            The ``match`` objects, in table order.

        Raises
        ------
        NameError
            If ``team_name`` is neither passed nor defined at notebook level.
        """
        if team_name is None:
            # Backward compatibility: older callers relied on a global being
            # set right before the call instead of passing the name in.
            try:
                team_name = globals()["team_name"]
            except KeyError:
                raise NameError(
                    "name 'team_name' is not defined; pass team_name=... explicitly"
                ) from None

        stats = []

        for row in table.get_rows():

            cells = row.findAll("td")

            # Rows without the four expected cells are skipped; previously
            # they silently reused the values parsed from the preceding row.
            if len(cells) < 4:
                continue

            date_cell, opponent_cell, event_cell, result_cell = cells[:4]

            match_date = str_to_date(date_cell.text)
            team_2 = correct_format(opponent_cell.text)
            event = event_cell.text

            # The first and last whitespace-separated tokens of the result
            # cell are the two scores.
            score_tokens = result_cell.text.split()
            score_1 = int(score_tokens[0])
            score_2 = int(score_tokens[-1])

            stats.append(match(score_1, score_2, team_name, team_2, event, map_title, match_date))

        return stats

In [27]:
class Team:
    """A team identified by name and id, plus the rosters recorded for it."""

    def __init__(self, team_name, team_id):
        self._name, self._id = team_name, team_id
        self._rosters = []

    def add_roster(self, roster):
        """Append ``roster`` to this team's list of rosters."""
        self._rosters.append(roster)

    def get_rosters(self):
        """Iterate lazily over the rosters in insertion order."""
        yield from self._rosters

    def get_name(self):
        """Return the team name given at construction."""
        return self._name

    def get_id(self):
        """Return the team id given at construction."""
        return self._id

    def get_lineup(self, date):
        """Return the lineup of the first roster whose period contains ``date``.

        A roster matches when ``start <= date < end`` (the end date itself is
        excluded). ``None`` is returned when no roster matches.
        """
        for candidate in self._rosters:
            period = candidate.get_dates()
            if period[0] <= date < period[1]:
                return candidate.get_lineup()

        return None
        

In [28]:
class Table:
    """Wrapper around a parsed HTML table that exposes its rows."""

    def __init__(self, table):
        # Every <tr> is kept, including the first one.
        all_rows = table.findAll("tr")
        self._rows = all_rows

    def get_rows(self):
        """Return all rows except the first (presumably the header row)."""
        data_rows = self._rows[1:]
        return data_rows
        

Getting the most popular teams' IDs and names

In [29]:
parser = TableParser()
teams_table = Table(parser.get_table(main_url))
teams = []

for row in teams_table.get_rows():

    # The link in the first cell of each row carries both identifiers:
    # the last two "/"-separated parts of its href are the id and the name.
    link = row.findAll("td")[0].find("a")
    href = str(link.get("href")).split("/")

    team_id = href[-2]
    team_name = correct_format(href[-1])

    teams.append(Team(team_name, team_id))

In [30]:
# Download every team's lineup history and attach one `roster` per lineup
# container found on the page.
for team in teams:

    team_id = team.get_id()
    team_name = team.get_name()

    roster_url = f'{main_url}/lineups/{team_id}/{team_name}'
    # A timeout keeps one stalled connection from hanging the whole loop.
    html_text = requests.get(roster_url, timeout=30).text
    soup = BeautifulSoup(html_text, 'html.parser')

    for elem in soup.findAll("div", class_ = "lineup-container"):

        lineup = [player.text for player in elem.findAll("div", class_ = "text-ellipsis")]

        # Reset per container so a lineup without usable date information can
        # never inherit the dates parsed for the previous lineup.
        start_date = None
        end_date = None

        for period in elem.findAll("div", class_ = "lineup-year"):

            # The last <span> is not one of the dates and is dropped.
            dates = period.findAll("span")[:-1]

            if not dates:
                continue

            start_date = convert_hltv_date(dates[0].text)

            if len(dates) == 1:
                # Only a start date is given: treat the lineup as running
                # until today.
                end_date = date.today()
            else:
                end_date = convert_hltv_date(dates[1].text)

        if start_date is None:
            # No period could be parsed for this lineup; skip it rather than
            # store a roster with wrong dates.
            continue

        team.add_roster(roster(lineup, start_date, end_date))

Getting the final all-time stats

In [None]:
stats = []

for team in tqdm(teams):

    # get_stats_from_table takes the scraped team's name from the global
    # `team_name`, so it has to be assigned before the calls below.
    team_id = team.get_id()
    team_name = team.get_name()

    for map_id, map_title in maps.items():

        current_url = f'{main_url}/map/{map_id}/{team_id}/{team_name}'
        current_table = Table(parser.get_table(current_url))

        stats.extend(parser.get_stats_from_table(current_table, map_title))
        

  0%|                                                                                           | 0/62 [00:00<?, ?it/s]

Getting the teams' lineups

In [None]:
# Index the scraped teams by name once instead of scanning the list twice per
# game. setdefault keeps the first team for a given name, matching the
# first-match behaviour of a linear search.
teams_by_name = {}
for team in teams:
    teams_by_name.setdefault(team.get_name(), team)

for game in stats:

    team_1, team_2 = game.participants
    # Deliberately not named `date`: that would overwrite the `date` class
    # imported from datetime and break every later date(...) / date.today()
    # call in the notebook.
    match_date = game.date

    # Opponents that are not among the scraped teams get None instead of
    # silently reusing the lineup found for a previous game.
    known_team_1 = teams_by_name.get(team_1)
    known_team_2 = teams_by_name.get(team_2)

    lineup_1 = known_team_1.get_lineup(match_date) if known_team_1 is not None else None
    lineup_2 = known_team_2.get_lineup(match_date) if known_team_2 is not None else None

    game.lineups = [lineup_1, lineup_2]