# M5 World Championship Statistics

<!-- The M5 World Championship is the final event and the World Championship of the MLBB 2023 Competitive Season organized by Moonton.

I scraped the result statistics published at https://liquipedia.net/mobilelegends/M5_World_Championship/Statistics and stored them in 'm5_world_championship_stats.csv'.

Included here are the hero picks and bans, win rates, the teams that played each hero, etc.

These statistics could help MLBB players decide which heroes to play in their games.

libraries used: Requests, BeautifulSoup, Pandas, Matplotlib -->


In [16]:
import requests
from bs4 import BeautifulSoup
import csv

url = "https://liquipedia.net/mobilelegends/M5_World_Championship/Statistics"

# Upper bound, in seconds, on how long to wait for the server. requests applies
# no timeout by default, so without it a stalled connection would hang this cell.
REQUEST_TIMEOUT_SECONDS = 30

# Send an HTTP request to the URL and get the content
response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

# Check if the request was successful (status code 200)
if response.status_code == 200:
    # Parse the HTML content using BeautifulSoup
    soup = BeautifulSoup(response.text, 'html.parser')

    output_csv_path = 'm5_world_championship_stats.csv'

    # Create a CSV file and write the header
    with open(output_csv_path, 'w', newline='') as csvfile:
        csv_writer = csv.writer(csvfile)
        header = ['Rank', 'Hero', 'Games Played', 'Games Won', 'Games Lost', 'Win Rate', 'Pick Rate',
                  'Blue Games Played', 'Blue Games Won', 'Blue Games Lost', 'Blue Win Rate',
                  'Red Games Played', 'Red Games Won', 'Red Games Lost', 'Red Win Rate',
                  'Games Banned', 'Ban Rate', 'Bans and Picks', 'Ban and Pick Rate', 'Team Picks']
        csv_writer.writerow(header)

        # One <tr class="dota-stat-row"> is read per hero
        rows = soup.find_all("tr", attrs={"class": "dota-stat-row"})

        # Extract information from each row
        for row in rows:
            columns = row.find_all('td')

            # Every field is read by position, so a row with fewer cells than the
            # header cannot be mapped. Skip it visibly instead of aborting the
            # scrape halfway through and leaving a truncated CSV behind.
            if len(columns) < len(header):
                print(f"Skipping row with {len(columns)} cells (expected {len(header)})")
                continue

            hero_rank = columns[0].text
            # The hero name is the text of the second link in the cell
            hero_name = columns[1].find_all('a')[1].get_text()

            # Cells 2-18 are plain-text statistics in the same order as header[2:19]:
            # overall picks, blue side picks, red side picks, bans, bans and picks
            stat_values = [cell.text for cell in columns[2:19]]

            # Team Picks: the team identifiers come from the span attributes ...
            team_cell = columns[19]
            teams = team_cell.find_all("span", attrs={"data-highlightingclass": True})
            team_list = [element["data-highlightingclass"] for element in teams]

            # ... and the cell text is all per-team records run together. Each record
            # ends with ')', so cut after every ')' and drop whatever trails the last one.
            records = team_cell.text
            separated_records = [chunk + ')' for chunk in records.split(')')[:-1]]

            # Pairs teams and records by position; csv.writer stores the dict as its str() form
            team_pick_records = dict(zip(team_list, separated_records))

            csv_writer.writerow([hero_rank, hero_name, *stat_values, team_pick_records])

    # Report success only once the file has been closed (and therefore flushed)
    print(f"Data has been written to {output_csv_path}")
else:
    print("Failed to retrieve the page. Status Code:", response.status_code)


Data has been written to m5_world_championship_stats.csv


<!--  Here I used the last column of 'm5_world_championship_stats.csv' to extract the teams that played each hero during the tournament.
 This data could help determine which heroes each team likes to play. -->

In [14]:
import ast  # Import the ast module for literal_eval

# Path to the statistics CSV whose last column holds the per-team pick records
csv_file_path = 'm5_world_championship_stats.csv'

# Maps hero name (second CSV column) -> dictionary parsed from the last CSV column
last_column_dict = {}

# Read the CSV file and extract the last column.
# newline='' is how the csv module expects its files to be opened.
with open(csv_file_path, 'r', newline='') as csvfile:
    csv_reader = csv.reader(csvfile)

    for row_index, row in enumerate(csv_reader):
        # A blank line comes back as an empty list; there is nothing to read from it
        if not row:
            continue

        # The dictionary was written as the last field of each row
        last_column_value_str = row[-1]

        # Convert the string representation of the dictionary to an actual dictionary
        try:
            last_column_dict_value = ast.literal_eval(last_column_value_str)
        except (ValueError, SyntaxError):
            last_column_dict_value = None

        if not isinstance(last_column_dict_value, dict):
            # Row 0 is normally the header, whose last cell is a column title rather
            # than a literal, so it is skipped quietly. Any other row that does not
            # yield a dictionary is reported instead of being dropped silently.
            if row_index > 0:
                print(f"Error processing row: {row}")
            continue

        # The hero name is the key for this row's dictionary
        hero_name = row[1]
        last_column_dict[hero_name] = last_column_dict_value

# Print the dictionary
# print(last_column_dict)

<!--  I then used this dictionary to create another CSV file, 'm5_world_championship_hero_teams.csv', with one row per hero and team. -->

In [12]:
# Source mapping: hero name -> {team: record string}
hero_data = last_column_dict
output_csv_path = 'm5_world_championship_hero_teams.csv'

# Flatten the nested mapping into one CSV row per (hero, team) pair
with open(output_csv_path, 'w', newline='') as csvfile:
    csv_writer = csv.writer(csvfile)

    # Column titles
    csv_writer.writerow(['Hero Name', 'Team', 'Games Played', 'Wins', 'Loss'])

    for hero_name, team_data in hero_data.items():
        for team, records in team_data.items():
            # The text before the first '(' is the games-played count
            pieces = records.split('(')
            games_played = int(pieces[0].strip())

            # The text after it is '<wins>-<losses>)'
            score = pieces[1].split('-')
            wins = int(score[0].strip())
            losses = int(score[1].replace(')', '').strip())

            csv_writer.writerow([hero_name, team, games_played, wins, losses])

print(f"Data has been written to {output_csv_path}")

Data has been written to m5_world_championship_hero_teams.csv
