In [60]:
import os
import bz2
import json

# Root folder that holds one sub-folder per day of the month
main_directory = "C:/Users/truls/Downloads/data/BASIC/2023/May/"

# Premier League team names from the previous season, spelled the way they
# are expected to appear inside an eventName
premier_league_teams = [
    "Arsenal", "Aston Villa", "Bournemouth", "Brentford",
    "Brighton", "Chelsea", "Crystal Palace", "Everton",
    "Fulham", "Leeds", "Leicester", "Liverpool",
    "Man City", "Man Utd", "Newcastle", "Nottm Forest",
    "Southampton", "Tottenham", "West Ham", "Wolves",
]

# Every ordered pairing of two different teams, formatted "Team A v Team B".
# Both orderings of a pair are kept, so 20 teams give 20 * 19 = 380 strings.
premier_league_fixtures = [
    f"{first_team} v {second_team}"
    for first_team in premier_league_teams
    for second_team in premier_league_teams
    if first_team != second_team
]

# Function to check if eventName matches any Premier League fixture
def is_premier_league_fixture(eventName):
    """Tell whether ``eventName`` is one of the generated fixture strings.

    Args:
        eventName: Event name to look up, e.g. ``"Arsenal v Chelsea"``.

    Returns:
        True if ``eventName`` equals an entry of the module-level
        ``premier_league_fixtures`` collection, otherwise False.
    """
    return any(fixture == eventName for fixture in premier_league_fixtures)

def _read_first_line(bz2_path):
    """Return the first line of a bz2-compressed UTF-8 text file, without its newline.

    The archive is decompressed as a stream, so only the data needed for the
    first line is read instead of the whole file.

    Raises:
        OSError, EOFError: the file cannot be opened or is not a valid/complete bz2 stream.
        UnicodeDecodeError: the decompressed bytes are not valid UTF-8.
    """
    # newline='\n' disables newline translation, so the line is cut at the
    # first '\n' exactly like ``text.split('\n', 1)[0]`` would.
    with bz2.open(bz2_path, 'rt', encoding='utf-8', newline='\n') as stream:
        return stream.readline().rstrip('\n')


# Walk every possible day-of-month folder and report the Premier League
# fixtures found in it.
for day in range(1, 32):
    print(f"\n{day}")

    # Generate the folder path for the specific day
    base_directory = os.path.join(main_directory, f"{day}")

    # A day without data (or a month shorter than 31 days) must not abort the scan
    if not os.path.isdir(base_directory):
        print(f"Day folder not found: {base_directory}")
        continue

    # Sub-folders of the day folder, sorted so the output order is deterministic
    folders = sorted(
        item for item in os.listdir(base_directory)
        if os.path.isdir(os.path.join(base_directory, item))
    )

    # Inspect the first .bz2 file of each folder
    for folder in folders:
        folder_path = os.path.join(base_directory, folder)

        # os.listdir returns entries in arbitrary order; sort so "first" is well defined
        bz2_files = sorted(file for file in os.listdir(folder_path) if file.endswith(".bz2"))

        if not bz2_files:
            print(f"No .bz2 files found in folder {folder}")
            continue

        first_bz2_file = bz2_files[0]
        file_path = os.path.join(folder_path, first_bz2_file)

        try:
            first_line = _read_first_line(file_path)
            json_data = json.loads(first_line)

            # eventName is read from the first market change of the first record
            eventName = json_data['mc'][0]['marketDefinition']['eventName']
        except (OSError, EOFError, UnicodeDecodeError) as e:
            # Unreadable, corrupt or non-UTF-8 archive: report it and move on
            print(f"Error reading {first_bz2_file} in folder {folder}: {e}")
            continue
        except (json.JSONDecodeError, KeyError, IndexError, TypeError) as e:
            # Invalid JSON, or JSON that does not have the expected shape
            print(f"Error parsing JSON from {first_bz2_file} in folder {folder}: {e}")
            continue

        # Check if eventName matches any Premier League fixture
        if is_premier_league_fixture(eventName):
            print(f"{eventName} in folder {folder}")



1

2
Arsenal v Chelsea in folder 32294920

3
Liverpool v Fulham in folder 32294810

4
Brighton v Man Utd in folder 32294784
Man City v West Ham in folder 32294815

5

6
Leicester v Everton in folder 32294812
Man City v Leeds in folder 32294814
Tottenham v Crystal Palace in folder 32294818
Bournemouth v Chelsea in folder 32294832
Wolves v Aston Villa in folder 32295380

7
Newcastle v Arsenal in folder 32303142
West Ham v Man Utd in folder 32305797

8
Liverpool v Brentford in folder 32294834
Fulham v Leicester in folder 32305796

9
Nottm Forest v Southampton in folder 32305996
Brighton v Everton in folder 32307350

10

11

12

13
Aston Villa v Tottenham in folder 32314391
Chelsea v Nottm Forest in folder 32314392
Crystal Palace v Bournemouth in folder 32314395
Leeds v Newcastle in folder 32314396
Man Utd v Wolves in folder 32314399
Southampton v Fulham in folder 32314400

14
Arsenal v Brighton in folder 32314389
Brentford v West Ham in folder 32314393
Everton v Man City in folder 323143

In [27]:
import os
import bz2
import json

def get_json_keys_from_bz2_files(folder_path):
    """Print which .bz2 files in ``folder_path`` describe a MATCH_ODDS market.

    For every ``.bz2`` file directly inside ``folder_path`` the file is
    decompressed, its last non-empty line is parsed as JSON, and
    ``['mc'][0]['marketDefinition']['marketType']`` is inspected. When that
    value is ``"MATCH_ODDS"`` the file name and the market type are printed.

    Args:
        folder_path: Directory containing the ``.bz2`` files.

    Returns:
        None. Results and per-file parse errors are printed to stdout.

    Raises:
        OSError: if ``folder_path`` cannot be listed or a file cannot be
            read/decompressed (these are not caught here).
    """
    # Sorted so the files are reported in a deterministic order
    bz2_files = sorted(file for file in os.listdir(folder_path) if file.endswith(".bz2"))

    for bz2_file_name in bz2_files:
        bz2_file_path = os.path.join(folder_path, bz2_file_name)

        try:
            with open(bz2_file_path, 'rb') as bz2_file:
                compressed_data = bz2_file.read()

            # Decompress and decode as UTF-8 (assuming the archive contains text)
            text_data = bz2.decompress(compressed_data).decode('utf-8')

            # Take the last line that actually has content. Files usually end
            # with a newline, so the final split element would be empty; a
            # capped split (maxsplit=1) would return the FIRST line instead.
            non_empty_lines = [line for line in text_data.splitlines() if line.strip()]
            if not non_empty_lines:
                print(f"Error parsing JSON from {bz2_file_name}: file contains no data")
                continue
            last_line = non_empty_lines[-1]
            json_data = json.loads(last_line)

            marketDefinition = json_data['mc'][0]['marketDefinition']
            if marketDefinition.get("marketType") == "MATCH_ODDS":
                print(f"'marketType' in {bz2_file_name}:")
                print(marketDefinition["marketType"])
        except (json.JSONDecodeError, KeyError, IndexError, TypeError, AttributeError) as e:
            # Invalid JSON, or JSON without the expected mc/marketDefinition shape
            print(f"Error parsing JSON from {bz2_file_name}: {e}")

if __name__ == "__main__":
    # Folder whose .bz2 files should be inspected
    folder_path = "C:/Users/truls/Downloads/data/BASIC/2023/May/2/32294920"

    # Print the files in that folder whose market type is MATCH_ODDS
    get_json_keys_from_bz2_files(folder_path=folder_path)


'marketType' in 1.213313106.bz2:
MATCH_ODDS


In [41]:
import os
import bz2
import json

bz2_file_path = "C:/Users/truls/Downloads/data/BASIC/2023/May/2/32294920/1.213313106.bz2"

# Falls back to an empty object when no line of the file parses as JSON
json_data = {}

# Read the archive, decompress it and cut the raw bytes into lines
with open(bz2_file_path, 'rb') as bz2_file:
    raw_lines = bz2.decompress(bz2_file.read()).splitlines()

# Walk backwards from the end and keep the first line that parses as JSON,
# i.e. the last complete JSON object in the file
for raw_line in raw_lines[::-1]:
    try:
        json_data = json.loads(raw_line.decode('utf-8'))
    except json.JSONDecodeError:
        continue  # Not valid JSON, try the line before it
    break  # Valid JSON object found

# Drill down to the runner list; missing keys fall back to empty containers
market_changes = json_data.get('mc', [{}])
market_definition = market_changes[0].get('marketDefinition', {})
runners = market_definition.get('runners', [])

# Print the name and bsp of every runner ('N/A' when a field is absent)
for runner in runners:
    print(f"Name: {runner.get('name', 'N/A')}, BSP: {runner.get('bsp', 'N/A')}")


Name: Arsenal, BSP: 1.63
Name: Chelsea, BSP: 6.12
Name: The Draw, BSP: 4.41
