### Imports

In [50]:
import os
import pandas as pd
import numpy as np
from functools import reduce


In [51]:
# Folder (relative to the working directory) that receives the CSV this notebook writes.
new_data_dir = "new_data"
# Folder the source CSV files are read from.
data_dir = "../base_data"

In [52]:
# Maps each source CSV file name to the columns that are read from it.
# 'eventId' is listed for both files; it is the key the frames are merged on.
columns_to_keep_matches = {
    'fixtures.csv': [
        'eventId',
        'homeTeamId', 'awayTeamId',
        'homeTeamWinner', 'awayTeamWinner',
        'homeTeamScore', 'awayTeamScore',
        'date',
    ],
    'teamStats.csv': [
        'eventId', 'teamId',
        'possessionPct',
        'totalShots', 'shotsOnTarget',
        'saves', 'wonCorners',
    ],
}

In [53]:
# Read every configured CSV from data_dir, loading only its listed columns.
# The resulting list follows the declaration order of columns_to_keep_matches.
dfs_matches = [
    pd.read_csv(os.path.join(data_dir, csv_name), usecols=wanted_columns)
    for csv_name, wanted_columns in columns_to_keep_matches.items()
]
   

In [54]:
# Inner-join the loaded frames on 'eventId', folding the list left to right.
merged_df_matches = reduce(
    lambda joined, frame: joined.merge(frame, on='eventId', how='inner'),
    dfs_matches,
)

# Every requested column name in first-seen order, with duplicates removed
# (dict keys preserve insertion order).
ordered_columns_m = list(dict.fromkeys(
    name
    for wanted in columns_to_keep_matches.values()
    for name in wanted
))

# Keep only the requested columns that exist after the merge, in that order,
# then discard every row that has at least one missing value.
merged_df_matches = merged_df_matches[
    [name for name in ordered_columns_m if name in merged_df_matches.columns]
].dropna()

In [55]:
# Columns converted to integer dtype.
# NOTE(review): astype(int) discards any fractional part without warning —
# confirm these columns only ever hold whole numbers.
cols_to_int = ['totalShots', 'shotsOnTarget', 'saves', 'wonCorners']

# Build a new frame with DataFrame.astype instead of assigning into a column
# slice: the frame comes from a column selection followed by dropna(), and
# writing into it in place can emit SettingWithCopyWarning on pandas versions
# without copy-on-write. Column order and resulting dtypes are unchanged.
merged_df_matches = merged_df_matches.astype({col: int for col in cols_to_int})

In [56]:
# Destination of the merged table: <new_data_dir>/database_matches.csv.
output_path_matches = os.path.join(new_data_dir, 'database_matches.csv')

# Create the parent folder if it is missing. os.makedirs('') raises
# FileNotFoundError, so skip the call when the output path has no directory
# component (e.g. new_data_dir set to an empty string).
output_dir_matches = os.path.dirname(output_path_matches)
if output_dir_matches:
    os.makedirs(output_dir_matches, exist_ok=True)

# index=False keeps the DataFrame's row index out of the written file.
merged_df_matches.to_csv(output_path_matches, index=False)

print(f"Merged CSV saved to {output_path_matches}")


Merged CSV saved to new_data\database_matches.csv
