### Imports

In [None]:
import os
import pandas as pd
import numpy as np
from functools import reduce


In [None]:
# Directory the input CSV files are read from (relative path).
data_dir = "../base_data"
# Directory the merged output CSV is written to (relative path).
new_data_dir = "new_data"

In [None]:
# Maps each input CSV file name to the columns that should be loaded from it.
# 'teamId' appears in both files because it is the key the frames are merged
# on; it must be listed only once per file (the original listed it twice for
# standings.csv).
columns_to_keep_teams = {
    'teamRoster.csv': ['teamId', 'teamName'],
    'standings.csv': [
        'teamId', 'form', 'teamRank', 'gamesPlayed',
        'wins', 'ties', 'losses', 'points', 'gf', 'ga', 'gd',
    ],
}


In [None]:
# Load every configured CSV from data_dir, reading only the columns listed
# for it; the resulting frames keep the order of columns_to_keep_teams.
dfs_teams = [
    pd.read_csv(os.path.join(data_dir, csv_name), usecols=wanted_columns)
    for csv_name, wanted_columns in columns_to_keep_teams.items()
]

In [None]:
def _merge_on_team_id(left, right):
    """Inner-join two frames on their shared 'teamId' column."""
    return left.merge(right, on='teamId', how='inner')


# Fold all loaded frames into a single table keyed by teamId.
merged_df_teams = reduce(_merge_on_team_id, dfs_teams)

# Column order: first occurrence of each name across the configured lists
# (dict.fromkeys keeps insertion order and drops repeats).
ordered_columns_t = list(dict.fromkeys(
    name
    for column_list in columns_to_keep_teams.values()
    for name in column_list
))

# Keep only the ordered columns that actually exist after the merge,
# then drop every row that contains a missing value.
present_columns = [name for name in ordered_columns_t if name in merged_df_teams.columns]
merged_df_teams = merged_df_teams[present_columns].dropna()


In [None]:
# Columns that are converted to integer dtype before export.
cols_to_int = ['teamRank', 'gamesPlayed', 'wins', 'ties', 'losses', 'points', 'gf', 'ga', 'gd']
# astype with a column -> dtype mapping casts only the listed columns and
# leaves the remaining ones untouched.
merged_df_teams = merged_df_teams.astype({name: int for name in cols_to_int})


In [None]:
# Preview the first five rows of the merged team table.
merged_df_teams.head(5)

In [None]:
# Per-team aggregation that keeps the first value of each home-match column.
#
# merged_df_teams is assembled from teamRoster.csv and standings.csv only, so
# the match-level source columns requested here are not part of it. The
# original unconditional .agg(...) therefore raised a KeyError and stopped the
# notebook before the export cell. Only aggregations whose source column is
# present are applied; the missing ones are reported instead of crashing.
_home_stat_sources = {
    "homeTeamId": "homeTeamId",
    "homeTeamScore": "homeTeamScore",
    "homeTeamWinner": "homeTeamWinner",
    "possessionPct_home": "possessionPct",
    "totalShots_home": "totalShots",
    "shotsOnTarget_home": "shotsOnTarget",
    "saves_home": "saves",
    "wonCorners_home": "wonCorners",
}

_available_aggs = {
    out_name: pd.NamedAgg(column=src_name, aggfunc="first")
    for out_name, src_name in _home_stat_sources.items()
    if src_name in merged_df_teams.columns
}
_missing_sources = [
    src_name
    for src_name in _home_stat_sources.values()
    if src_name not in merged_df_teams.columns
]
if _missing_sources:
    print(f"Skipping aggregation of columns missing from merged_df_teams: {_missing_sources}")

if _available_aggs:
    df = merged_df_teams.groupby(['teamId']).agg(**_available_aggs).reset_index()
else:
    # agg() cannot be called without any aggregation; fall back to one row
    # per teamId, sorted like the groupby result would be.
    df = (
        merged_df_teams[['teamId']]
        .drop_duplicates()
        .sort_values('teamId')
        .reset_index(drop=True)
    )

In [None]:
# Destination file for the merged team table.
output_path_teams = os.path.join(new_data_dir, 'database_teams.csv')

# Create the destination folder if it does not exist yet.
_target_dir = os.path.dirname(output_path_teams)
os.makedirs(_target_dir, exist_ok=True)

# Write the table without the DataFrame index column and report the location.
merged_df_teams.to_csv(output_path_teams, index=False)
print(f"Merged CSV saved to {output_path_teams}")
