### Imports

In [None]:
import os
import pandas as pd
import numpy as np
from functools import reduce


In [None]:
# Folder the source CSV files are read from.
data_dir: str = "../base_data"
# Folder the resulting CSV file is written to.
new_data_dir: str = "new_data"

In [None]:
# Columns to load from each input CSV, keyed by file name.
# Every list includes 'teamId' because the per-file frames are joined on it.
# Each column is listed exactly once per file (the standings list previously
# repeated 'teamId').
columns_to_keep_teams = {
    'teamRoster.csv': ['teamId', 'teamName'],
    'standings.csv': [
        'teamId', 'leagueId', 'form', 'teamRank',
        'gamesPlayed', 'wins', 'ties', 'losses', 'points',
        'gf', 'ga', 'gd', 'timeStamp',
    ],
}


In [None]:
# Read each configured CSV from data_dir, loading only its listed columns.
# The frames are kept in the same order as the entries of columns_to_keep_teams.
dfs_teams = [
    pd.read_csv(os.path.join(data_dir, csv_name), usecols=wanted_columns)
    for csv_name, wanted_columns in columns_to_keep_teams.items()
]

In [None]:
# Fold the loaded frames into a single one with successive inner joins on teamId.
merged_df_teams = reduce(
    lambda joined, frame: joined.merge(frame, on='teamId', how='inner'),
    dfs_teams,
)

# Target column order: first appearance across the per-file lists, no repeats.
ordered_columns_t = list(dict.fromkeys(
    name
    for wanted_columns in columns_to_keep_teams.values()
    for name in wanted_columns
))

# Keep only the columns that actually exist after the merge, in that order,
# then discard every row that has a missing value.
present_columns = [
    name for name in ordered_columns_t if name in merged_df_teams.columns
]
merged_df_teams = merged_df_teams[present_columns].dropna()


In [None]:
# Columns converted to int; this runs after rows with missing values were dropped.
cols_to_int = [
    'teamRank',
    'gamesPlayed',
    'wins',
    'ties',
    'losses',
    'points',
    'gf',
    'ga',
    'gd',
]
merged_df_teams = merged_df_teams.astype(dict.fromkeys(cols_to_int, int))


In [None]:
# Bare expression: shown as the cell output in a notebook, no effect when run as a script.
merged_df_teams

In [None]:
# League ids to keep; rows belonging to any other league are discarded.
target_leagues = [720, 740, 700, 730]

in_target_league = merged_df_teams['leagueId'].isin(target_leagues)
merged_df_teams = merged_df_teams.loc[in_target_league]
merged_df_teams

In [None]:
# Output column name -> source column. Each output column takes the first
# value found for its team.
first_value_sources = {
    'League': 'leagueId',
    'Rank': 'teamRank',
    'Team': 'teamName',
    'Form': 'form',
    'Games': 'gamesPlayed',
    'Wins': 'wins',
    'Ties': 'ties',
    'Losses': 'losses',
    'Points': 'points',
    'Goal_for': 'gf',
    'Goal_against': 'ga',
    'Goal_difference': 'gd',
}
named_aggregations = {
    out_name: (source_column, 'first')
    for out_name, source_column in first_value_sources.items()
}

# Collapse to one row per teamId, then order by league and rank within it.
per_team = merged_df_teams.groupby('teamId', as_index=False).agg(**named_aggregations)
merged_df_teams = per_team.sort_values(by=['League', 'Rank'])


In [None]:
# Short league labels keyed by league id.
league_name_map = {
    720: "GER1",
    740: "ESP1",
    700: "ENG1",
    730: "ITA1",
}

# Swap the numeric ids in the League column for their labels.
league_labels = merged_df_teams['League'].map(league_name_map)
merged_df_teams['League'] = league_labels


In [None]:
# Points divided by games played, rounded to two decimals.
points_per_game = merged_df_teams["Points"].div(merged_df_teams["Games"])
merged_df_teams["avg_points_per_match"] = points_per_game.round(2)
merged_df_teams

In [None]:
# Destination file inside new_data_dir.
output_path_teams = os.path.join(new_data_dir, 'database_teams.csv')

# Create the destination folder if it does not exist yet.
output_dir_teams, _ = os.path.split(output_path_teams)
os.makedirs(output_dir_teams, exist_ok=True)

# Write the table without the index column, then report where it was saved.
merged_df_teams.to_csv(output_path_teams, index=False)
print(f"Merged CSV saved to {output_path_teams}")
