In [127]:
import pandas as pd
from pathlib import Path

In [128]:
# Directory layout used by this notebook:
#   BASE - cleaned VCT 2024 data that is read as input
#   DIM  - gold-layer dimension tables that are read as input
#   OUT  - gold-layer fact tables; this is where the result is written
BASE = Path("../data/clean/vct_2024")
DIM = Path("../data/gold/dimensions")
OUT = Path("../data/gold/facts")

# Only the output directory is created here; the input directories are
# expected to exist already.
OUT.mkdir(exist_ok=True, parents=True)

In [129]:
# Per-player match overview stats from the cleaned (silver) layer.
overview = pd.read_parquet(BASE / "matches/overview.parquet")

# Gold-layer dimension tables, loaded in the same order as the names on the
# left-hand side of the assignment.
(
    dim_player,
    dim_team,
    dim_tournament,
    dim_match,
    dim_map,
    dim_agent,
) = [
    pd.read_parquet(DIM / dimension_file)
    for dimension_file in (
        "dim_player.parquet",
        "dim_team.parquet",
        "dim_tournament.parquet",
        "dim_match.parquet",
        "dim_map.parquet",
        "dim_agent.parquet",
    )
]

In [130]:
# Rows whose map_name is "All Maps" are aggregates rather than a single map,
# so they are excluded. The copy detaches the result from the original frame.
is_single_map = overview["map_name"] != "All Maps"
overview = overview.loc[is_single_map].copy()

In [131]:
# Preview the first five rows left after the "All Maps" filter.
overview.head(n=5)

Unnamed: 0,tournament_name,stage_name,match_type_name,match_name,map_name,player_name,team_name,agent_name,rating,average_combat_score,...,deaths,assists,kills_deaths_ratio,kill_assist_trade_survive_percentage,average_damage_per_round,headshot_percentage,first_kills,first_deaths,first_kills_deaths_ratio,side
0,Valorant Champions 2024,Group Stage,Opening (B),Gen.G vs Sentinels,Haven,t3xture,Gen.G,jett,1.6,305.0,...,12.0,4.0,12.0,0.81,188.0,0.31,3.0,4.0,-1.0,both
1,Valorant Champions 2024,Group Stage,Opening (B),Gen.G vs Sentinels,Haven,t3xture,Gen.G,jett,1.71,364.0,...,5.0,2.0,7.0,0.89,216.0,0.37,2.0,2.0,0.0,attack
2,Valorant Champions 2024,Group Stage,Opening (B),Gen.G vs Sentinels,Haven,t3xture,Gen.G,jett,1.52,261.0,...,7.0,2.0,5.0,0.75,168.0,0.24,1.0,2.0,-1.0,defend
3,Valorant Champions 2024,Group Stage,Opening (B),Gen.G vs Sentinels,Haven,Meteor,Gen.G,killjoy,1.23,221.0,...,10.0,4.0,7.0,0.76,140.0,0.26,1.0,4.0,-3.0,both
4,Valorant Champions 2024,Group Stage,Opening (B),Gen.G vs Sentinels,Haven,Meteor,Gen.G,killjoy,1.16,229.0,...,4.0,1.0,4.0,0.78,127.0,0.35,1.0,1.0,0.0,attack


In [132]:
# Attach player_id to every overview row by matching on player_name.
# how="left" keeps every overview row, leaving player_id null when no player
# matches. validate="many_to_one" makes pandas raise MergeError if
# player_name is not unique in dim_player, instead of silently duplicating
# fact rows.
fact = overview.merge(
    dim_player,
    on="player_name",
    how="left",
    validate="many_to_one",
)

In [133]:
# Number of rows that did not resolve a player_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_player_ids = int(fact["player_id"].isna().sum())
print(missing_player_ids)
assert missing_player_ids == 0, (
    f"{missing_player_ids} fact rows have no matching player_id in dim_player"
)

0


In [134]:
# Attach team_id to every fact row by matching on team_name.
# how="left" keeps every fact row, leaving team_id null when no team matches.
# validate="many_to_one" makes pandas raise MergeError if team_name is not
# unique in dim_team, instead of silently duplicating fact rows.
fact = fact.merge(
    dim_team,
    on="team_name",
    how="left",
    validate="many_to_one",
)

In [135]:
# Number of rows that did not resolve a team_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_team_ids = int(fact["team_id"].isna().sum())
print(missing_team_ids)
assert missing_team_ids == 0, (
    f"{missing_team_ids} fact rows have no matching team_id in dim_team"
)

0


In [136]:
# Attach the tournament-level ids by matching on the combination of
# tournament, stage and match type names.
# how="left" keeps every fact row, leaving the ids null when no combination
# matches. validate="many_to_one" makes pandas raise MergeError if that name
# combination is not unique in dim_tournament, instead of silently
# duplicating fact rows.
fact = fact.merge(
    dim_tournament,
    on=[
        "tournament_name",
        "stage_name",
        "match_type_name",
    ],
    how="left",
    validate="many_to_one",
)


In [137]:
# Number of rows that did not resolve a tournament_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_tournament_ids = int(fact["tournament_id"].isna().sum())
print(missing_tournament_ids)
assert missing_tournament_ids == 0, (
    f"{missing_tournament_ids} fact rows have no matching tournament_id "
    "in dim_tournament"
)

0


In [138]:
# List the columns on fact after the player, team and tournament merges.
fact.columns

Index(['tournament_name', 'stage_name', 'match_type_name', 'match_name',
       'map_name', 'player_name', 'team_name', 'agent_name', 'rating',
       'average_combat_score', 'kills', 'deaths', 'assists',
       'kills_deaths_ratio', 'kill_assist_trade_survive_percentage',
       'average_damage_per_round', 'headshot_percentage', 'first_kills',
       'first_deaths', 'first_kills_deaths_ratio', 'side', 'player_id',
       'team_id', 'tournament_id', 'stage_id', 'match_type_id'],
      dtype='str')

In [139]:
# List dim_match's columns to see which ones it shares with fact; the shared
# columns are the candidates for the join keys of the next merge.
dim_match.columns

Index(['match_id', 'game_id', 'match_name', 'map_name', 'tournament_id',
       'stage_id', 'match_type_id'],
      dtype='str')

In [140]:
# Attach the match-level ids by matching on the tournament/stage/match-type
# ids together with the match name and map name.
# how="left" keeps every fact row, leaving the ids null when no match row
# matches. validate="many_to_one" makes pandas raise MergeError if that key
# combination is not unique in dim_match, instead of silently duplicating
# fact rows.
fact = fact.merge(
    dim_match,
    on=[
        "tournament_id",
        "stage_id",
        "match_type_id",
        "match_name",
        "map_name",
    ],
    how="left",
    validate="many_to_one",
)

In [141]:
# Number of rows that did not resolve a match_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_match_ids = int(fact["match_id"].isna().sum())
print(missing_match_ids)
assert missing_match_ids == 0, (
    f"{missing_match_ids} fact rows have no matching match_id in dim_match"
)

0


In [142]:
# Attach map_id to every fact row by matching on map_name.
# how="left" keeps every fact row, leaving map_id null when no map matches.
# validate="many_to_one" makes pandas raise MergeError if map_name is not
# unique in dim_map, instead of silently duplicating fact rows.
fact = fact.merge(
    dim_map,
    on="map_name",
    how="left",
    validate="many_to_one",
)

In [143]:
# Number of rows that did not resolve a map_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_map_ids = int(fact["map_id"].isna().sum())
print(missing_map_ids)
assert missing_map_ids == 0, (
    f"{missing_map_ids} fact rows have no matching map_id in dim_map"
)

0


In [144]:
# Attach agent_id to every fact row by matching on agent_name.
# how="left" keeps every fact row, leaving agent_id null when no agent
# matches. validate="many_to_one" makes pandas raise MergeError if agent_name
# is not unique in dim_agent, instead of silently duplicating fact rows.
fact = fact.merge(
    dim_agent,
    on="agent_name",
    how="left",
    validate="many_to_one",
)

In [145]:
# Number of rows that did not resolve an agent_id in the left join.
# The count is still printed, but a non-zero count now stops the notebook
# instead of letting rows with a null key flow into the fact table.
missing_agent_ids = int(fact["agent_id"].isna().sum())
print(missing_agent_ids)
assert missing_agent_ids == 0, (
    f"{missing_agent_ids} fact rows have no matching agent_id in dim_agent"
)

0


In [147]:
# List the columns on fact after all six dimension merges
# (player, team, tournament, match, map, agent).
fact.columns

Index(['tournament_name', 'stage_name', 'match_type_name', 'match_name',
       'map_name', 'player_name', 'team_name', 'agent_name', 'rating',
       'average_combat_score', 'kills', 'deaths', 'assists',
       'kills_deaths_ratio', 'kill_assist_trade_survive_percentage',
       'average_damage_per_round', 'headshot_percentage', 'first_kills',
       'first_deaths', 'first_kills_deaths_ratio', 'side', 'player_id',
       'team_id', 'tournament_id', 'stage_id', 'match_type_id', 'match_id',
       'game_id', 'map_id', 'agent_id'],
      dtype='str')

In [149]:
# Identifier columns: match/game, player/team, tournament/stage/match type,
# map/agent, plus the side (e.g. both / attack / defend) the stats cover.
key_columns = [
    "match_id",
    "game_id",
    "player_id",
    "team_id",
    "tournament_id",
    "stage_id",
    "match_type_id",
    "map_id",
    "agent_id",
    "side",
]

# Performance measures, taken from the overview source as-is.
metric_columns = [
    "rating",
    "average_combat_score",
    "kills",
    "deaths",
    "assists",
    "kills_deaths_ratio",
    "kill_assist_trade_survive_percentage",
    "average_damage_per_round",
    "headshot_percentage",
    "first_kills",
    "first_deaths",
    "first_kills_deaths_ratio",
]

# Keep only ids and metrics (the *_name columns are left behind) and take an
# independent copy so later changes do not touch the wide `fact` frame.
fact_player_match_map_stats = fact[key_columns + metric_columns].copy()


In [150]:
# Print the dtype and non-null count of every column in the fact table.
fact_player_match_map_stats.info()

<class 'pandas.DataFrame'>
RangeIndex: 33120 entries, 0 to 33119
Data columns (total 22 columns):
 #   Column                                Non-Null Count  Dtype   
---  ------                                --------------  -----   
 0   match_id                              33120 non-null  int64   
 1   game_id                               33120 non-null  int64   
 2   player_id                             33120 non-null  int64   
 3   team_id                               33120 non-null  int64   
 4   tournament_id                         33120 non-null  int64   
 5   stage_id                              33120 non-null  int64   
 6   match_type_id                         33120 non-null  int64   
 7   map_id                                33120 non-null  int64   
 8   agent_id                              33120 non-null  int64   
 9   side                                  33120 non-null  category
 10  rating                                24450 non-null  float64 
 11  average_comba

In [151]:
# Count of missing values in each column of the fact table.
null_counts = fact_player_match_map_stats.isna().sum()
null_counts

match_id                                   0
game_id                                    0
player_id                                  0
team_id                                    0
tournament_id                              0
stage_id                                   0
match_type_id                              0
map_id                                     0
agent_id                                   0
side                                       0
rating                                  8670
average_combat_score                    5789
kills                                   5780
deaths                                  5780
assists                                 5780
kills_deaths_ratio                      5782
kill_assist_trade_survive_percentage    8680
average_damage_per_round                7201
headshot_percentage                     7218
first_kills                             7190
first_deaths                            7190
first_kills_deaths_ratio                7190
dtype: int

In [163]:
# Display the final fact table; pandas shortens the rendered rows and
# columns, so this is a visual spot check rather than a full dump.
fact_player_match_map_stats

Unnamed: 0,match_id,game_id,player_id,team_id,tournament_id,stage_id,match_type_id,map_id,agent_id,side,...,kills,deaths,assists,kills_deaths_ratio,kill_assist_trade_survive_percentage,average_damage_per_round,headshot_percentage,first_kills,first_deaths,first_kills_deaths_ratio
0,378662,180369,9196,17,2097,4035,26223,7,12,both,...,24.0,12.0,4.0,12.0,0.81,188.0,0.31,3.0,4.0,-1.0
1,378662,180369,9196,17,2097,4035,26223,7,12,attack,...,12.0,5.0,2.0,7.0,0.89,216.0,0.37,2.0,2.0,0.0
2,378662,180369,9196,17,2097,4035,26223,7,12,defend,...,12.0,7.0,2.0,5.0,0.75,168.0,0.24,1.0,2.0,-1.0
3,378662,180369,13039,17,2097,4035,26223,7,14,both,...,17.0,10.0,4.0,7.0,0.76,140.0,0.26,1.0,4.0,-3.0
4,378662,180369,13039,17,2097,4035,26223,7,14,attack,...,8.0,4.0,1.0,4.0,0.78,127.0,0.35,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33115,297258,155777,2489,17,1924,3723,23316,3,25,attack,...,8.0,6.0,2.0,2.0,0.83,119.0,0.41,1.0,2.0,-1.0
33116,297258,155777,2489,17,1924,3723,23316,3,25,defend,...,5.0,4.0,2.0,1.0,0.80,150.0,0.12,2.0,0.0,2.0
33117,297258,155777,773,17,1924,3723,23316,3,21,both,...,10.0,9.0,8.0,1.0,1.00,90.0,0.25,1.0,0.0,1.0
33118,297258,155777,773,17,1924,3723,23316,3,21,attack,...,6.0,6.0,6.0,0.0,1.00,98.0,0.31,0.0,0.0,0.0


In [164]:
# Write the fact table into the gold facts directory. index=False leaves the
# DataFrame index out of the parquet file.
out_path = OUT.joinpath("fact_player_match_map_stats.parquet")
fact_player_match_map_stats.to_parquet(out_path, index=False)