In [18]:
import pandas as pd
from pathlib import Path

In [19]:
# Directory layout: inputs are read from BASE and the dimension tables built
# in this notebook are written to OUT. Both paths are relative, so they
# resolve against the kernel's current working directory.
BASE = Path("..") / "data" / "clean" / "vct_2024"
OUT = Path("..") / "data" / "gold" / "dimensions"

# Create the output directory together with any missing parents; an already
# existing directory is not an error.
OUT.mkdir(exist_ok=True, parents=True)

In [56]:
# Load the player name/id lookup from the "ids" folder under BASE.
players_raw = pd.read_parquet(BASE.joinpath("ids", "players_ids.parquet"))

In [57]:
# Preview the first five rows to confirm the column names before reshaping.
players_raw.head(n=5)

Unnamed: 0,player_name,player_id
0,t3xture,9196
1,Meteor,13039
2,Lakia,773
3,Karon,34974
4,Munchkin,2489


In [58]:
# Player dimension: distinct (player_id, player_name) pairs ordered by id,
# with a fresh 0..n-1 index.
# NOTE(review): duplicates are removed on the *pair*, so a player_id that
# appears with two different names would still yield two rows -- confirm
# player_id is unique in the source if it is meant to be the key.
dim_players = (
    players_raw.loc[:, ["player_id", "player_name"]]
    .drop_duplicates()
    .sort_values(by="player_id", ignore_index=True)
)

In [59]:
# Persist the player dimension; index=False keeps the positional index out
# of the file.
output_path = OUT.joinpath("dim_player.parquet")
dim_players.to_parquet(path=output_path, index=False)

In [60]:
# Load the team name/id lookup from the "ids" folder under BASE.
teams_raw = pd.read_parquet(BASE.joinpath("ids", "teams_ids.parquet"))

In [61]:
# Preview the first five rows to confirm the column names before reshaping.
teams_raw.head(n=5)

Unnamed: 0,team_name,team_id
0,Gen.G,17
1,Sentinels,2
2,FunPlus Phoenix,11328
3,Team Heretics,1001
4,DRX,8185


In [62]:
# Team dimension: distinct (team_id, team_name) pairs ordered by id, with a
# fresh 0..n-1 index.
# NOTE(review): duplicates are removed on the *pair*, so a team_id that
# appears with two different names would still yield two rows -- confirm
# team_id is unique in the source if it is meant to be the key.
dim_teams = (
    teams_raw.loc[:, ["team_id", "team_name"]]
    .drop_duplicates()
    .sort_values(by="team_id", ignore_index=True)
)

In [63]:
# Persist the team dimension; index=False keeps the positional index out of
# the file. `output_path` is rebound here to the team file.
output_path = OUT.joinpath("dim_team.parquet")
dim_teams.to_parquet(path=output_path, index=False)

In [9]:
# Load the tournament / stage / match-type lookup (names and ids).
t_raw = pd.read_parquet(
    BASE.joinpath("ids", "tournaments_stages_match_types_ids.parquet")
)

In [11]:
# Preview the first five rows to confirm the column names before reshaping.
t_raw.head(n=5)

Unnamed: 0,tournament_name,tournament_id,stage_name,stage_id,match_type_name,match_type_id
0,Valorant Champions 2024,2097,Playoffs,4131,Upper Quarterfinals,26235
1,Valorant Champions 2024,2097,Playoffs,4131,Upper Semifinals,26236
2,Valorant Champions 2024,2097,Playoffs,4131,Upper Final,26237
3,Valorant Champions 2024,2097,Playoffs,4131,Grand Final,26238
4,Valorant Champions 2024,2097,Playoffs,4131,Lower Round 1,26239


In [12]:
# Tournament dimension at match-type grain: each id column is placed next to
# its display name, exact duplicate rows are dropped, and rows are ordered by
# the three id columns with a fresh 0..n-1 index.
dim_tournament = (
    t_raw.loc[
        :,
        [
            "tournament_id", "tournament_name",
            "stage_id", "stage_name",
            "match_type_id", "match_type_name",
        ],
    ]
    .drop_duplicates()
    .sort_values(
        by=["tournament_id", "stage_id", "match_type_id"],
        ignore_index=True,
    )
)

In [14]:
# Spot-check the first five rows of the sorted tournament dimension.
dim_tournament.head(n=5)

Unnamed: 0,tournament_id,tournament_name,stage_id,stage_name,match_type_id,match_type_name
0,1921,Champions Tour 2024: Masters Madrid,3663,Swiss Stage,23896,Round 1
1,1921,Champions Tour 2024: Masters Madrid,3663,Swiss Stage,23902,Round 2
2,1921,Champions Tour 2024: Masters Madrid,3663,Swiss Stage,23903,Round 3
3,1921,Champions Tour 2024: Masters Madrid,3821,Playoffs,23897,Upper Semifinals
4,1921,Champions Tour 2024: Masters Madrid,3821,Playoffs,23898,Upper Final


In [51]:
# Persist the tournament dimension; index=False keeps the positional index
# out of the file.
dim_tournament.to_parquet(path=OUT.joinpath("dim_tournament.parquet"), index=False)

In [26]:
# Hand-maintained reference lists used to build the map and agent dimensions.
# Element order is kept exactly as originally written (e.g. "Abyss" is last);
# the dimension-building cells sort the names before assigning ids.
valorant_maps = [
    "Ascent", "Bind", "Breeze", "Corrode",
    "Fracture", "Haven", "Icebox", "Lotus",
    "Pearl", "Split", "Sunset", "Abyss",
]

valorant_agents = [
    "Astra", "Breach", "Brimstone", "Chamber",
    "Clove", "Cypher", "Deadlock", "Fade",
    "Gekko", "Harbor", "Iso", "Jett",
    "KayO", "Killjoy", "Neon", "Omen",
    "Phoenix", "Raze", "Reyna", "Sage",
    "Skye", "Sova", "Tejo", "Viper",
    "Veto", "Vyse", "Waylay", "Yoru",
]

In [27]:
# Normalise agent names to lower case (e.g. "KayO" -> "kayo"). The name is
# rebound in place, and re-running this cell leaves the list unchanged.
valorant_agents = list(map(str.lower, valorant_agents))

In [28]:
# Map dimension: names sorted alphabetically, with map_id assigned as the
# 1-based position in that sorted order.
# NOTE(review): because ids follow alphabetical position, adding a map to the
# list can shift the ids of existing maps.
dim_map = (
    pd.DataFrame({"map_name": sorted(valorant_maps)})
    .assign(map_id=lambda frame: frame.index + 1)
    .loc[:, ["map_id", "map_name"]]
)

In [29]:
# Persist the map dimension; index=False keeps the positional index out of
# the file.
dim_map_path = OUT.joinpath("dim_map.parquet")
dim_map.to_parquet(path=dim_map_path, index=False)

In [30]:
# Agent dimension: names sorted alphabetically, with agent_id assigned as the
# 1-based position in that sorted order.
# NOTE(review): because ids follow alphabetical position, adding an agent to
# the list can shift the ids of existing agents.
dim_agent = (
    pd.DataFrame({"agent_name": sorted(valorant_agents)})
    .assign(agent_id=lambda frame: frame.index + 1)
    .loc[:, ["agent_id", "agent_name"]]
)

In [31]:
# Persist the agent dimension; index=False keeps the positional index out of
# the file.
dim_agent_path = OUT.joinpath("dim_agent.parquet")
dim_agent.to_parquet(path=dim_agent_path, index=False)

In [7]:
# Load the game-level id table, and reload the match-type lookup so this
# section does not depend on the earlier tournament cells having been run.
m_raw = pd.read_parquet(
    BASE.joinpath("ids", "tournaments_stages_matches_games_ids.parquet")
)
t_raw = pd.read_parquet(
    BASE.joinpath("ids", "tournaments_stages_match_types_ids.parquet")
)

In [8]:
# Preview the first five rows; the displayed columns include no
# match_type_id, which is why it is joined in below.
m_raw.head(n=5)

Unnamed: 0,tournament_name,tournament_id,stage_name,stage_id,match_type_name,match_name,match_id,map_name,game_id
0,Valorant Champions 2024,2097,Group Stage,4035,Opening (B),Gen.G vs Sentinels,378662,Haven,180369
1,Valorant Champions 2024,2097,Group Stage,4035,Opening (B),Gen.G vs Sentinels,378662,Ascent,180370
2,Valorant Champions 2024,2097,Group Stage,4035,Opening (B),FunPlus Phoenix vs Team Heretics,378663,Abyss,180372
3,Valorant Champions 2024,2097,Group Stage,4035,Opening (B),FunPlus Phoenix vs Team Heretics,378663,Bind,180373
4,Valorant Champions 2024,2097,Group Stage,4035,Opening (B),FunPlus Phoenix vs Team Heretics,378663,Lotus,180374


In [9]:
# Attach match_type_id to every game row. m_raw carries no match_type_id, so
# it is looked up in t_raw through the (tournament, stage, match type) names.
join_keys = ["tournament_name", "stage_name", "match_type_name"]

# Reduce the lookup to distinct (keys, id) rows first: any exact repeat in
# t_raw would otherwise multiply the game rows produced by the left join.
match_type_lookup = t_raw[join_keys + ["match_type_id"]].drop_duplicates()

m_merged = m_raw.merge(
    match_type_lookup,
    on=join_keys,
    how="left",
    # Fail loudly (pandas.errors.MergeError) if one name triple still maps to
    # several match_type_id values, instead of silently duplicating games.
    validate="many_to_one",
)

# A left join leaves NaN where no lookup row matched, which also upcasts
# match_type_id to float. Surface that here rather than in downstream tables.
unmatched_rows = int(m_merged["match_type_id"].isna().sum())
if unmatched_rows:
    print(
        f"WARNING: {unmatched_rows} of {len(m_merged)} game rows "
        "have no match_type_id after the join"
    )


In [10]:
# Confirm the join kept every m_raw column and appended match_type_id.
m_merged.columns

Index(['tournament_name', 'tournament_id', 'stage_name', 'stage_id',
       'match_type_name', 'match_name', 'match_id', 'map_name', 'game_id',
       'match_type_id'],
      dtype='str')

In [14]:
# Match dimension. Because game_id and map_name are kept, rows are distinct
# per (match_id, game_id) combination rather than per match. Exact duplicate
# rows are dropped and the result is ordered by match then game, with a
# fresh 0..n-1 index.
dim_match = (
    m_merged.loc[
        :,
        [
            "match_id", "game_id",
            "match_name", "map_name",
            "tournament_id", "stage_id", "match_type_id",
        ],
    ]
    .drop_duplicates()
    .sort_values(by=["match_id", "game_id"], ignore_index=True)
)


In [15]:
# Spot-check the first five rows of the sorted match dimension.
dim_match.head(n=5)

Unnamed: 0,match_id,game_id,match_name,map_name,tournament_id,stage_id,match_type_id
0,295605,153722,NRG Esports vs FURIA,Breeze,1923,3637,23017
1,295605,153723,NRG Esports vs FURIA,Ascent,1923,3637,23017
2,295606,153725,Cloud9 vs MIBR,Bind,1923,3637,23017
3,295606,153726,Cloud9 vs MIBR,Split,1923,3637,23017
4,295606,153727,Cloud9 vs MIBR,Lotus,1923,3637,23017


In [16]:
# Persist the match dimension; index=False keeps the positional index out of
# the file.
dim_match_path = OUT.joinpath("dim_match.parquet")
dim_match.to_parquet(path=dim_match_path, index=False)