In [1]:
import json
import os
import pandas as pd
import numpy as np

# Directory the notebook is running from; the replay and game-state paths
# used further down are built relative to it.
prog_path = os.getcwd()

# Set this to True to re-collect data from the API. While it is False the
# `if collect_data:` collection cell does nothing.
collect_data = False

In [2]:
from src.recovery.ps_recovery import remove_invalid_char_for_url
from src.utils.ps_utils import get_ps_replays, save_to_files, get_replay_details, get_player_details


if collect_data:
    # Fetch the replay list and echo it as indented JSON for inspection.
    replays = get_ps_replays()
    json_formatted_data = json.dumps(replays, indent=4)
    print(json_formatted_data)

    # Persist the raw replay list; the target folder is created if missing.
    os.makedirs(f"{prog_path}/replays", exist_ok=True)
    save_to_files(f"{prog_path}/replays/replay_save.json", replays)

    # Look up the details for the fetched replays and persist them as well.
    r_details = get_replay_details(replays)
    save_to_files(f"{prog_path}/replays/replay_details.json", r_details)

    # Collect the first two entries of each replay's 'players' list, passed
    # through remove_invalid_char_for_url and de-duplicated via a set.
    player_ids = set()
    for replay in replays:
        for slot in (0, 1):
            player_ids.add(remove_invalid_char_for_url(replay['players'][slot]))

    # Fetch the details for those players and persist them.
    player_details = get_player_details(player_ids)
    save_to_files(f"{prog_path}/replays/player_details.json", player_details)



In [3]:
from src.utils.ps_game_state_utils import ini_data_file, process_replay_file
from src.utils.ps_utils import load_static_data

# Input: the replay-details JSON saved by the collection cell.
# Output: the game-state CSV that the following cell reads back.
file_path = f"{prog_path}/replays/replay_details.json"
output_path = f"{prog_path}/game_state_data/game_state.csv"

# Make sure the output folder exists before the data file is created in it.
# NOTE(review): whether ini_data_file creates missing folders itself is not
# visible from this notebook; exist_ok=True makes this a no-op if it does.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

ps_static = load_static_data()

# create the output data file
ini_data_file(output_path)

# track duplicated games; the set is handed to process_replay_file and its
# final size is reported below as the number of games
game_ids = set()

process_replay_file(file_path, output_path, ps_static, game_ids)

print(f"Finished generating game state data file {output_path}")
print(f"Number of games: {len(game_ids)}")

Calling get API: [https://play.pokemonshowdown.com/data/typechart.js]
Calling get API: [https://play.pokemonshowdown.com/data/abilities.js]
Calling get API: [https://play.pokemonshowdown.com/data/moves.js]
Calling get API: [https://play.pokemonshowdown.com/data/items.js]
Calling get API: [https://play.pokemonshowdown.com/data/pokedex.js]
Calling get API: [https://play.pokemonshowdown.com/data/learnsets.js]
Processing file: /Users/viethnguyen/Documents/CEU_BA_MSc/Coding_2_Web_Scraping_ECBS5306/CEU_MSc_BA_ECBS5306_Coding_2_Webscraping/replays/replay_details.json
Processing battle log: gen9doublesou-2010461162
Writing game state data to file /Users/viethnguyen/Documents/CEU_BA_MSc/Coding_2_Web_Scraping_ECBS5306/CEU_MSc_BA_ECBS5306_Coding_2_Webscraping/game_state_data/game_state.csv
Processing battle log: gen9doublesou-2010461583
Processing battle log: gen9doublesou-2010451902
Writing game state data to file /Users/viethnguyen/Documents/CEU_BA_MSc/Coding_2_Web_Scraping_ECBS5306/CEU_MSc_BA_

In [4]:
# Read the generated game-state CSV back in from the path it was written to.
game_state_filepath = output_path
df = pd.read_csv(filepath_or_buffer=game_state_filepath)

# Print the full per-column summary (verbose=True lists every column).
df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2850 entries, 0 to 2849
Data columns (total 228 columns):
 #    Column                   Dtype 
---   ------                   ----- 
 0    game_id                  object
 1    p1_win                   int64 
 2    T_ELECTRIC               int64 
 3    T_GRASSY                 int64 
 4    T_MISTY                  int64 
 5    T_PSYCHIC                int64 
 6    W_SUN                    int64 
 7    W_RAIN                   int64 
 8    W_SAND                   int64 
 9    W_SNOW                   int64 
 10   W_EX_SUN                 int64 
 11   W_EX_RAIN                int64 
 12   W_EX_WIND                int64 
 13   R_TRICK_ROOM             int64 
 14   R_WONDER_ROOM            int64 
 15   R_MAGIC_ROOM             int64 
 16   p1_reflect               int64 
 17   p1_light_screen          int64 
 18   p1_tailwind              int64 
 19   p1_webbed                int64 
 20   p1_has_bug               int64 
 21   p1_has_dark 