In [None]:
# tournament_kagaribi_8_db.xlsxで指定した大会の試合情報をsmash_ggから取得し、出力する

In [1]:
# https://smash.gg/admin/user/782f51a2/developer で取得したsmashggトークン
# プログラム公開時はマスキングすること！

smashgg_token = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"

In [2]:
import pysmashgg
import pandas as pd
import numpy as np
import time
from datetime import datetime
from tqdm import tqdm
import swifter

In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 20)
pd.set_option("display.max_colwidth", 80)

In [6]:
def fetch_tournament_name(tournament_URL):
    """
    大会イベントURLを与えると、URL中の大会名を返す
    """
    separated_url = tournament_URL.split("/")
    return separated_url[separated_url.index("tournament")+1]

In [7]:
def fetch_event_name(tournament_URL):
    """
    大会イベントURLを与えると、URL中のイベント名を返す
    """
    separated_url = tournament_URL.split("/")
    return separated_url[separated_url.index("event")+1]

In [8]:
def fetch_df_all_players(tournament_event_id:int):
    """
    大会イベントIDを与えると、参加した全選手のデータフレームを返す関数
    event_show_setsで試合を取得すると、なぜか取得漏れが発生するのでこの形式にしている
    """

    df_players = pd.DataFrame()
    page = 1 # 開始ページ。なぜか-1などの負の数も指定可(最後の重複除去時に結局同じ結果になる)
    
    # 全ページ巡回
    while 1:
        df_tmp = pd.DataFrame.from_dict(smash.event_show_entrants(tournament_event_id, page))
        if len(df_tmp):
            df_players = pd.concat([df_players, df_tmp])
            page += 1
        else:
            break
    
    # リストを内包しているentrantPlayersカラムから必要な情報を抜き出す
    df_players = (df_players
                  .assign(playerId = lambda x: x.entrantPlayers.swifter.apply(lambda y: y[0]["playerId"]), progress_bar=False)
                  .assign(playerTag = lambda x: x.entrantPlayers.swifter.apply(lambda y: y[0]["playerTag"]), progress_bar=False)
                  .drop(columns="entrantPlayers")
                  .drop_duplicates()
                  .assign(tournament_event_id = tournament_event_id)
                 )

    return df_players

In [9]:
def fetch_df_all_sets(tournament_event_id:int, player_tags:list):
    """
    大会イベントIDと全プレイヤーの"tag"リストを与えると、全試合(set)のデータフレームを返す関数
    """
    
    df_sets = pd.DataFrame()

    for player_tag in player_tags:
        df_sets = pd.concat([df_sets, pd.DataFrame.from_dict(smash.event_show_entrant_sets(tournament_event_id, player_tag))])

    df_sets = (df_sets
               .drop_duplicates()
               .assign(tournament_event_id = tournament_event_id)
              )
    
    return df_sets

In [10]:
# 取得したsmashggトークンを使ってクラス定義
# 2番目の変数はオートリトライの有無

smash = pysmashgg.SmashGG(smashgg_token, True)

In [20]:
# 大会情報データフレームを作成、出力する
# smashgg上の大会イベントURL読み込み(URL取得はfetch_tournament_url.ipynbを使用)と、大会イベントIDの取得

df_t = (pd.read_excel("tournament_kagaribi_8_db.xlsx")
        [["tournament_url"]]
        .assign(tournament_name = lambda x: x.tournament_url.swifter.apply(fetch_tournament_name))
        .assign(event_name = lambda x: x.tournament_url.swifter.apply(fetch_event_name))
        .assign(tournament_event_id = lambda x: x.swifter.apply(lambda x: smash.tournament_show_event_id(x.tournament_name, x.event_name), axis=1))
       )

Pandas Apply:   0%|          | 0/178 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/178 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/178 [00:00<?, ?it/s]

Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds


In [30]:
# 大会名を使用してsmashggから大会情報を取得

df_tmp = pd.DataFrame()
for t_name in df_t.tournament_name:
    df_tmp = pd.concat([df_tmp, pd.DataFrame(smash.tournament_show(t_name), index=[""])]) # ダミーインデックスが無いとエラー

Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds


In [32]:
# 大会情報データフレームを加工して横連結

df_tmp = (df_tmp
          .rename(columns={"name":"JPN_name"}) # 後々カラム名が重複するので改名
          .reset_index(drop=True)
          .assign(startday = lambda x: x.startTimestamp.swifter.apply(datetime.fromtimestamp).dt.date) # timestampを日付に修正
          .assign(endday = lambda x: x.endTimestamp.swifter.apply(datetime.fromtimestamp).dt.date)
          .drop(columns=["startTimestamp", "endTimestamp"])
         )
df_tmp.columns = ["tournament_" + x for x in df_tmp.columns.to_list()] # 大会情報のカラム名にprefixをつける

df_t = pd.concat([df_t, df_tmp], axis=1)
df_t.to_csv("df_t.csv", index=False, encoding="utf-8-sig")
df_t

Pandas Apply:   0%|          | 0/178 [00:00<?, ?it/s]

Pandas Apply:   0%|          | 0/178 [00:00<?, ?it/s]

Unnamed: 0,tournament_url,tournament_name,event_name,tournament_event_id,tournament_id,tournament_JPN_name,tournament_country,tournament_state,tournament_city,tournament_entrants,tournament_startday,tournament_endday
0,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,558662,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27
1,https://smash.gg/tournament/sp14/event/ishibura-sp14/overview,sp14,ishibura-sp14,562395,263073,いしぶら!SP14,JP,石川県,野々市市,64,2021-04-04,2021-04-04
2,https://smash.gg/tournament/winner-1/event/singles-1/overview,winner-1,singles-1,584890,294706,WINNER!,JP,東京都,豊島区,48,2021-06-21,2021-06-21
3,https://smash.gg/tournament/4-kagaribi-4-1/event/singles/overview,4-kagaribi-4-1,singles,566593,281939,篝火 #4 / Kagaribi #4,JP,東京都,大田区,420,2021-06-26,2021-06-27
4,https://smash.gg/tournament/1on1-top-5-maesumatop-5/event/singles-tournament...,1on1-top-5-maesumatop-5,singles-tournament,588686,297173,【1on1部門】マエスマTOP#5/ MaesumaTOP#5,JP,大阪府,大東市,128,2021-07-04,2021-07-04
...,...,...,...,...,...,...,...,...,...,...,...,...
173,https://www.start.gg/tournament/02-kurobraweekly-02/event/sp-1on1,02-kurobraweekly-02,sp-1on1,825399,502429,クロブラ平日大会02/KurobraWeekly#02,JP,神奈川県,横浜市,28,2022-12-08,2022-12-08
174,https://www.start.gg/tournament/03-kurobraweekly-03/event/sp-1on1,03-kurobraweekly-03,sp-1on1,828681,504302,クロブラ平日大会03/KurobraWeekly#03,JP,神奈川県,横浜市,26,2022-12-15,2022-12-15
175,https://www.start.gg/tournament/delta-1/event/singles/overview,delta-1,singles,814192,496169,DELTA #1,JP,東京都,中野区,71,2022-12-18,2022-12-18
176,https://www.start.gg/tournament/tsb-17/event/special-squad-strike-3-v-3/over...,tsb-17,special-squad-strike-3-v-3,816447,497471,TSB#17,JP,Tokyo,Nakano City,20,2022-11-20,2022-11-20


In [58]:
# 各大会イベントに参加した全プレイヤーを抽出する(｢大会+参加プレイヤー｣ごとにユニークになる点に注意)

df_players = pd.DataFrame()
pass_flg = True

for tournament_event_id in df_t.tournament_event_id:
    if tournament_event_id == 724892:
        pass_flg = False
    if pass_flg:
        continue
    else:
        df_players = pd.concat([df_players, fetch_df_all_players(tournament_event_id)])
    
df_players.to_csv("df_players.csv", index=False, encoding="utf-8-sig")
df_players

In [73]:
# 各大会イベントの参加プレイヤーの全試合を抽出し、結合して出力する
# 742002(https://www.start.gg/tournament/offline-feat/event/singles-tournament/overview)は成功しても試合を取得できない

failed_tournament_event_id = []

# 全試合取得。7hくらいかかる
df_sets = pd.DataFrame()
for tournament_event_id in df_t.tournament_event_id:
    try:
        df_sets = pd.concat([df_sets, fetch_df_all_sets(tournament_event_id, df_players[df_players.tournament_event_id == tournament_event_id].tag)])
        df_sets.to_csv("df_original_sets.csv", index=False, encoding="utf-8-sig")
    except:
        failed_tournament_event_id.append(tournament_event_id)

In [28]:
# 取得失敗分だけやり直す
# サーバーエラーが頻発するようになったらRestart Kernelからやり直す。根気よくリトライしているとそのうち成功する

df_failed_sets = pd.DataFrame()

while failed_tournament_event_id:
    tournament_event_id = failed_tournament_event_id.pop(0)
    try:
        df_failed_sets = pd.concat([df_failed_sets, fetch_df_all_sets(tournament_event_id, df_players[df_players.tournament_event_id == tournament_event_id].tag)])
        df_failed_sets.to_csv("df_failed_sets.csv", index=False, encoding="utf-8-sig")
        print("success", tournament_event_id)
    except:
        failed_tournament_event_id.append(tournament_event_id)
        print("failure", tournament_event_id)

df_failed_sets

Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds
Error 429: Sending too many requests right now, trying again in 10 seconds
Error 429: Sending too many requests right now, trying again in 20 seconds
success 742002


Unnamed: 0,tournament_event_id


In [26]:
# 成功分と取得失敗分を縦結合する

df_sets = (pd.concat([df_sets, df_failed_sets])
           .drop_duplicates()
          )
df_sets.to_csv("df_original_sets.csv", index=False, encoding="utf-8-sig")

In [27]:
# 勝者のマージ用データフレーム
df_tmp1 = df_players.copy()
df_tmp1.columns = ["winner_"+x for x in df_tmp1.columns.to_list()]
df_tmp1 = df_tmp1.rename(columns={"winner_tournament_event_id":"tournament_event_id", "winner_entrantId":"winnerId"})

# 敗者のマージ用データフレーム
df_tmp2 = df_players.copy()
df_tmp2.columns = ["loser_"+x for x in df_tmp2.columns.to_list()]
df_tmp2 = df_tmp2.rename(columns={"loser_tournament_event_id":"tournament_event_id", "loser_entrantId":"loserId"})

# 大会情報とプレイヤー情報をマージし、時系列順にソート、さらにDQを除く
# 理想的に大会結果が入力されていれば、大会開催日→大会イベントID→bracketId→idの順に昇順に並べると時系列順になる模様
# idが稀に前後しているので、個別に作成したsetRound_orderによるソートを挟む
df_sets = (df_sets
           .merge(df_tmp1, on=["tournament_event_id", "winnerId"])
           .merge(df_tmp2, on=["tournament_event_id", "loserId"])
           .merge(df_t, on="tournament_event_id")
           .merge(pd.read_excel("setRound_sort.xlsx"), on="setRound", how="left")
           .sort_values(["tournament_startday", "tournament_event_id", "bracketId", "setRound_order", "id"])
           .pipe(lambda x: x[(x.entrant1Score >= 0) & (x.entrant2Score >= 0)]) # DQ除外。DQはentrantScoreが-1となる
          )
    
df_sets.to_csv("df_sets.csv", index=False, encoding="utf-8-sig")
df_sets

Unnamed: 0,id,entrant1Id,entrant2Id,entrant1Name,entrant2Name,entrant1Score,entrant2Score,completed,winnerId,loserId,winnerName,loserName,setRound,bracketId,tournament_event_id,winner_tag,winner_finalPlacement,winner_seed,winner_playerId,winner_progress_bar,winner_playerTag,loser_tag,loser_finalPlacement,loser_seed,loser_playerId,loser_progress_bar,loser_playerTag,tournament_url,tournament_name,event_name,tournament_id,tournament_JPN_name,tournament_country,tournament_state,tournament_city,tournament_entrants,tournament_startday,tournament_endday,setRound_order
24330,36244343.0,7055308.0,7044186.0,Lir,ぐんそう,2.0,0.0,True,7055308.0,7044186.0,Lir,ぐんそう,Winners Round 1,1460283.0,558662,Lir,65,54,1486058,False,Lir,ぐんそう,97,129,570710,False,ぐんそう,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27,11.0
24200,36244415.0,7055248.0,7044186.0,Rom,ぐんそう,0.0,2.0,True,7044186.0,7055248.0,ぐんそう,Rom,Losers Round 1,1460283.0,558662,ぐんそう,97,129,570710,False,ぐんそう,Rom,129,81,29912,False,Rom,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27,12.0
24044,36244358.0,7007280.0,7055308.0,GW | zackray,Lir,2.0,0.0,True,7007280.0,7055308.0,GW | zackray,Lir,Winners Round 2,1460283.0,558662,GW | zackray,4,1,498962,False,zackray,Lir,65,54,1486058,False,Lir,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27,13.0
24326,36244359.0,7055285.0,7055283.0,けさん,のーまる,0.0,2.0,True,7055283.0,7055285.0,のーまる,けさん,Winners Round 2,1460283.0,558662,のーまる,65,72,2215177,False,のーまる,けさん,97,64,1398291,False,ちょけ,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27,13.0
24258,36244360.0,7010227.0,7055273.0,Yoshimoto | Rain,Dackie,2.0,0.0,True,7010227.0,7055273.0,Yoshimoto | Rain,Dackie,Winners Round 2,1460283.0,558662,Yoshimoto | Rain,49,1,135808,False,Rain,Dackie,65,43,1471808,False,Dackie,https://smash.gg/tournament/3-kagaribi-3/event/singles/overview,3-kagaribi-3,singles,275805,篝火#3 / Kagaribi#3,JP,東京都,大田区,132,2021-03-27,2021-03-27,13.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21764,55221097.0,11897223.0,11898399.0,いなばっち,Danger Cat,2.0,0.0,True,11897223.0,11898399.0,いなばっち,Danger Cat,Winners Semi-Final,1925126.0,820764,いなばっち,1,2,2528015,False,いなばっち,Danger Cat,2,11,40454,False,Danger Cat,https://www.start.gg/tournament/61-sp/event/sp,61-sp,sp,499859,第61回 美らブラSP 沖縄スマブラ大会 うるま市,JP,沖縄県,うるま市,19,2022-12-18,2022-12-18,36.0
21771,55221129.0,11863945.0,11898399.0,れお,Danger Cat,0.0,2.0,True,11898399.0,11863945.0,Danger Cat,れお,Losers Semi-Final,1925126.0,820764,Danger Cat,2,11,40454,False,Danger Cat,れお,4,3,2770949,False,れお,https://www.start.gg/tournament/61-sp/event/sp,61-sp,sp,499859,第61回 美らブラSP 沖縄スマブラ大会 うるま市,JP,沖縄県,うるま市,19,2022-12-18,2022-12-18,37.0
21765,55221098.0,11863973.0,11897223.0,FTJ,いなばっち,0.0,2.0,True,11897223.0,11863973.0,いなばっち,FTJ,Winners Final,1925126.0,820764,いなばっち,1,2,2528015,False,いなばっち,FTJ,3,1,2712138,False,FTJ,https://www.start.gg/tournament/61-sp/event/sp,61-sp,sp,499859,第61回 美らブラSP 沖縄スマブラ大会 うるま市,JP,沖縄県,うるま市,19,2022-12-18,2022-12-18,39.0
21766,55221130.0,11863973.0,11898399.0,FTJ,Danger Cat,0.0,2.0,True,11898399.0,11863973.0,Danger Cat,FTJ,Losers Final,1925126.0,820764,Danger Cat,2,11,40454,False,Danger Cat,FTJ,3,1,2712138,False,FTJ,https://www.start.gg/tournament/61-sp/event/sp,61-sp,sp,499859,第61回 美らブラSP 沖縄スマブラ大会 うるま市,JP,沖縄県,うるま市,19,2022-12-18,2022-12-18,40.0
