In [1]:
from pymongo import MongoClient
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import itertools
import time
import re

In [2]:
re_ignore_upgrade = re.compile('\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    return re.sub(re_ignore_upgrade, '', cardname)

In [42]:
client = MongoClient('mongodb://localhost:27017')
db = client.spire
status = db.command('serverStatus')
runs = db.runs

search = {
  # "event.character_chosen": "THE_SILENT",
  "event.character_chosen": "IRONCLAD",
  "event.ascension_level": {'$gte': 5},
  "event.floor_reached": {'$gte': 18}
}
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

result = runs.find(search)

# count how many decks contain each card
# ignore card upgrades
all_cards = {}
deck_lists = {}
for r in result:
    # only count a card once per deck
    unique_cards = {ignore_upgrade(card) for card in set(r['event']['master_deck'])}
    for card in unique_cards:
        if card in all_cards:
            all_cards[card] += 1
        else:
            all_cards[card] = 1
    deck_lists[r['event']['play_id']] = {ignore_upgrade(card) for card in set(r['event']['master_deck'])} # don't care about duplicates right now

# check the top cards
cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('top cards')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('total cards', len(all_cards))

# remove cards appearing in less than 1% of runs
for card in list(all_cards.keys()):
    if all_cards[card] < .01 * num_runs:
        del all_cards[card]
final_cards = set(all_cards.keys())
print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

num_runs 11531
top cards
Bash 10878
Defend_R 10742
Strike_R 9795
AscendersBane 7921
Shrug It Off 5644
total cards 282
total cards after removing those only appearing in <1% runs: 105 105


In [47]:
# print(deck_lists)
# all_cards
timings = {}
t1 = time.perf_counter()
df = pd.DataFrame(columns=all_cards, index=deck_lists.keys())
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

# mark the cards which appear in each run
for play_id, dl in deck_lists.items():
    df.loc[play_id] = [(card in dl) for card in df.columns]
timings['df_fill'] = time.perf_counter() - t1
print(timings)
df.fillna(False, inplace = True)
print(df)

{'df_setup': 0.04314131000000998, 'df_fill': 1.3933389979997628}
                                      Rampage  Impervious  Defend_R  \
be781d3b-6cde-47fc-b900-97f35e064001     True        True      True   
349582b6-8695-499d-9657-1254b033e67c    False       False      True   
e04e2e20-73c4-41fb-84fc-71de9bf6ec08    False       False      True   
a099980f-9728-4369-b15b-01b9bb718389    False       False      True   
734202ac-d6ce-4379-a3f6-69082c8881c5    False       False      True   
...                                       ...         ...       ...   
68269c04-9f3d-455e-99b7-b00957bc8ff3    False       False      True   
21f466f9-8d1b-4a71-a842-17f1353848fd     True       False      True   
302140e5-0a08-415c-bfd1-0671d4fe8258    False       False      True   
8f4e7661-32fd-4d72-89b8-9ec8d0d16727    False       False      True   
17ad76d8-8ecd-4862-8a01-9a1d735bc0a7    False        True      True   

                                      AscendersBane  Strike_R  Mayhem  \
be781d3b-

In [48]:
len(df.columns.tolist())

105

In [49]:
df.dtypes

Rampage          bool
Impervious       bool
Defend_R         bool
AscendersBane    bool
Strike_R         bool
                 ... 
Doubt            bool
Finesse          bool
J.A.X.           bool
Bandage Up       bool
Blind            bool
Length: 105, dtype: object

In [50]:
correlations = {}
cards = df.columns.tolist()
print(cards)
for card_a, card_b in itertools.combinations(cards, 2):
    correlations[card_a + '__' + card_b] = pearsonr(df.loc[:, card_a], df.loc[:, card_b])

result = pd.DataFrame.from_dict(correlations, orient = 'index')
result.columns = ['PCC', 'p-value']
print(result.sort_values(by='PCC', ascending=False).head(30))

['Rampage', 'Impervious', 'Defend_R', 'AscendersBane', 'Strike_R', 'Mayhem', 'Body Slam', 'Bash', 'Double Tap', 'Ghostly Armor', 'Demon Form', 'Madness', 'Berserk', 'Heavy Blade', 'Apotheosis', 'Disarm', 'Shockwave', 'Feed', 'Dropkick', 'Barricade', 'Shrug It Off', 'Whirlwind', 'Clash', 'Inflame', 'Flex', 'Seeing Red', 'Armaments', 'Infernal Blade', 'Rage', 'Reaper', 'Wild Strike', 'Perfected Strike', 'Bloodletting', 'Sword Boomerang', 'Cleave', 'CurseOfTheBell', 'Uppercut', 'Iron Wave', 'True Grit', 'Second Wind', 'Clothesline', 'Corruption', 'Sever Soul', 'Immolate', 'Thunderclap', 'Battle Trance', 'Twin Strike', 'Ghostly', 'Headbutt', 'Rupture', 'Offering', 'Sentinel', 'Limit Break', 'Dual Wield', 'Carnage', 'Blood for Blood', 'Metallicize', 'Pummel', 'Fiend Fire', 'Dark Embrace', 'Havoc', 'Pommel Strike', 'Warcry', 'Reckless Charge', 'Flame Barrier', 'Burning Pact', 'RitualDagger', 'Searing Blow', 'Power Through', 'Bite', 'Feel No Pain', 'HandOfGreed', 'Entrench', 'Necronomicurse',