In [1]:
from pymongo import MongoClient
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import itertools
import time
import re

In [2]:
# Matches a trailing upgrade suffix ("+" followed by digits) at the end of a
# card name. Written as a raw string: "\+" and "\d" are regex escapes, not
# Python string escapes, and a non-raw literal triggers an invalid-escape
# warning on current Python versions.
re_ignore_upgrade = re.compile(r'\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    """Return ``cardname`` with a trailing ``+<digits>`` upgrade suffix removed.

    Names without such a suffix are returned unchanged; a ``+`` that is not
    followed by digits up to the end of the string is left alone.
    """
    return re_ignore_upgrade.sub('', cardname)

In [7]:
# Connect to the local MongoDB instance and select the `runs` collection of
# the `spire` database.
client = MongoClient('mongodb://localhost:27017')
db = client.spire
# Round-trips a serverStatus command to the server; the returned document is
# not used again in this cell.
status = db.command('serverStatus')
runs = db.runs

# An item (card or relic) must appear in at least this fraction of the matched
# runs to be kept for the later analysis.
MIN_RUN_FRACTION = .01

search = {
  # "event.character_chosen": "THE_SILENT",
  "event.character_chosen": "IRONCLAD",
  "event.ascension_level": {'$gte': 5},
  "event.floor_reached": {'$gte': 18}
}
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

result = runs.find(search)

# count how many runs contain each card / relic
# card upgrades are ignored, and an item is counted at most once per run
all_cards = {}    # card name  -> number of runs whose deck contains it
all_relics = {}   # relic name -> number of runs holding it
deck_lists = {}   # play_id -> set of card names (duplicates dropped)
relic_lists = {}  # play_id -> set of relic names
for r in result:
    event = r['event']
    # Build each per-run set once and reuse it for both counting and storage.
    unique_cards = {ignore_upgrade(card) for card in event['master_deck']}
    unique_relics = set(event['relics'])
    for card in unique_cards:
        all_cards[card] = all_cards.get(card, 0) + 1
    # Count relics from the deduplicated set so the counts mean "number of
    # runs", the same unit the threshold filter below compares against.
    for relic in unique_relics:
        all_relics[relic] = all_relics.get(relic, 0) + 1
    deck_lists[event['play_id']] = unique_cards
    relic_lists[event['play_id']] = unique_relics

# check the top cards
cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('=== top cards ===')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('=== total cards ===', len(all_cards))

# check the top relics
relics_by_freq = sorted(all_relics, key=all_relics.get, reverse=True)
print('=== top relics ===')
for relic in relics_by_freq[:5]:
    print(relic, all_relics[relic])
print('=== total relics ===', len(all_relics))

# Minimum number of runs an item has to appear in to survive the filter.
min_runs = MIN_RUN_FRACTION * num_runs

# remove cards appearing in less than 1% of runs
all_cards = {card: count for card, count in all_cards.items() if count >= min_runs}
final_cards = set(all_cards.keys())

# remove relics appearing in less than 1% of runs
all_relics = {relic: count for relic, count in all_relics.items() if count >= min_runs}
final_relics = set(all_relics.keys())

print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

print('total relics after removing those only appearing in <1% runs:',
      len(all_relics),
      len(final_relics))

num_runs 11531
=== top cards ===
Bash 10878
Defend_R 10742
Strike_R 9795
AscendersBane 7921
Shrug It Off 5644
=== total cards === 282
=== top relics ===
Burning Blood 9739
NeowsBlessing 4095
Golden Idol 1795
Red Skull 1761
Vajra 1716
=== total relics === 152
total cards after removing those only appearing in <1% runs: 105 105
total relics after removing those only appearing in <1% runs: 143 143


In [20]:
# Build a boolean run x item membership table: one row per run (indexed by
# play_id), one column per retained card and relic, True where the run's deck
# or relic set contains that item.
timings = {}
t1 = time.perf_counter()
item_columns = list(all_cards.keys()) + list(all_relics.keys())
play_ids = list(deck_lists.keys())
# Preallocate a bool matrix instead of assigning rows into an empty
# object-dtype DataFrame: the result is bool from the start, so no fillna or
# dtype downcasting is needed afterwards.
membership = np.zeros((len(play_ids), len(item_columns)), dtype=bool)
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

# mark the cards and relics which appear in each run
for row, play_id in enumerate(play_ids):
    item_list = deck_lists[play_id].union(relic_lists[play_id])
    membership[row] = [(item in item_list) for item in item_columns]
df = pd.DataFrame(membership, index=play_ids, columns=item_columns)
timings['df_fill'] = time.perf_counter() - t1
print(timings)
print(df)

{'df_setup': 0.07961953699123114, 'df_fill': 1.5937955179979326}
                                      Bash  Berserk  Rampage   Feed  \
be781d3b-6cde-47fc-b900-97f35e064001  True     True     True   True   
349582b6-8695-499d-9657-1254b033e67c  True    False    False   True   
e04e2e20-73c4-41fb-84fc-71de9bf6ec08  True    False    False  False   
a099980f-9728-4369-b15b-01b9bb718389  True    False    False  False   
734202ac-d6ce-4379-a3f6-69082c8881c5  True    False    False  False   
...                                    ...      ...      ...    ...   
68269c04-9f3d-455e-99b7-b00957bc8ff3  True    False    False  False   
21f466f9-8d1b-4a71-a842-17f1353848fd  True    False     True  False   
302140e5-0a08-415c-bfd1-0671d4fe8258  True    False    False  False   
8f4e7661-32fd-4d72-89b8-9ec8d0d16727  True    False    False   True   
17ad76d8-8ecd-4862-8a01-9a1d735bc0a7  True    False    False  False   

                                      Body Slam  Defend_R  Demon Form  \
be781d3b-

In [21]:
# number of columns in df (one per retained card or relic)
df.shape[1]

248

In [22]:
# Display the dtype of every column in df.
df.dtypes

Bash             bool
Berserk          bool
Rampage          bool
Feed             bool
Body Slam        bool
                 ... 
Nilry's Codex    bool
Strange Spoon    bool
Bloody Idol      bool
Du-Vu Doll       bool
Chemical X       bool
Length: 248, dtype: object

In [25]:
# Pearson correlation between every unordered pair of item columns in df.
# `correlations` maps "<item_a>__<item_b>" to a (coefficient, p-value) pair.
correlations = {}
items = df.columns.tolist()
print(items)
# Extract each column once as a float array rather than re-slicing the frame
# for every pair; the explicit float dtype also keeps pearsonr independent of
# how the membership flags happen to be stored.
column_values = {item: df.loc[:, item].to_numpy(dtype=float) for item in items}
for item_a, item_b in itertools.combinations(items, 2):
    pcc, p_value = pearsonr(column_values[item_a], column_values[item_b])
    correlations[item_a + '__' + item_b] = (pcc, p_value)

# Naming the columns at construction keeps this working when there are no
# pairs at all (assigning two labels to an empty, column-less frame raises).
result = pd.DataFrame.from_dict(correlations, orient = 'index', columns = ['PCC', 'p-value'])
print(result.sort_values(by='PCC', ascending=False).head(50))

['Bash', 'Berserk', 'Rampage', 'Feed', 'Body Slam', 'Defend_R', 'Demon Form', 'Apotheosis', 'Mayhem', 'Heavy Blade', 'Strike_R', 'Disarm', 'Dropkick', 'Ghostly Armor', 'Impervious', 'Madness', 'Shockwave', 'Double Tap', 'AscendersBane', 'Barricade', 'Clash', 'Whirlwind', 'Inflame', 'Flex', 'Shrug It Off', 'Rage', 'Seeing Red', 'Infernal Blade', 'Armaments', 'Sword Boomerang', 'Reaper', 'Wild Strike', 'Bloodletting', 'Perfected Strike', 'True Grit', 'Sever Soul', 'Iron Wave', 'Immolate', 'Clothesline', 'Uppercut', 'Second Wind', 'Cleave', 'Thunderclap', 'CurseOfTheBell', 'Corruption', 'Battle Trance', 'Twin Strike', 'Headbutt', 'Dual Wield', 'Blood for Blood', 'Offering', 'Rupture', 'Ghostly', 'Limit Break', 'Sentinel', 'Carnage', 'Fiend Fire', 'Havoc', 'Dark Embrace', 'Pummel', 'Metallicize', 'Pommel Strike', 'Reckless Charge', 'Burning Pact', 'Warcry', 'Flame Barrier', 'RitualDagger', 'Searing Blow', 'Power Through', 'Bite', 'Feel No Pain', 'HandOfGreed', 'Entrench', 'Fire Breathing',