In [30]:
from pymongo import MongoClient
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import itertools
import time
import re

In [31]:
re_ignore_upgrade = re.compile('\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    return re.sub(re_ignore_upgrade, '', cardname)

In [33]:
client = MongoClient('mongodb://localhost:27017')
db = client.spire
status = db.command('serverStatus')
runs = db.runs

search = {
  "event.character_chosen": "IRONCLAD",
  "event.ascension_level": {'$gte': 5},
  "event.floor_reached": {'$gte': 18}
}
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

result = runs.find(search)

# count how many decks contain each card
all_cards = {}
deck_lists = {}
for r in result:
    # only count a card once per deck
    unique_cards = {ignore_upgrade(card) for card in set(r['event']['master_deck'])}
    for card in unique_cards:
        if card in all_cards:
            all_cards[card] += 1
        else:
            all_cards[card] = 1
    deck_lists[r['event']['play_id']] = {ignore_upgrade(card) for card in set(r['event']['master_deck'])} # don't care about duplicates right now

cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('top cards')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('total cards', len(all_cards))
for card in list(all_cards.keys()):
    if all_cards[card] < .01 * num_runs:
        del all_cards[card]
final_cards = set(all_cards.keys())
print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

num_runs 1317
top cards
Bash 1252
Defend_R 1220
Strike_R 1114
AscendersBane 882
Shrug It Off 664
total cards 150
total cards after removing those only appearing in <1% runs: 107 107


In [34]:
# print(deck_lists)
# all_cards
timings = {}
t1 = time.perf_counter()
df = pd.DataFrame(columns=all_cards, index=deck_lists.keys())
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

for play_id, dl in deck_lists.items():
    for card in dl.intersection(final_cards):
        df.loc[play_id][card] = True
timings['df_fill'] = time.perf_counter() - t1
print(timings)
df.fillna(False, inplace = True)
print(df)

{'df_setup': 0.09749611699953675, 'df_fill': 1.3780862389830872}
                                      Heavy Blade  AscendersBane  Body Slam  \
be781d3b-6cde-47fc-b900-97f35e064001         True           True       True   
349582b6-8695-499d-9657-1254b033e67c         True          False       True   
e04e2e20-73c4-41fb-84fc-71de9bf6ec08         True           True      False   
a099980f-9728-4369-b15b-01b9bb718389        False           True      False   
734202ac-d6ce-4379-a3f6-69082c8881c5        False           True      False   
...                                           ...            ...        ...   
4f820fd2-2902-477f-9df5-fd7b5208d8f7        False           True      False   
211231a2-9280-4758-a5d9-11b3421fcef7         True          False      False   
075f7766-4fd1-447e-894d-3b95dd8da86b         True           True      False   
aac5afef-1f87-48b6-a9c0-f9f9f649e228         True          False      False   
ca186d9f-c0ac-460d-84c7-7812f520115a         True           True  

In [35]:
len(df.columns.tolist())

107

In [36]:
correlations = {}
cards = df.columns.tolist()
print(cards)
for card_a, card_b in itertools.combinations(cards, 2):
    correlations[card_a + '__' + card_b] = pearsonr(df.loc[:, card_a], df.loc[:, card_b])

result = pd.DataFrame.from_dict(correlations, orient = 'index')
result.columns = ['PCC', 'p-value']
print(result.sort_values(by='PCC', ascending=False).head(30))

['Heavy Blade', 'AscendersBane', 'Body Slam', 'Berserk', 'Dropkick', 'Defend_R', 'Feed', 'Double Tap', 'Bash', 'Ghostly Armor', 'Mayhem', 'Madness', 'Strike_R', 'Disarm', 'Rampage', 'Apotheosis', 'Impervious', 'Shockwave', 'Demon Form', 'Whirlwind', 'Inflame', 'Barricade', 'Clash', 'Flex', 'Shrug It Off', 'Armaments', 'Seeing Red', 'Infernal Blade', 'Rage', 'Bloodletting', 'Reaper', 'Perfected Strike', 'Sword Boomerang', 'Wild Strike', 'Iron Wave', 'Sever Soul', 'Uppercut', 'Clothesline', 'Immolate', 'CurseOfTheBell', 'Cleave', 'True Grit', 'Thunderclap', 'Corruption', 'Second Wind', 'Twin Strike', 'Battle Trance', 'Dual Wield', 'Headbutt', 'Rupture', 'Blood for Blood', 'Offering', 'Ghostly', 'Limit Break', 'Carnage', 'Sentinel', 'Pummel', 'Dark Embrace', 'Havoc', 'Metallicize', 'Fiend Fire', 'Pommel Strike', 'Burning Pact', 'Flame Barrier', 'Warcry', 'Reckless Charge', 'RitualDagger', 'Searing Blow', 'Power Through', 'Bite', 'HandOfGreed', 'Feel No Pain', 'Entrench', 'Necronomicurse',