In [87]:
import itertools
import random
import re
import time
from pprint import pprint

import numpy as np
import pandas as pd
from pymongo import MongoClient
from scipy.stats import pearsonr

In [88]:
# Matches a trailing upgrade marker ("+" followed by digits) at the end of a card name.
# Raw string: `\+` and `\d` are regex escapes, not valid Python string escapes.
re_ignore_upgrade = re.compile(r'\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    """Return `cardname` with a trailing ``+<digits>`` upgrade suffix removed.

    Names without such a suffix are returned unchanged; a ``+`` that is not
    followed by digits up to the end of the string is left alone.
    """
    return re_ignore_upgrade.sub('', cardname)

In [89]:
%%time
# Connect to the MongoDB server on localhost and pick the `runs` collection
# of the `spire` database.
client = MongoClient('mongodb://localhost:27017')
db = client.spire
# NOTE(review): `status` is not read by any of the visible cells; presumably this
# call only serves as a connectivity check -- confirm before removing it.
status = db.command('serverStatus')
runs = db.runs

# Query filter: one character, ascension level >= 5, floor reached >= 30.
search = {
  # "event.character_chosen": "THE_SILENT",
  "event.character_chosen": "IRONCLAD",
  "event.ascension_level": {'$gte': 5},
  "event.floor_reached": {'$gte': 30}
}
# Number of matching runs; later cells use it as the denominator for the rarity filter.
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

# NOTE(review): `find` returns a cursor that the counting cell iterates over;
# presumably it is exhausted after one pass, so re-run this cell before
# re-running the counting cell -- confirm against the pymongo cursor docs.
result = runs.find(search)

num_runs 6396
CPU times: user 9.1 ms, sys: 0 ns, total: 9.1 ms
Wall time: 1.61 s


In [90]:
%%time
# Count, for every card and every relic, the number of runs that contain it,
# and remember each run's card set and relic set keyed by play_id.
# Card upgrades are ignored ("Bash+1" counts as "Bash").
all_cards = {}     # card name  -> number of runs whose deck contains it
all_relics = {}    # relic name -> number of runs that hold it
deck_lists = {}    # play_id -> set of (upgrade-stripped) card names
relic_lists = {}   # play_id -> set of relic names

for r in result:
    event = r['event']
    play_id = event['play_id']

    # Ordered de-duplication: each item is counted at most once per run, and
    # the insertion order of the count dicts follows the data rather than
    # set iteration order.
    run_cards = dict.fromkeys(ignore_upgrade(card) for card in event['master_deck'])
    run_relics = dict.fromkeys(event['relics'])

    for card in run_cards:
        all_cards[card] = all_cards.get(card, 0) + 1
    for relic in run_relics:
        all_relics[relic] = all_relics.get(relic, 0) + 1

    # Duplicates within a run are irrelevant here, so plain sets are enough.
    deck_lists[play_id] = set(run_cards)
    relic_lists[play_id] = set(run_relics)

# check the top cards
cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('=== top cards ===')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('=== total cards ===\n', len(all_cards))

# check the top relics
relics_by_freq = sorted(all_relics, key=all_relics.get, reverse=True)
print('=== top relics ===')
for relic in relics_by_freq[:5]:
    print(relic, all_relics[relic])
print('=== total relics ===\n', len(all_relics))

=== top cards ===
Bash 5917
Defend_R 5849
Strike_R 5117
AscendersBane 4227
Shrug It Off 3588
=== total cards ===
 261
=== top relics ===
Burning Blood 5341
NeowsBlessing 2160
Red Skull 1346
Vajra 1308
Bronze Scales 1244
=== total relics ===
 152
CPU times: user 2.78 s, sys: 18.7 ms, total: 2.8 s
Wall time: 4.2 s


In [91]:
%%time
# Remove cards and relics that appear in less than 1% of the selected runs.
MIN_RUN_FRACTION = .01  # minimum share of runs an item must appear in to be kept


def _drop_rare(counts: dict, min_count: float) -> set:
    """Delete, in place, every key of `counts` whose count is below `min_count`.

    Returns the surviving keys as a set.
    """
    # Collect the keys first: a dict must not change size while being iterated.
    for name in [name for name, seen in counts.items() if seen < min_count]:
        del counts[name]
    return set(counts)


min_runs = MIN_RUN_FRACTION * num_runs
final_cards = _drop_rare(all_cards, min_runs)
final_relics = _drop_rare(all_relics, min_runs)

print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

print('total relics after removing those only appearing in <1% runs:',
      len(all_relics),
      len(final_relics))

total cards after removing those only appearing in <1% runs: 112 112
total relics after removing those only appearing in <1% runs: 144 144
CPU times: user 945 µs, sys: 1 µs, total: 946 µs
Wall time: 2.38 ms


In [92]:
%%time
# Build a boolean indicator table: one row per run (play_id), one column per
# retained card / relic, True where the run's deck or relic set contains it.
timings = {}
t1 = time.perf_counter()
item_columns = list(all_cards.keys()) + list(all_relics.keys())
play_ids = list(deck_lists.keys())
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

# Assemble every row first and construct the frame once with an explicit bool
# dtype. Filling an empty object-dtype frame row by row is slow and leaves the
# final dtype to fillna's downcasting behaviour.
membership = []
for play_id in play_ids:
    run_items = deck_lists[play_id].union(relic_lists[play_id])
    membership.append([item in run_items for item in item_columns])

df = pd.DataFrame(membership, index=play_ids, columns=item_columns, dtype=bool)
df.index.name = 'play_id'
timings['df_fill'] = time.perf_counter() - t1
print(timings)
print(df.iloc[0:10, 0:5])

{'df_setup': 0.05605231199297123, 'df_fill': 0.9865181990026031}
                                      Bash  Berserk  Rampage   Feed  Body Slam
play_id                                                                       
be781d3b-6cde-47fc-b900-97f35e064001  True     True     True   True       True
e04e2e20-73c4-41fb-84fc-71de9bf6ec08  True    False    False  False      False
a099980f-9728-4369-b15b-01b9bb718389  True    False    False  False      False
734202ac-d6ce-4379-a3f6-69082c8881c5  True    False    False  False      False
efb1fa17-52cb-4b53-ba1e-3789dc9572fb  True    False    False  False      False
304e3642-ab5f-480e-aabc-3e8bcb1c1fb8  True    False    False  False      False
0ea75254-5b03-46db-bfd7-65a094f324b0  True    False    False  False       True
3efcdf1f-6c6a-4ac0-9d00-201302c6077b  True    False     True  False      False
806307da-5cec-444a-8b44-4e207b6739c0  True     True    False  False      False
f13ea280-30f0-40c3-a935-ac178401e56a  True    False    False  Fals

In [93]:
# Show how many indicator columns the table has (cards and relics together).
print('total items (cards + relics)')
len(df.columns)

total items (cards + relics)


256

In [94]:
# Display the dtype of every column of the indicator table.
df.dtypes

Bash             bool
Berserk          bool
Rampage          bool
Feed             bool
Body Slam        bool
                 ... 
SsserpentHead    bool
SacredBark       bool
Nilry's Codex    bool
FaceOfCleric     bool
Chemical X       bool
Length: 256, dtype: object

In [95]:
%%time
# Pearson correlation (with p-value) between every pair of item indicator columns.
SHUFFLE_SEED = 0  # fixed seed so the "5 random items" preview is reproducible

correlations = {}
items = df.columns.tolist()

# Preview a few items in random order; a private RNG leaves the global
# `random` state untouched.
random_items = items.copy()
random.Random(SHUFFLE_SEED).shuffle(random_items)
print('5 random items')
print(random_items[:5])

# Pull each column out once as a float array instead of re-slicing the frame
# twice for every one of the n*(n-1)/2 pairs.
indicator = {item: df[item].to_numpy(dtype=float) for item in items}
for item_a, item_b in itertools.combinations(items, 2):
    correlations[item_a + '__' + item_b] = pearsonr(indicator[item_a], indicator[item_b])

# Naming the columns at construction also works when there are no pairs at all
# (assigning two names to an empty, column-less frame would raise).
result = pd.DataFrame.from_dict(correlations, orient = 'index', columns = ['PCC', 'p-value'])
print(result.sort_values(by='PCC', ascending=False).head(50))

5 random items
['Tiny House', 'Lizard Tail', 'Calipers', 'Defend_R', 'Tiny Chest']
                                      PCC        p-value
Necronomicurse__Necronomicon     0.961105   0.000000e+00
CurseOfTheBell__Calling Bell     0.930321   0.000000e+00
Defend_R__Strike_R               0.438360  1.194599e-298
Body Slam__Entrench              0.320423  1.202002e-152
Body Slam__Barricade             0.314930  2.998137e-147
Pain__WarpedTongs                0.308234  7.996688e-141
Dark Embrace__Feel No Pain       0.292676  1.564289e-126
Rupture__Combust                 0.280495  5.948898e-116
Barricade__Entrench              0.279456  4.495111e-115
Corruption__Dark Embrace         0.273209  7.119039e-110
Clumsy__Cursed Key               0.267918  1.413133e-105
Perfected Strike__Twin Strike    0.256380   1.543130e-96
Whirlwind__Chemical X            0.239858   2.230633e-84
Power Through__Fire Breathing    0.234567   1.125872e-80
Second Wind__Power Through       0.234246   1.874238e-80
Wild 