In [1]:
from pymongo import MongoClient
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import itertools
import time
import re

In [2]:
re_ignore_upgrade = re.compile('\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    return re.sub(re_ignore_upgrade, '', cardname)

In [3]:
%%time
client = MongoClient('mongodb://localhost:27017')
db = client.spire
status = db.command('serverStatus')
runs = db.runs

search = {
  # "event.character_chosen": "THE_SILENT",
  "event.character_chosen": "IRONCLAD",
  "event.ascension_level": {'$gte': 10},
  "event.floor_reached": {'$gte': 40}
}
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

result = runs.find(search)

num_runs 18736
CPU times: user 12.7 ms, sys: 2.78 ms, total: 15.5 ms
Wall time: 60.8 ms


In [4]:
%%time
# count how many decks contain each card
# ignore card upgrades
all_cards = {}
all_relics = {}
deck_lists = {}
relic_lists = {}

for r in result:

    # count cards - only count a card once per deck
    unique_cards = {ignore_upgrade(card) for card in set(r['event']['master_deck'])}
    for card in unique_cards:
        if card in all_cards:
            all_cards[card] += 1
        else:
            all_cards[card] = 1

    # count relics
    for relic in r['event']['relics']:
        if relic in all_relics:
            all_relics[relic] += 1
        else:
            all_relics[relic] = 1

    deck_lists[r['event']['play_id']] = {ignore_upgrade(card) for card in set(r['event']['master_deck'])} # don't care about duplicates right now
    relic_lists[r['event']['play_id']] = set(r['event']['relics'])

# check the top cards
cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('=== top cards ===')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('=== total cards ===\n', len(all_cards))

# check the top relics
relics_by_freq = sorted(all_relics, key=all_relics.get, reverse=True)
print('=== top relics ===')
for relic in relics_by_freq[:5]:
    print(relic, all_relics[relic])
print('=== total relics ===\n', len(all_relics))

=== top cards ===
AscendersBane 18700
Bash 16726
Defend_R 16574
Strike_R 13250
Shrug It Off 11769
=== total cards ===
 321
=== top relics ===
Burning Blood 15328
NeowsBlessing 6371
Vajra 4723
Red Mask 4666
Red Skull 4616
=== total relics ===
 153
CPU times: user 10.5 s, sys: 267 ms, total: 10.8 s
Wall time: 16.1 s


In [5]:
%%time
# remove cards appearing in less than 1% of runs
for card in list(all_cards.keys()):
    if all_cards[card] < .01 * num_runs:
        del all_cards[card]
final_cards = set(all_cards.keys())

# remove relics appearing in less than 1% of runs
for relic in list(all_relics.keys()):
    if all_relics[relic] < .01 * num_runs:
        del all_relics[relic]
final_relics = set(all_relics.keys())

print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

print('total relics after removing those only appearing in <1% runs:',
      len(all_relics),
      len(final_relics))

total cards after removing those only appearing in <1% runs: 124 124
total relics after removing those only appearing in <1% runs: 144 144
CPU times: user 423 µs, sys: 27 µs, total: 450 µs
Wall time: 404 µs


In [6]:
%%time
# print(deck_lists)
# all_cards
timings = {}
t1 = time.perf_counter()
df = pd.DataFrame(
    columns=list(all_cards.keys()) + list(all_relics.keys()),
    index=deck_lists.keys())
df.index.name = 'play_id'
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

# mark the cards which appear in each run
for play_id, dl in deck_lists.items():
    item_list = dl.union(relic_lists[play_id])
    df.loc[play_id] = [(item in item_list) for item in df.columns]
timings['df_fill'] = time.perf_counter() - t1
print(timings)
df.fillna(False, inplace = True)
print(df.iloc[0:10, 0:5])

{'df_setup': 0.3116861400194466, 'df_fill': 3.172118790971581}
                                      Twin Strike  Rampage  Defend_R  \
play_id                                                                
72816ebf-bfa0-4ed6-a2ef-02351f1380ab         True     True      True   
0e293216-2222-4b7c-8754-bbc397740717        False    False      True   
c74ad0ea-71ea-4c4d-af18-e74f1594cb82        False    False      True   
acc198c8-66b9-47b9-a831-90a7be8c8701        False    False      True   
dd972e1a-1801-4456-a95a-0fc30073f4a6        False    False      True   
cef77499-ca19-45bd-8b3a-91c1d2b8b491        False    False      True   
ad1c3fd7-6e4d-4b6b-b16f-2fd82786e762        False    False      True   
0f256162-8af3-47f0-95f7-f96c7264fc74         True    False      True   
ebdda67a-b3b3-47a5-b1c8-a7b4f42b05da        False    False      True   
bc0537c6-fd80-4d2b-a547-3f228d871439        False    False      True   

                                      Thunderclap  Ghostly Armor  
play_

In [7]:
print('total items (cards + relics)')
len(df.columns.tolist())

total items (cards + relics)


268

In [8]:
df.dtypes

Twin Strike      bool
Rampage          bool
Defend_R         bool
Thunderclap      bool
Ghostly Armor    bool
                 ... 
Nloth's Gift     bool
Frozen Eye       bool
SacredBark       bool
FaceOfCleric     bool
TheAbacus        bool
Length: 268, dtype: object

In [27]:
%%time
import random
correlations = {}
items = sorted(df.columns.tolist())

for item_a, item_b in itertools.combinations(items, 2):
    correlations[item_a + '|' + item_b] = pearsonr(df.loc[:, item_a], df.loc[:, item_b])
    # correlations[frozenset([item_a, item_b])] = pearsonr(df.loc[:, item_a], df.loc[:, item_b])

result = pd.DataFrame.from_dict(correlations, orient = 'index')
result.columns = ['PCC', 'p-value']
print('item correlations')
print(result.sort_values(by='PCC', ascending=True).head(50))

item correlations
                                    PCC        p-value
Defend_R|Pandora's Box        -0.826229   0.000000e+00
Black Blood|Burning Blood     -0.622910   0.000000e+00
Pandora's Box|Strike_R        -0.463762   0.000000e+00
Bite|Strike_R                 -0.325638   0.000000e+00
Astrolabe|Strike_R            -0.207913  4.793184e-182
Body Slam|Heavy Blade         -0.174991  1.007640e-128
Second Wind|Strike_R          -0.171888  3.359292e-124
Dark Embrace|Strike_R         -0.165662  2.201986e-115
Body Slam|Limit Break         -0.158794  4.690873e-106
Feel No Pain|Strike_R         -0.155719  5.169102e-102
Bash|Body Slam                -0.153925   1.079532e-99
Feel No Pain|Flex             -0.151417   1.697925e-96
Body Slam|Inflame             -0.142540   1.276217e-85
Barricade|Limit Break         -0.140864   1.212794e-83
Barricade|Heavy Blade         -0.140697   1.900858e-83
Flex|Snecko Eye               -0.140653   2.139787e-83
Barricade|Flex                -0.139838   1.910

In [30]:
# look up correlation of 2 cards
# result['PCC'][frozenset(['Body Slam', 'Barricade'])]
result['PCC']['Barricade|Body Slam']

0.3560905871598513

In [31]:
# result.to_pickle('./correlations.pkl')

In [38]:
corr_only = result.copy()
corr_only.drop(columns=['p-value'], inplace=True)
corr_only = corr_only.round(decimals=8)
print(corr_only['PCC']['Barricade|Body Slam'])
corr_only.to_json('correlations.json')
print('done')

0.35609059
done


In [33]:
corr_only.columns

Index(['PCC'], dtype='object')

In [42]:
import json
with open('item_names.json', 'w') as f:
    json.dump(items, f)