In [1]:
from pymongo import MongoClient
from pprint import pprint
import numpy as np
import pandas as pd
from scipy.stats import pearsonr
import itertools
import time
import re

In [2]:
re_ignore_upgrade = re.compile('\+\d+$')
def ignore_upgrade(cardname: str) -> str:
    return re.sub(re_ignore_upgrade, '', cardname)

In [34]:
client = MongoClient('mongodb://localhost:27017')
db = client.spire
status = db.command('serverStatus')
runs = db.runs

search = {
  # "event.character_chosen": "THE_SILENT",
  "event.character_chosen": "DEFECT",
  "event.ascension_level": {'$gte': 5},
  "event.floor_reached": {'$gte': 18}
}
num_runs = runs.count_documents(search)
print('num_runs', num_runs)

result = runs.find(search)

# count how many decks contain each card
all_cards = {}
deck_lists = {}
for r in result:
    # only count a card once per deck
    unique_cards = {ignore_upgrade(card) for card in set(r['event']['master_deck'])}
    for card in unique_cards:
        if card in all_cards:
            all_cards[card] += 1
        else:
            all_cards[card] = 1
    deck_lists[r['event']['play_id']] = {ignore_upgrade(card) for card in set(r['event']['master_deck'])} # don't care about duplicates right now

cards_by_freq = sorted(all_cards, key=all_cards.get, reverse=True)
print('top cards')
for card in cards_by_freq[:5]:
    print(card, all_cards[card])
print('total cards', len(all_cards))
for card in list(all_cards.keys()):
    if all_cards[card] < .01 * num_runs:
        del all_cards[card]
final_cards = set(all_cards.keys())
print('total cards after removing those only appearing in <1% runs:',
      len(all_cards),
      len(final_cards))

num_runs 8577
top cards
Dualcast 8342
Zap 8272
Defend_B 7959
Strike_B 6646
AscendersBane 5653
total cards 261
total cards after removing those only appearing in <1% runs: 102 102


In [35]:
# print(deck_lists)
# all_cards
timings = {}
t1 = time.perf_counter()
df = pd.DataFrame(columns=all_cards, index=deck_lists.keys())
timings['df_setup'] = time.perf_counter() - t1
t1 = time.perf_counter()

for play_id, dl in deck_lists.items():
    df.loc[play_id] = [(card in dl) for card in df.columns]
timings['df_fill'] = time.perf_counter() - t1
print(timings)
df.fillna(False, inplace = True)
print(df)

{'df_setup': 0.05119219499965766, 'df_fill': 1.097767105999992}
                                      Sunder   Zap  Chaos  Doubt  \
cd37c1b7-f07c-44b2-9d83-f07914b71cc7    True  True   True   True   
b6cd6a2a-dc38-482c-917b-049545e51982   False  True  False  False   
fabb1dbd-f1b7-4faa-b737-0a3430451e23   False  True   True  False   
5be1d399-16be-46d0-bc32-5644a2f010e2    True  True  False  False   
6b793c68-b5ba-4faf-beed-22f6ee173681   False  True  False  False   
...                                      ...   ...    ...    ...   
e0c8b436-0ac7-4c92-969c-d8b729eb9207   False  True  False  False   
63d1711b-3d50-446c-b23a-9b62a72d7136   False  True  False  False   
8eb2e3c6-e370-4862-aa95-04ee836147d2   False  True  False  False   
1b6d8ecf-c6ba-4c69-b144-210b84fe7c86   False  True  False  False   
55937f5f-e43d-4154-8f57-4be9a8d5604e   False  True   True  False   

                                      AscendersBane  Electrodynamics  \
cd37c1b7-f07c-44b2-9d83-f07914b71cc7           

In [36]:
len(df.columns.tolist())

102

In [37]:
df.dtypes

Sunder            bool
Zap               bool
Chaos             bool
Doubt             bool
AscendersBane     bool
                  ... 
Flash of Steel    bool
Discovery         bool
Mayhem            bool
Parasite          bool
Panache           bool
Length: 102, dtype: object

In [38]:
correlations = {}
cards = df.columns.tolist()
print(cards)
for card_a, card_b in itertools.combinations(cards, 2):
    correlations[card_a + '__' + card_b] = pearsonr(df.loc[:, card_a], df.loc[:, card_b])

result = pd.DataFrame.from_dict(correlations, orient = 'index')
result.columns = ['PCC', 'p-value']
print(result.sort_values(by='PCC', ascending=False).head(30))

['Sunder', 'Zap', 'Chaos', 'Doubt', 'AscendersBane', 'Electrodynamics', 'Coolheaded', 'Blizzard', 'Loop', 'Glacier', 'BootSequence', 'Genetic Algorithm', 'Dualcast', 'Defend_B', 'Turbo', 'Ball Lightning', 'Doom and Gloom', 'Self Repair', 'Steam', 'Conserve Battery', 'Strike_B', 'Streamline', 'Double Energy', 'HandOfGreed', 'Melter', 'Rebound', 'Hologram', 'Buffer', 'Thunder Strike', 'Meteor Strike', 'Capacitor', 'Storm', 'Static Discharge', 'Creative AI', 'Apotheosis', 'Go for the Eyes', 'Barrage', 'Redo', 'Sweeping Beam', 'All For One', 'Cold Snap', 'Steam Power', 'Chill', 'Tempest', 'Force Field', 'Heatsinks', 'White Noise', 'Biased Cognition', 'Echo Form', 'Machine Learning', 'Skim', 'Compile Driver', 'Consume', 'Gash', 'FTL', 'CurseOfTheBell', 'Beam Cell', 'Core Surge', 'Reinforced Body', 'Reboot', 'Leap', 'Shame', 'Hyperbeam', 'Scrape', 'Multi-Cast', 'Recycle', 'Defragment', 'Fission', 'Darkness', 'Lockon', 'Dark Shackles', 'Auto Shields', 'Bite', 'Seek', 'Stack', 'Rainbow', 'Ampl