In [77]:
import json
import pickle
from collections import Counter
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import sklearn as sk
import tensorflow as tf

In [92]:
# Creature subtypes cached by make_types_list(). Despite the .txt suffix, the file is a pickle.
# NOTE: pickle.load can execute arbitrary code -- only load a cache this notebook wrote itself.
try:
    with open("./common_types.txt", "rb") as f:
        common_types = pickle.load(f)
except FileNotFoundError:
    # The cache is written by make_types_list(), which runs in a later cell; on a fresh
    # checkout it does not exist yet, so start empty instead of failing the whole cell.
    common_types = []

# Card columns kept for every card by prep_df().
cols_include_all = [
    'colorIdentity', 'colors', 'firstPrinting', 'keywords', 'manaCost', 'manaValue',
    'subtypes', 'supertypes', 'text', 'type', 'types', 'power', 'toughness', 'colorIndicator',
    'asciiName', 'hasAlternativeDeckLimit',
]
# Extra columns that only matter for non-creature cards.
cols_include_noncreature = ['loyalty']
# EDHREC popularity / salt scores.
cols_edhrec = ["edhrecRank", "edhrecSaltiness"]
# One flattened `legalities.<format>` column per format.
cols_legality = [
    'legalities.commander', 'legalities.duel', 'legalities.explorer',
    'legalities.historic', 'legalities.historicbrawl', 'legalities.legacy',
    'legalities.modern', 'legalities.oathbreaker', 'legalities.pauper',
    'legalities.paupercommander', 'legalities.penny', 'legalities.pioneer',
    'legalities.vintage', 'legalities.gladiator', 'legalities.alchemy',
    'legalities.brawl', 'legalities.future', 'legalities.standard', 'legalities.predh',
    'legalities.premodern', 'legalities.oldschool',
]
# Flattened `leadershipSkills.<format>` columns.
cols_leadership = [
    'leadershipSkills.brawl',
    'leadershipSkills.commander',
    'leadershipSkills.oathbreaker',
]



In [65]:
def load_atomic(filename: str, data_dir: str = "../data/mtg/") -> pd.DataFrame:
    """Load an Atomic-format JSON file into a dataframe resembling the old data standard.

    @param filename: File name without the ".json" extension (e.g. "ModernAtomic").
    @param data_dir: Directory that holds the JSON file; defaults to the notebook's data folder.
    @return: One row per single-faced card, with nested objects flattened into dotted
             column names by ``pd.json_normalize``.
    """
    path = Path(data_dir) / (filename + ".json")
    # Read as UTF-8 explicitly so card names with accents load the same on every platform,
    # instead of depending on the locale's default encoding.
    with open(path, encoding="utf-8") as f:
        json_data = json.load(f)
    faces_by_name = json_data["data"]
    # Keep only cards with exactly one face (no transform, fuse, split, flip cards, sorry Delver).
    single_faced = [faces[0] for faces in faces_by_name.values() if len(faces) == 1]
    return pd.json_normalize(single_faced)

def _scale_stat(stat: pd.Series) -> pd.Series:
    """Turn a printed power/toughness column into a number and scale it with (x + 1) / 9 - 1."""
    # Assume all *'s are 0 (as per the rules), so "1+*" and "*+1" both count as 1.
    star_values = {"*": 0, "1+*": 1, "*+1": 1}
    resolved = stat.map(lambda v: star_values.get(v, v) if isinstance(v, str) else v)
    # Missing values (e.g. non-creatures) and any other non-numeric printing become NaN
    # instead of raising, which is what astype(int) did.
    numeric = pd.to_numeric(resolved, errors="coerce")
    return (numeric + 1) / 9 - 1  # -1 -> -1, 17 -> 1


def prep_df(df: pd.DataFrame, monocolor: bool, creatures: bool, modern: bool) -> pd.DataFrame:
    """
    Preprocesses card DF without modifying the input frame.

    @param df: Input DataFrame
    @param monocolor: If true, return only cards with at most one color
    @param creatures: If true, return only creatures
    @param modern: If true, filter by modern legality (excludes Uro, :'( )
    @return: New DataFrame holding the columns listed in cols_include_all (a listed column
             that is absent from the input is filled with NaN) plus the engineered columns
             num_colors, f_cmc, f_pow and f_tough. The row index of the input is kept;
             the previous set_index("asciiName") call discarded its result and is removed.
    """
    num_colors = df["colors"].map(len)

    # Combine all requested filters into one boolean mask so the frame is sliced only once.
    keep = pd.Series(True, index=df.index)
    if creatures:
        keep &= df["type"].str.contains("Creature", na=False)
    if monocolor:
        keep &= num_colors <= 1
    if modern:
        keep &= df["legalities.modern"] == "Legal"
    selected = df.loc[keep]

    # Select the raw columns first and attach the features afterwards; selecting last
    # (as before) silently dropped every engineered column.
    return selected.reindex(columns=list(cols_include_all)).assign(
        num_colors=num_colors[keep],
        f_cmc=(selected["manaValue"] / 7.5) - 1,  # [0,15] -> [-1, 1]
        f_pow=_scale_stat(selected["power"]),
        f_tough=_scale_stat(selected["toughness"]),
    )

# Working frame for the rest of the notebook: Modern-legal creatures of any number of colors.
df = prep_df(
    load_atomic("ModernAtomic"),
    monocolor=False,
    creatures=True,
    modern=True,
)




In [89]:
def make_types_list(df: pd.DataFrame, top_n: int = 200, path: str = "./common_types.txt") -> None:
    """From a complete dataset, write a list of the most common creature types to a file.

    @param df: DataFrame with a "subtypes" column holding a list of subtype names per card.
    @param top_n: How many of the most frequent subtypes to keep (default 200).
    @param path: Output file (default ./common_types.txt). The content is a pickled list of
                 lower-cased subtype names, most frequent first, despite the .txt suffix.
    """
    # Count straight from the column and skip entries that are not lists (e.g. NaN for
    # cards without subtypes), which would make list.extend raise.
    counts = Counter(
        subtype
        for card_subtypes in df["subtypes"]
        if isinstance(card_subtypes, (list, tuple))
        for subtype in card_subtypes
    )
    common_types = [name.lower() for name, _count in counts.most_common(top_n)]
    with open(path, "wb") as f:
        pickle.dump(common_types, f)

# Write the common-subtype cache (./common_types.txt) from the current `df`.
make_types_list(df)


TypeError: a bytes-like object is required, not 'str'

7380            Invoke Despair
11563      Reckoner Bankbuster
14976    The Meathook Massacre
Name: name, dtype: object

In [46]:
def to_feature_name(s: str) -> str:
    """Build a feature column name: lower-case `s`, turn each space into "_", prefix "f_"."""
    lowered = s.lower()
    underscored = "_".join(lowered.split(" "))
    return "f_{}".format(underscored)

def get_kw_list(filename: str, data_dir: str = "../data/mtg/") -> tuple:
    """Get the keyword lists from a JSON file.

    @param filename: File name without the ".json" extension (e.g. "Keywords").
    @param data_dir: Directory that holds the JSON file; defaults to the notebook's data folder.
    @return: Tuple (all_kws, ability_words, kw_abilities, kw_actions), where all_kws is the
             three lists concatenated in that order.
    """
    path = Path(data_dir) / (filename + ".json")
    # Read as UTF-8 explicitly rather than relying on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        json_data = json.load(f)
    data = json_data["data"]
    ability_words = data["abilityWords"]
    kw_abilities = data["keywordAbilities"]
    kw_actions = data["keywordActions"]
    all_kws = ability_words + kw_abilities + kw_actions
    return all_kws, ability_words, kw_abilities, kw_actions

# The subtype tallies are needed globally, so build them at cell level.
all_type_labels = Counter(
    subtype
    for card_subtypes in df['subtypes']
    for subtype in card_subtypes
)
most_common_types = [label for label, _count in all_type_labels.most_common(100)]

a, b, c, d = get_kw_list("Keywords")
# 297 total keywords. Some terms (Counter being the big one) are ambiguous, while others can
# appear in card names (Assemble -> Assemble the Legion). We could reduce the noise by manually
# removing irrelevant keywords, but let's leave them for now.
a



297

In [8]:
# NOTE(review): `mono_creatures` is not defined in any cell visible here -- presumably it came
# from a deleted or out-of-order cell; confirm where it is built before relying on this rebinding.
df = mono_creatures


TypeError: descriptor 'lower' for 'str' objects doesn't apply to a 'list' object

In [7]:
# Peek at the first row by position. A bare integer key on a Series is a label lookup and only
# falls back to position as deprecated behaviour, so use .iloc to make the intent explicit.
df["subtypes"].iloc[0]

nan

In [10]:
# Inspect the whole subtypes column (the recorded output shows every entry as NaN, dtype float64).
df["subtypes"]

Abattoir Ghoul        NaN
Abbey Griffin         NaN
Abbot of Keral Keep   NaN
Aberrant Researcher   NaN
Abhorrent Overlord    NaN
                       ..
Zombie Musher         NaN
Zulaport Chainmage    NaN
Zulaport Cutthroat    NaN
Zulaport Enforcer     NaN
Zurgo Bellstriker     NaN
Name: subtypes, Length: 6346, dtype: float64

In [47]:


# One boolean column per common creature type: True when the card lists that subtype.
# Membership is tested on the per-card `subtypes` list (the source of most_common_types);
# the old code assigned a filtered DataFrame to a single column, which cannot work.
for c in most_common_types:
    c_name = to_feature_name("ct_"+c)
    df[c_name] = df["subtypes"].map(lambda subtypes: isinstance(subtypes, list) and c in subtypes)

# One boolean column per keyword: True when the rules text mentions it.
# regex=False because entries such as "+1/+1" are not valid regular expressions,
# case=False because keywords are usually lower-case in running rules text,
# na=False so cards without text count as "not present".
# The "kw_" prefix keeps keyword columns distinct from the creature-type ("ct_") columns.
for c in all_keywords:
    c_name = to_feature_name("kw_"+c)
    df[c_name] = df["text"].str.contains(c, case=False, regex=False, na=False)

NameError: name 'most_common_types' is not defined

MAIN GOAL: Determine a creature's color identity based on: (number of features)
- CMC (1)
- Power (1)
- Toughness (1)
- Type (boolean cols for each of the top 100 tribes) (100)
- Keywords (see keywords.json and list of evergreen keywords on https://mtg.fandom.com/wiki/Evergreen) (20-100)
- Name? (Would need a way to break this down (https://web.stanford.edu/group/pdplab/pdphandbook/handbookch8.html))


In [None]:


# Evergreen keyword actions.
evergreen_keywords = ["Activate", "Attach", "Cast", "Counter", "Create", "Destroy", "Discard", "Exchange", "Exile", "Fight",
                      "Mill", "Play", "Reveal", "Sacrifice", "Scry", "Search", "Shuffle", "Tap", "Untap"]
# Hand-picked frequent rules-text terms. "Draw" and "Land" are separate entries; a missing
# comma used to fuse them into the single string "DrawLand".
my_common_words = ["Enchantment", "Artifact", "+1/+1", "Token", "Draw", "Land", "Nonland", "Spell", "Creature"]
# Evergreen keyword abilities, plus a few formerly evergreen ones.
evergreen_abilities = ["Deathtouch", "Defender", "Double Strike", "Enchant", "Equip", "First Strike", "Flash", "Flying",
                       "Haste", "Hexproof", "Indestructible", "Lifelink", "Menace", "Protection", "Reach", "Trample",
                       "Vigilance", "Ward", "Regenerate", "Shroud", "Intimidate", "Prowess"]
# Combined search list used to build the keyword feature columns.
all_keywords = evergreen_keywords + my_common_words + evergreen_abilities

# The more robust Atomic dataset contains split entries for DFCs, fuse cards, etc. How do we count these cards?
A. Remove them from the dataset.
    # By far the easiest approach.
B. Look at just the front.
    # Cleanest, will cause some outliers, namely on meld/fuse/transform cards
C. Add them as an additional row.
    # More accurate, but will likely be outliers
D. Add extra columns
    # Most accurate, but will mess with any ML algo if not weighted properly.