In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import sklearn as sk
import tensorflow as tf
from collections import Counter
import json
import pickle

In [21]:
with open("./common_types.txt", "rb") as f:
       common_types = pickle.load(f)

cols_include_all = ['colorIdentity', 'colors', 'firstPrinting', 'keywords', 'manaCost', 'manaValue',
                     'subtypes', 'supertypes', 'text', 'type', 'types', 'power', 'toughness',  'colorIndicator',
                     'name', 'hasAlternativeDeckLimit']
cols_include_noncreature = ['loyalty',]
cols_edhrec = ["edhrecRank", "edhrecSaltiness"]
cols_legality = ['legalities.commander', 'legalities.duel', 'legalities.explorer',
       'legalities.historic', 'legalities.historicbrawl', 'legalities.legacy',
       'legalities.modern', 'legalities.oathbreaker', 'legalities.pauper',
       'legalities.paupercommander', 'legalities.penny', 'legalities.pioneer',
       'legalities.vintage', 'legalities.gladiator','legalities.alchemy',
       'legalities.brawl', 'legalities.future', 'legalities.standard', 'legalities.predh',
       'legalities.premodern', 'legalities.oldschool',]
cols_leadership = ['leadershipSkills.brawl',
       'leadershipSkills.commander', 'leadershipSkills.oathbreaker',]

def to_feature_name(s: str, typ: bool = False) -> str:
    if typ:
        return "f_ct_" + s.lower().replace(" ", "_")
    return "f_" + s.lower().replace(" ", "_")

def get_kw_list(filename: str):
    """Get list of keywords from file"""
    with open("../data/mtg/"+filename+".json") as f:
        json_data = json.load(f)
    data = json_data["data"]
    ability_words = data["abilityWords"]
    kw_abilities = data["keywordAbilities"]
    kw_actions = data["keywordActions"]
    all_kws = ability_words + kw_abilities + kw_actions
    return all_kws, ability_words, kw_abilities, kw_actions

def make_types_list(df: pd.DataFrame, n: int) -> None:
    """From a complete dataset, write a list of the 200 most common creature types to a file called common_types.txt"""
    all_types = []
    df["subtypes"].apply(all_types.extend)
    all_types = Counter(all_types)
    common_types = [x for x,y in all_types.most_common(n)]
    with open("./common_types.txt", "wb") as f:
        pickle.dump(common_types, f)

# make_types_list(df, 200)

all_kws, _, _, _ = get_kw_list("Keywords")




In [22]:
def load_atomic(filename: str) -> pd.DataFrame:
    """Load from the Atomic standard files into a dataframe resembling the old data standard."""
    with open("../data/mtg/"+filename+".json") as f:
        json_data = json.load(f)  # Load from file
    json_data = json_data["data"]
    cards = [x[0] for x in json_data.values() if len(x) == 1] # Pull only cards with 1 face (no transform, fuse, split, flip cards, sorry Delver)
    df = pd.json_normalize(cards)
    return df

def prep_df(df: pd.DataFrame, monocolor: bool, creatures: bool, modern: bool) -> pd.DataFrame:
    """
    Preprocesses card DF
    @param df: Input DataFrame
    @param monocolor: If true, return only cards with 1 or less color
    @param creatures: If true, return only creatures
    @param modern: If true, filter by modern legality (excludes Uro, :'( )
    """
    df["num_colors"] = df["colors"].map(len)
    if creatures:
        df = df.loc[df['type'].str.contains('Creature')]
    if monocolor:
        df = df.loc[df['num_colors'] <= 1]
    if modern:
        df = df.loc[df["legalities.modern"] == "Legal"]
    df = df[cols_include_all]
    df["f_cmc"] = (df["manaValue"] / 7.5) - 1  # [0,15] -> [-1, 1]
    df['f_pow'] = df['power'].replace({"1+*": 1, "*": "0", "*+1": 1}) # Assume all *'s are 0 (as per the rules)
    df['f_pow'] = ((df['f_pow'].astype(int) + 1) / 9) - 1 # [-1,16] -> [-1, 1]
    df['f_tough'] = df['toughness'].replace({"1+*": 1, "*": "0", "*+1": 1}) # Assume all *'s are 0 (as per the rules)
    df['f_tough'] = ((df['f_tough'].astype(int) + 1) / 9) - 1 # [-1,16] -> [-1, 1]

    # Binary columns for types and keywords
    for kw in all_kws:
        feature = to_feature_name(kw)
        df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
    for typ in common_types:
        feature = to_feature_name(typ, True)
        df[feature] = df["subtypes"].apply(lambda x: 1 if typ in x else 0)
    df = df.set_index("name")
    return df

df = prep_df(load_atomic("ModernAtomic"), monocolor=False, creatures=True, modern=True)




  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using reminder text?
  df[feature] = df["text"].str.lower().str.contains(kw.lower())  # This works, but Death's Shadow counts as a creature with Shadow. Could look into using r

In [26]:
list(df.columns)

['colorIdentity',
 'colors',
 'firstPrinting',
 'keywords',
 'manaCost',
 'manaValue',
 'subtypes',
 'supertypes',
 'text',
 'type',
 'types',
 'power',
 'toughness',
 'colorIndicator',
 'hasAlternativeDeckLimit',
 'f_cmc',
 'f_pow',
 'f_tough',
 'f_battalion',
 'f_bloodrush',
 'f_channel',
 'f_chroma',
 'f_cohort',
 'f_constellation',
 'f_converge',
 'f_delirium',
 'f_domain',
 'f_fateful_hour',
 'f_ferocious',
 'f_formidable',
 'f_grandeur',
 'f_hellbent',
 'f_heroic',
 'f_imprint',
 'f_inspired',
 'f_join_forces',
 'f_kinship',
 'f_landfall',
 'f_lieutenant',
 'f_metalcraft',
 'f_morbid',
 'f_parley',
 'f_radiance',
 'f_raid',
 'f_rally',
 'f_spell_mastery',
 'f_strive',
 'f_sweep',
 'f_tempting_offer',
 'f_threshold',
 'f_will_of_the_council',
 'f_adamant',
 'f_addendum',
 "f_council's_dilemma",
 'f_eminence',
 'f_enrage',
 "f_hero's_reward",
 'f_kinfall',
 'f_landship',
 'f_legacy',
 'f_revolt',
 'f_underdog',
 'f_undergrowth',
 'f_magecraft',
 'f_teamwork',
 'f_pack_tactics',
 'f

In [24]:

df["f_flying"].value_counts()


False    7199
True     2005
Name: f_flying, dtype: int64

In [8]:
df = mono_creatures


TypeError: descriptor 'lower' for 'str' objects doesn't apply to a 'list' object

MAIN GOAL: Determine a creature's color identity based on: (number of features)
- CMC (1)
- Power (1)
- Toughness (1)
- Type (boolean cols for each of the top 100 tribes) (100)
- Keywords (see keywords.json and list of evergreen keywords on https://mtg.fandom.com/wiki/Evergreen) (20-100)
- Name? (Would need a way to break this down (https://web.stanford.edu/group/pdplab/pdphandbook/handbookch8.html))


In [None]:


evergreen_keywords = ["Activate", "Attach", "Cast", "Counter", "Create", "Destroy", "Discard", "Exchange", "Exile", "Fight",
                       "Mill", "Play", "Reveal", "Sacrifice", "Scry", "Search", "Shuffle", "Tap", "Untap"]
my_common_words = ["Enchantment", "Artifact", "+1/+1", "Token", "Draw" "Land", "Nonland", "Spell", "Creature",]
evergreen_abilities = ["Deathtouch", "Defender", "Double Strike", "Enchant", "Equip", "First Strike", "Flash", "Flying",
                        "Haste", "Hexproof", "Indestructible", "Lifelink", "Menace", "Protection", "Reach", "Trample",
                          "Vigilance", "Ward", "Regenerate", "Shroud", "Intimidate", "Prowess"]
all_keywords = evergreen_keywords + my_common_words + evergreen_abilities

# Issue #1: Multi-faced cards from the Atomic dataset.
The more robust atmoic dataset contains split entries for DFC's, fuse cards, etc. How do we count these cards?
A. Remove them from the dataset.
    # By far the easiest approach.
B. Look at just the front.
    # Cleanest, will cause some outliers, namely on meld/fuse/transform cards
C. Add them as an additional row.
    # More accurate, but will likely be outliers
D. Add extra columns
    # Most accurate, but will mess with any ML algo if not weighted properly.

# Issue #2: Keywords Overlap with Creature Names (Death's Shadow, Flying Men)

# Issue 3: The word Counter

# Issue 4: Parsing Card Names