In [1]:
import polars as pl

https://github.com/minimaxir/ai-generated-magic-cards/blob/master/mtgjson_encoder.py


In [16]:
FIELDS = [
    "name",
    "manaCost",
    "type",
    "text",
    "power",
    "toughness",
    "loyalty",
    "rarity",
    "setCode",
]


df_cards = (
    pl.read_parquet("/Users/maxwoolf/Downloads/AllPrintingsParquetFiles/cards.parquet")
    .filter(pl.col("type") != "Vanguard")
    .select(FIELDS)
    .unique()
    .sort("name")
)

df_cards

name,manaCost,type,text,power,toughness,loyalty,rarity,setCode
str,str,str,str,str,str,str,str,str
"""""Ach! Hans, Run!""""","""{2}{R}{R}{G}{G}""","""Enchantment""","""At the beginning of your upkee…",,,,"""rare""","""UNH"""
"""""Brims"" Barone, Midway Mobster""","""{3}{W}{B}""","""Legendary Creature — Human Rog…","""When ""Brims"" Barone, Midway Mo…","""5""","""4""",,"""uncommon""","""UNF"""
"""""Intimidation Tactics""""","""{B}""","""Sorcery""","""Target opponent reveals their …",,,,"""uncommon""","""DFT"""
"""""Lifetime"" Pass Holder""","""{B}""","""Creature — Zombie Guest""","""""Lifetime"" Pass Holder enters …","""2""","""1""",,"""rare""","""UNF"""
"""""Name Sticker"" Goblin""","""{2}{R}""","""Creature — Goblin Guest""","""When this creature enters from…","""2""","""2""",,"""common""","""UNF"""
…,…,…,…,…,…,…,…,…
"""Éomer, Marshal of Rohan""","""{2}{R}{R}""","""Legendary Creature — Human Kni…","""Haste\nWhenever one or more ot…","""4""","""4""",,"""rare""","""PLTR"""
"""Éowyn, Fearless Knight""","""{2}{R}{W}""","""Legendary Creature — Human Kni…","""Haste\nWhen Éowyn, Fearless Kn…","""3""","""4""",,"""rare""","""PLTR"""
"""Éowyn, Fearless Knight""","""{2}{R}{W}""","""Legendary Creature — Human Kni…","""Haste\nWhen Éowyn, Fearless Kn…","""3""","""4""",,"""rare""","""LTR"""
"""Éowyn, Lady of Rohan""","""{2}{W}""","""Legendary Creature — Human Nob…","""At the beginning of combat on …","""2""","""4""",,"""uncommon""","""LTR"""


In [20]:
df_cards["rarity"].unique().to_list()

['common', 'bonus', 'special', 'rare', 'mythic', 'uncommon']

Get Sets data (will need to build a `Enum` for ordering)


In [17]:
df_sets = (
    pl.read_parquet("/Users/maxwoolf/Downloads/AllPrintingsParquetFiles/sets.parquet")
    .select(["tcgplayerGroupId", "name", "code", "releaseDate"])
    .unique()
    .sort(["releaseDate", "tcgplayerGroupId"])
)
df_sets

tcgplayerGroupId,name,code,releaseDate
f64,str,str,str
7.0,"""Limited Edition Alpha""","""LEA""","""1993-08-05"""
17.0,"""Limited Edition Beta""","""LEB""","""1993-10-04"""
115.0,"""Unlimited Edition""","""2ED""","""1993-12-01"""
1526.0,"""Collectors' Edition""","""CED""","""1993-12-10"""
1527.0,"""Intl. Collectors' Edition""","""CEI""","""1993-12-10"""
…,…,…,…
23848.0,"""Innistrad Remastered""","""INR""","""2025-01-24"""
,"""Japan Standard Cup""","""PJSC""","""2025-02-09"""
,"""Aetherdrift Promos""","""PDFT""","""2025-02-14"""
23874.0,"""Aetherdrift""","""DFT""","""2025-02-14"""


In [None]:
rarity_enum = pl.Enum(["common", "uncommon", "rare", "mythic", "bonus", "special"])
sets_enum = pl.Enum(df_sets["code"])

https://stackoverflow.com/a/73427426


In [30]:
df_cards_agg = (
    df_cards.with_columns(
        # text=pl.col("text").str.replace_all(pl.col("name"), "~"),
        rarity=pl.col("rarity").cast(rarity_enum),
        setCode=pl.col("setCode").cast(sets_enum),
    )
    .group_by(FIELDS[0:6])
    .agg(rarities=pl.col("rarity").unique(), sets=pl.col("setCode").unique())
    .sort("name")
)

df_cards_agg

name,manaCost,type,text,power,toughness,rarities,sets
str,str,str,str,str,str,list[enum],list[enum]
"""""Ach! Hans, Run!""""","""{2}{R}{R}{G}{G}""","""Enchantment""","""At the beginning of your upkee…",,,"[""rare""]","[""UNH""]"
"""""Brims"" Barone, Midway Mobster""","""{3}{W}{B}""","""Legendary Creature — Human Rog…","""When ""Brims"" Barone, Midway Mo…","""5""","""4""","[""uncommon""]","[""UNF""]"
"""""Intimidation Tactics""""","""{B}""","""Sorcery""","""Target opponent reveals their …",,,"[""uncommon""]","[""DFT""]"
"""""Lifetime"" Pass Holder""","""{B}""","""Creature — Zombie Guest""","""""Lifetime"" Pass Holder enters …","""2""","""1""","[""rare""]","[""UNF""]"
"""""Name Sticker"" Goblin""","""{2}{R}""","""Creature — Goblin Guest""","""When this creature enters from…","""2""","""2""","[""common""]","[""UNF""]"
…,…,…,…,…,…,…,…
"""Éomer, King of Rohan""","""{3}{R}{W}""","""Legendary Creature — Human Nob…","""Double strike\nÉomer, King of …","""2""","""2""","[""rare""]","[""LTC""]"
"""Éomer, Marshal of Rohan""","""{2}{R}{R}""","""Legendary Creature — Human Kni…","""Haste\nWhenever one or more ot…","""4""","""4""","[""rare""]","[""PLTR"", ""LTR""]"
"""Éowyn, Fearless Knight""","""{2}{R}{W}""","""Legendary Creature — Human Kni…","""Haste\nWhen Éowyn, Fearless Kn…","""3""","""4""","[""rare""]","[""PLTR"", ""LTR""]"
"""Éowyn, Lady of Rohan""","""{2}{W}""","""Legendary Creature — Human Nob…","""At the beginning of combat on …","""2""","""4""","[""uncommon""]","[""LTR""]"


Postprocess:

- Remove card name in text and replace with `~`: cannot be vectorized in polars.
- Set Basic Lands `sets` to `[*]`.


In [38]:
df_cards_agg = df_cards_agg.with_columns(
    text=pl.when(
        pl.col("text").str.contains(
            pl.col("name").str.replace_all(r"[\\^$.*+?()[\]{}|]", r"\$0"), literal=True
        )
    )
    .then(
        pl.col("text").str.replace(
            pl.col("name").str.replace_all(r"[\\^$.*+?()[\]{}|]", r"\$0"),
            "~",
            literal=True,
        )
    )
    .otherwise(pl.col("text"))
)

df_cards_agg

ComputeError: regex error: regex parse error:
    [\\^$.*+?()[\]{}|]
    ^
error: unclosed character class

Get Sets data (will need to build a `Enum` for ordering)
