In [1]:
import numpy as np
import pandas as pd

from etl import load_monsters, _mechanics, _stats, _flavor, _skills, get_monster_df

# Load data

In [2]:
# load_monsters() yields two values; only `monsters` is used in the cells shown here.
monsters, ogl = load_monsters()
# Build the DataFrame that every later cell reads from.
monster_df = get_monster_df(monsters)

In [3]:
monster_df[_mechanics].head()

Unnamed: 0_level_0,challenge_rating,armor_class,hit_dice,hit_points,condition_immunities,damage_immunities,damage_resistances,damage_vulnerabilities,actions,reactions,legendary_actions,special_abilities,size,speed,senses
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Aboleth,10.0,17,18d10,135,,,,,"[{'name': 'Multiattack', 'desc': 'The aboleth ...",[],"[{'name': 'Detect', 'desc': 'The aboleth makes...","[{'name': 'Amphibious', 'desc': 'The aboleth c...",Large,"10 ft., swim 40 ft.","darkvision 120 ft., passive Perception 20"
Acolyte,0.25,10,2d8,9,,,,,"[{'name': 'Club', 'desc': 'Melee Weapon Attack...",[],[],"[{'name': 'Spellcasting', 'desc': 'The acolyte...",Medium,30 ft.,passive Perception 12
Adult Black Dragon,14.0,19,17d12,195,,acid,,,"[{'name': 'Multiattack', 'desc': 'The dragon c...",[],"[{'name': 'Detect', 'desc': 'The dragon makes ...","[{'name': 'Amphibious', 'desc': 'The dragon ca...",Huge,"40 ft., fly 80 ft., swim 40 ft.","blindsight 60 ft., darkvision 120 ft., passive..."
Adult Blue Dracolich,17.0,19,18d12,225,"charmed, exhaustion, frightened, paralyzed, po...","lightning, poison",necrotic,,"[{'name': 'Multiattack', 'desc': 'The dracolic...",[],"[{'name': 'Detect', 'desc': 'The dracolich mak...","[{'name': 'Legendary Resistance (3/Day)', 'des...",Huge,"40 ft., burrow 30 ft., fly 80 ft.","blindsight 60 ft., darkvision 120 ft., passive..."
Adult Blue Dragon,16.0,19,18d12,225,,lightning,,,"[{'name': 'Multiattack', 'desc': 'The dragon c...",[],"[{'name': 'Detect', 'desc': 'The dragon makes ...","[{'name': 'Legendary Resistance (3/Day)', 'des...",Huge,"40 ft., burrow 30 ft., fly 80 ft.","blindsight 60 ft., darkvision 120 ft., passive..."


# Feature extraction
At this stage, we'll restrict ourselves to simple feature extraction and engineering. We start with the `actions` column of `monster_df`, and begin by familiarizing ourselves with the structure of its entries.

In [4]:
# Collect the distinct key tuples found on the action dicts, to see which fields are optional.
actions_keys = {tuple(action.keys()) for actions in monster_df.actions for action in actions}
actions_keys

{('name', 'desc', 'attack_bonus'),
 ('name', 'desc', 'attack_bonus', 'damage_bonus'),
 ('name', 'desc', 'attack_bonus', 'damage_dice'),
 ('name', 'desc', 'attack_bonus', 'damage_dice', 'damage_bonus')}

In [5]:
def make_action_df(x):
    """Turn one monster's action list into a DataFrame indexed by action name.

    Parameters
    ----------
    x : pandas.Series
        A group whose first element (``x.iloc[0]``) is a list of action
        dicts. Any further elements are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``desc``, ``attack_bonus``, ``damage_dice`` and
        ``damage_bonus`` (missing where a dict lacks the key), indexed by
        ``action`` (the dict's ``name``).
    """
    fields = ['name', 'desc', 'attack_bonus', 'damage_dice', 'damage_bonus']
    return (
        pd.DataFrame(x.iloc[0], columns=fields)
        .rename(columns={'name': 'action'})
        .set_index('action')
    )

# One frame per monster name, stacked under a (name, action) index.
# NOTE(review): make_action_df reads only x.iloc[0], so if a name occurred on
# several rows only the first row's actions would be kept - confirm names are unique.
actions_df = monster_df.actions.groupby('name').apply(make_action_df)

In [6]:
actions_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,desc,attack_bonus,damage_dice,damage_bonus
name,action,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Aboleth,Multiattack,The aboleth makes three tentacle attacks.,0,,
Aboleth,Tentacle,"Melee Weapon Attack: +9 to hit, reach 10 ft., ...",9,2d6,5.0
Aboleth,Tail,"Melee Weapon Attack: +9 to hit, reach 10 ft. o...",9,3d6,5.0
Aboleth,Enslave (3/day),The aboleth targets one creature it can see wi...,0,,
Acolyte,Club,"Melee Weapon Attack: +2 to hit, reach 5 ft., o...",2,1d4,
Adult Black Dragon,Multiattack,The dragon can use its Frightful Presence. It ...,0,,
Adult Black Dragon,Bite,"Melee Weapon Attack: +11 to hit, reach 10 ft.,...",11,2d10 + 1d8,6.0
Adult Black Dragon,Claw,"Melee Weapon Attack: +11 to hit, reach 5 ft., ...",11,2d6,6.0
Adult Black Dragon,Tail,"Melee Weapon Attack: +11 to hit, reach 15 ft.,...",11,2d8,6.0
Adult Black Dragon,Frightful Presence,Each creature of the dragon's choice that is w...,0,,


In [7]:
from collections import namedtuple
from functools import partial, reduce
import re


# get_dc(text) lists every "DC <number> <Ability>" mention in `text` as a
# (number, ability) pair of strings. The ability alternatives are the
# capitalized entries of `_stats`.
get_dc = partial(
    re.findall,
    re.compile(
        r'\bDC (\d+) ('
        + r'|'.join(stat.capitalize() for stat in _stats)
        + r')\b'
    ),
)

# get_dice(text) lists every dice term such as "2d6" as a (count, sides) pair of
# strings; count is '' when the term has no leading number (e.g. "d6").
_dice_term = re.compile(r'\b(\d+)?d(\d+)\b')
get_dice = partial(re.findall, _dice_term)

In [8]:
# summary tools
def has_properties(x, pos=(), neg=()):
    """Tell whether any item of ``x`` has all ``pos`` keys and none of the ``neg`` keys.

    Parameters
    ----------
    x : iterable
        Containers (e.g. action dicts) that support the ``in`` operator.
    pos : sequence, optional
        Keys that must all be present in a matching item.
    neg : sequence, optional
        Keys that must all be absent from a matching item.

    Returns
    -------
    bool
        True if at least one item satisfies both conditions. With empty
        ``pos`` and ``neg`` every item matches, so the result is True for any
        non-empty ``x``.
    """
    # Immutable tuple defaults replace the shared mutable list defaults, and
    # generators avoid building throwaway lists for every item.
    return any(
        all(key in item for key in pos) and not any(key in item for key in neg)
        for item in x
    )


def parse_actions(actions):
    """Summarize a monster's action list as a flat tuple.

    Parameters
    ----------
    actions : iterable of dict
        Action records with a ``'name'`` key and optional ``'attack_bonus'``,
        ``'damage_bonus'``, ``'damage_dice'`` and ``'desc'`` keys.

    Returns
    -------
    tuple
        ``(multi, attack_bonus, damage_bonus, damage_dice, dcs)``. ``multi``
        tells whether any action is a Multiattack; the other fields come from
        the *last* non-Multiattack action. When there is no such action they
        are ``0``, ``0``, ``''`` and ``[]``.
    """
    multi = False
    # Defaults so that an empty list, or one holding only Multiattack entries,
    # returns a value instead of raising UnboundLocalError at the return.
    attack_bonus, damage_bonus, damage_dice, dcs = 0, 0, '', []
    for action in actions:
        # Substring test, as in is_special_action and summarize_actions: names
        # can carry a suffix, e.g. 'Multiattack (Humanoid or Hybrid Form Only)'.
        if 'Multiattack' in action['name']:
            multi = True
            continue
        attack_bonus = action.get('attack_bonus', 0)
        damage_bonus = action.get('damage_bonus', 0)
        damage_dice = action.get('damage_dice', '')
        # get_dc needs a string; a missing description simply has no DC mentions.
        dcs = get_dc(action.get('desc', ''))
    return multi, attack_bonus, damage_bonus, damage_dice, dcs


# Keys whose presence disqualifies an action from counting as "special".
neg = ['damage_dice', 'damage_bonus']


def is_special_action(action):
    """Return True for a non-Multiattack action that has none of the keys in ``neg``."""
    if 'Multiattack' in action['name']:
        return False
    return not any(key in action for key in neg)


def has_special_action(actions):
    """Return True if at least one entry of ``actions`` is a special action."""
    for action in actions:
        if is_special_action(action):
            return True
    return False


def get_special_action(actions):
    """Return the entries of ``actions`` that are special actions, in their original order."""
    return list(filter(is_special_action, actions))

# For the monsters that have at least one special action, show only those actions (first five monsters).
monster_df.actions[monster_df.actions.apply(has_special_action)].apply(get_special_action).head()

name
Aboleth                 [{'name': 'Enslave (3/day)', 'desc': 'The abol...
Adult Black Dragon      [{'name': 'Frightful Presence', 'desc': 'Each ...
Adult Blue Dracolich    [{'name': 'Frightful Presence', 'desc': 'Each ...
Adult Blue Dragon       [{'name': 'Frightful Presence', 'desc': 'Each ...
Adult Brass Dragon      [{'name': 'Frightful Presence', 'desc': 'Each ...
Name: actions, dtype: object

In [9]:
# Example whose action names carry parenthetical suffixes, including the Multiattack entry (see output).
monster_df.loc['Wereboar'].actions

[{'attack_bonus': 0,
  'desc': 'The wereboar makes two attacks, only one of which can be with its tusks.',
  'name': 'Multiattack (Humanoid or Hybrid Form Only)'},
 {'attack_bonus': 5,
  'damage_bonus': 3,
  'damage_dice': '2d6',
  'desc': 'Melee Weapon Attack: +5 to hit, reach 5 ft., one target. Hit: 10 (2d6 + 3) bludgeoning damage.',
  'name': 'Maul (Humanoid or Hybrid Form Only)'},
 {'attack_bonus': 5,
  'damage_bonus': 3,
  'damage_dice': '2d6',
  'desc': 'Melee Weapon Attack: +5 to hit, reach 5 ft., one target. Hit: 10 (2d6 + 3) slashing damage. If the target is a humanoid, it must succeed on a DC 12 Constitution saving throw or be cursed with wereboar lycanthropy.',
  'name': 'Tusks (Boar or Hybrid Form Only)'}]

In [10]:
# Predicate: some action has a 'damage_bonus' key but no 'damage_dice' key.
f = partial(has_properties, pos=['damage_bonus'], neg=['damage_dice'])

# List the monsters with at least one such action.
monster_df.actions[monster_df.actions.apply(f)]

name
Badger             [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Bat                [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Cat                [{'name': 'Claws', 'desc': 'Melee Weapon Attac...
Crab               [{'name': 'Claw', 'desc': 'Melee Weapon Attack...
Flying Snake       [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Hawk               [{'name': 'Talons', 'desc': 'Melee Weapon Atta...
Homunculus         [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Lizard             [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Octopus            [{'name': 'Tentacles', 'desc': 'Melee Weapon A...
Owl                [{'name': 'Talons', 'desc': 'Melee Weapon Atta...
Poisonous Snake    [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Quipper            [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Rat                [{'name': 'Bite', 'desc': 'Melee Weapon Attack...
Scorpion           [{'name': 'Sting', 'desc': 'Melee Weapon Attac...
Spider             [{'name': 

In [11]:
# Try get_dc on one description: the fourth action of the first monster.
# .iloc states the positional lookup explicitly; a bare integer key on this
# name-indexed Series relies on pandas' deprecated positional fallback.
get_dc(monster_df.actions.iloc[0][3]['desc'])

[('14', 'Wisdom')]

## Actions

In [12]:
from collections import namedtuple
# Per-monster roll-up of its actions, as produced by summarize_actions.
Summary = namedtuple(
    'Summary',
    ['has_multiattack', 'attack_bonus', 'damage', 'difficulty_class'],
)

def summarize_actions(actions):
    """Collapse a monster's action list into a single ``Summary``.

    Every action is reduced with ``summarize`` and the maximum of each field
    across the actions is kept. ``has_multiattack`` is 1 when any action
    name contains 'Multiattack', otherwise 0.

    An empty ``actions`` list yields the fixed fallback
    ``Summary(has_multiattack=0, attack_bonus=2, damage=1, difficulty_class=10)``.
    """
    multiattack_flag = int(any('Multiattack' in entry['name'] for entry in actions))
    per_action = [summarize(entry) for entry in actions]

    if not per_action:
        return Summary(has_multiattack=0, attack_bonus=2, damage=1, difficulty_class=10)

    def peak(field):
        # Largest value of one summary field over all of the monster's actions.
        return max(row[field] for row in per_action)

    return Summary(has_multiattack=multiattack_flag,
                   attack_bonus=peak('attack_bonus'),
                   damage=peak('damage'),
                   difficulty_class=peak('difficulty_class'))


def summarize(action):
    """Reduce one action dict to its attack bonus, damage potential and highest DC.

    ``action`` must have a ``'desc'`` key. ``'attack_bonus'`` and
    ``'damage_bonus'`` default to 0 and ``'damage_dice'`` to ``'0d0'`` when
    absent.

    Returns a dict with the keys ``'attack_bonus'``, ``'damage'`` and
    ``'difficulty_class'``.
    """
    dc_matches = get_dc(action['desc'])
    to_hit = int(action.get('attack_bonus', '0'))
    dice_expr = action.get('damage_dice', '0d0')
    flat_damage = int(action.get('damage_bonus', '0'))

    result = {}
    result['attack_bonus'] = to_hit
    result['damage'] = get_damage_potential(dice_expr, flat_damage)
    result['difficulty_class'] = get_max_dc(dc_matches)
    return result

def get_damage_potential(damage_dice, damage_bonus):
    """Return the maximum damage of a dice expression plus a flat bonus.

    Parameters
    ----------
    damage_dice : str
        Dice expression such as ``'2d10 + 1d8'``. Every dice term found by
        ``get_dice`` contributes ``count * sides``; a term without a leading
        count (``'d6'``) counts as one die. Text that is not a dice term is
        ignored, so a string without any dice contributes nothing.
    damage_bonus : int
        Flat amount added to the dice total.

    Returns
    -------
    int
        ``damage_bonus`` plus the maximum roll of every dice term.
    """
    damage = damage_bonus
    # Scan the whole expression instead of taking the first match of each
    # ' + '-separated piece: a piece without dice no longer raises IndexError
    # and no dice term is skipped, whatever the spacing around '+'.
    for count, sides in get_dice(damage_dice):
        # get_dice reports a missing count as '', which int() would reject.
        damage += int(count or 1) * int(sides)
    return damage

def get_max_dc(difficulty_class):
    """Return the highest DC among ``get_dc``-style matches, or 10 if there are none.

    ``difficulty_class`` is a sequence of ``(number, ability)`` pairs whose
    first item is a numeric string. Falsy entries are skipped.
    """
    if not difficulty_class:
        return 10
    return max(int(match[0]) for match in difficulty_class if match)

In [13]:
# One Summary per monster, computed from its action list.
attacks = monster_df.actions.apply(summarize_actions)

In [14]:
# Expand the Series of Summary tuples into one column per field. The column
# names are taken from Summary._fields, so neither the field count nor the
# names can drift away from the namedtuple definition.
attacks_df = pd.DataFrame(attacks.tolist(),
                          index=attacks.index,
                          columns=list(Summary._fields))

## Special abilities

In [15]:
# Collect the distinct key tuples found on the special-ability dicts.
special_keys = {tuple(ability.keys()) for abilities in monster_df.special_abilities for ability in abilities}
special_keys

{('name', 'desc', 'attack_bonus'),
 ('name', 'desc', 'attack_bonus', 'damage_dice')}

In [16]:
def make_special_abilities_df(x):
    """Turn one monster's special-ability list into a DataFrame indexed by ability name.

    Parameters
    ----------
    x : pandas.Series
        A group whose first element (``x.iloc[0]``) is a list of ability
        dicts. Any further elements are ignored.

    Returns
    -------
    pandas.DataFrame
        Columns ``desc``, ``attack_bonus`` and ``damage_dice`` (missing where
        a dict lacks the key), indexed by ``special_ability`` (the dict's
        ``name``).
    """
    fields = ['name', 'desc', 'attack_bonus', 'damage_dice']
    return (
        pd.DataFrame(x.iloc[0], columns=fields)
        .rename(columns={'name': 'special_ability'})
        .set_index('special_ability')
    )

# One frame per monster name, stacked under a (name, special_ability) index.
# NOTE(review): make_special_abilities_df reads only x.iloc[0], so if a name occurred
# on several rows only the first row's abilities would be kept - confirm names are unique.
special_abilities_df = monster_df.special_abilities.groupby('name').apply(make_special_abilities_df)

In [61]:
special_abilities_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,desc,attack_bonus,damage_dice
name,special_ability,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Aboleth,Amphibious,The aboleth can breathe air and water.,0,
Aboleth,Mucous Cloud,"While underwater, the aboleth is surrounded by...",0,
Aboleth,Probing Telepathy,If a creature communicates telepathically with...,0,
Acolyte,Spellcasting,The acolyte is a 1st-level spellcaster. Its sp...,0,
Adult Black Dragon,Amphibious,The dragon can breathe air and water.,0,
Adult Black Dragon,Legendary Resistance (3/Day),"If the dragon fails a saving throw, it can cho...",0,
Adult Blue Dracolich,Legendary Resistance (3/Day),"If the dracolich fails a saving throw, it can ...",0,
Adult Blue Dracolich,Magic Resistance,The dracolich has advantage on saving throws a...,0,
Adult Blue Dragon,Legendary Resistance (3/Day),"If the dragon fails a saving throw, it can cho...",0,
Adult Brass Dragon,Legendary Resistance (3/Day),"If the dragon fails a saving throw, it can cho...",0,


In [56]:
from collections import Counter

# Tally how often each special-ability name occurs and show the 15 most frequent.
abilities = Counter(special_abilities_df.reset_index().special_ability)
abilities.most_common(15)

[('Magic Resistance', 32),
 ('Amphibious', 30),
 ('Legendary Resistance (3/Day)', 24),
 ('Innate Spellcasting', 20),
 ('Keen Smell', 19),
 ('Pack Tactics', 16),
 ('False Appearance', 15),
 ('Spider Climb', 13),
 ('Keen Hearing and Smell', 13),
 ('Spellcasting', 12),
 ('Magic Weapons', 12),
 ('Charge', 12),
 ('Shapechanger', 11),
 ('Swarm', 10),
 ('Water Breathing', 9)]

In [57]:
from functools import reduce

# Tally the ability names straight from the raw column. The names are streamed
# into Counter instead of being concatenated list-by-list with reduce, which
# re-copies the growing list once per monster.
name_lists = monster_df.special_abilities.apply(lambda x: [y['name'] for y in x])
abilities = Counter(name for names in name_lists for name in names)
abilities.most_common(15)

[('Magic Resistance', 32),
 ('Amphibious', 30),
 ('Legendary Resistance (3/Day)', 24),
 ('Innate Spellcasting', 20),
 ('Keen Smell', 19),
 ('Pack Tactics', 16),
 ('False Appearance', 15),
 ('Spider Climb', 13),
 ('Keen Hearing and Smell', 13),
 ('Spellcasting', 12),
 ('Magic Weapons', 12),
 ('Charge', 12),
 ('Shapechanger', 11),
 ('Swarm', 10),
 ('Water Breathing', 9)]

In [73]:
# Keep the rows whose ability name is exactly 'Spellcasting'. The comparison is
# made on the `special_ability` index level only; testing membership in the
# (name, special_ability) index tuples would also accept a matching monster name.
is_spellcasting = special_abilities_df.index.get_level_values('special_ability') == 'Spellcasting'
spellcasting_df = special_abilities_df[is_spellcasting]
spellcasting_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,desc,attack_bonus,damage_dice
name,special_ability,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Acolyte,Spellcasting,The acolyte is a 1st-level spellcaster. Its sp...,0,
Androsphinx,Spellcasting,The sphinx is a 12th-level spellcaster. Its sp...,0,
Archmage,Spellcasting,The archmage is an 18th-level spellcaster. Its...,0,
Cult Fanatic,Spellcasting,The fanatic is a 4th-level spellcaster. Its sp...,0,
Druid,Spellcasting,The druid is a 4th-level spellcaster. Its spel...,0,


In [70]:
# Keep the rows whose ability name is exactly 'Innate Spellcasting'. The comparison
# is made on the `special_ability` index level only; testing membership in the
# (name, special_ability) index tuples would also accept a matching monster name.
is_innate_spellcasting = (
    special_abilities_df.index.get_level_values('special_ability') == 'Innate Spellcasting'
)
innate_spellcasting_df = special_abilities_df[is_innate_spellcasting]
innate_spellcasting_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,desc,attack_bonus,damage_dice
name,special_ability,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Cloud Giant,Innate Spellcasting,The giant's innate spellcasting ability is Cha...,0,
Couatl,Innate Spellcasting,The couatl's spellcasting ability is Charisma ...,0,
Deep Gnome (Svirfneblin),Innate Spellcasting,The gnome's innate spellcasting ability is Int...,0,
Deva,Innate Spellcasting,The deva's spellcasting ability is Charisma (s...,0,
Djinni,Innate Spellcasting,The djinni's innate spellcasting ability is Ch...,0,


In [78]:
def parse_spellcasting(desc):
    """Print the pieces of a spellcasting description; nothing is returned.

    The first line of ``desc`` is printed as the header. Of the remaining
    lines the first one is skipped and the rest are printed as a list.
    Raises ValueError when ``desc`` contains no lines at all.
    """
    first_line, *remaining = desc.splitlines()
    print(first_line)
    spell_lines = remaining[1:]
    print(spell_lines)
    # Exploratory helper: the parsed pieces are only printed, not returned yet.

# Run for its printed output; parse_spellcasting returns None, so the resulting Series holds only None.
spellcasting_df.desc.apply(parse_spellcasting)

The acolyte is a 1st-level spellcaster. Its spellcasting ability is Wisdom (spell save DC 12, +4 to hit with spell attacks). The acolyte has following cleric spells prepared:
['• Cantrips (at will): light, sacred flame, thaumaturgy', '• 1st level (3 slots): bless, cure wounds, sanctuary']
The sphinx is a 12th-level spellcaster. Its spellcasting ability is Wisdom (spell save DC 18, +10 to hit with spell attacks). It requires no material components to cast its spells. The sphinx has the following cleric spells prepared:
['• Cantrips (at will): sacred flame, spare the dying, thaumaturgy', '• 1st level (4 slots): command, detect evil and good, detect magic', '• 2nd level (3 slots): lesser restoration, zone of truth', '• 3rd level (3 slots): dispel magic, tongues', '• 4th level (3 slots): banishment, freedom of movement', '• 5th level (2 slots): flame strike, greater restoration', "• 6th level (1 slot): heroes' feast"]
The archmage is an 18th-level spellcaster. Its spellcasting ability is I

name           special_ability
Acolyte        Spellcasting       None
Androsphinx    Spellcasting       None
Archmage       Spellcasting       None
Cult Fanatic   Spellcasting       None
Druid          Spellcasting       None
Guardian Naga  Spellcasting       None
Gynosphinx     Spellcasting       None
Lich           Spellcasting       None
Mage           Spellcasting       None
Mummy Lord     Spellcasting       None
Priest         Spellcasting       None
Spirit Naga    Spellcasting       None
Name: desc, dtype: object

## Special defenses
We now apply a bit of domain knowledge to separate out the following features.

In [23]:
from functools import partial, reduce

def string_to_list(x, sep=','):
    """Split ``x`` on ``sep`` and strip surrounding whitespace from every piece.

    An empty string yields ``['']``; callers that want to drop blanks must
    filter them out themselves.
    """
    pieces = x.split(sep)
    return list(map(str.strip, pieces))


def concat(lists):
    """Flatten an iterable of lists into one new list, preserving order.

    Parameters
    ----------
    lists : iterable of iterables
        The sequences to join, e.g. a list of lists or a Series of lists.

    Returns
    -------
    list
        All items of all sequences, in order; ``[]`` when ``lists`` is empty.
    """
    # A single pass replaces reduce(lambda x, y: x + y, ...), which copied the
    # accumulated list on every step and was therefore quadratic.
    return [item for sublist in lists for item in sublist]


def get_unique_values(series, sep=','):
    """Return the sorted distinct non-empty tokens found in a Series of delimited strings.

    Each element of ``series`` is split with ``string_to_list`` using ``sep``;
    the pieces of all elements are pooled, de-duplicated, stripped of falsy
    (empty) entries and sorted.
    """
    splitter = partial(string_to_list, sep=sep)
    distinct = set(concat(series.apply(splitter)))
    return sorted(filter(None, distinct))

In [24]:
# Distinct values of each defensive column.
# NOTE(review): damage immunities/resistances are split on ';' only, so a cell such as
# "lightning, poison" (Adult Blue Dracolich above) stays a single value - confirm this is intended.
condition_immunities = get_unique_values(monster_df.condition_immunities)
damage_immunities = get_unique_values(monster_df.damage_immunities, sep=';')
damage_resistances = get_unique_values(monster_df.damage_resistances, sep=';')
damage_vulnerabilities = get_unique_values(monster_df.damage_vulnerabilities)

In [25]:
damage_vulnerabilities

['bludgeoning',
 'cold',
 'fire',
 'piercing from magic weapons wielded by good creatures',
 'radiant',
 'thunder']

In [26]:
# Hand-written list of condition names, in alphabetical order.
# It is not referenced by any other cell shown here.
# NOTE(review): presumably meant for comparison with the computed `condition_immunities` - confirm.
CONDITIONS = ['blinded',
              'charmed',
              'deafened',
              'exhaustion',
              'fatigued',
              'frightened',
              'grappled',
              'incapacitated',
              'invisible',
              'paralyzed',
              'petrified',
              'poisoned',
              'prone',
              'restrained',
              'stunned',
              'unconscious']

## Reactions

In [27]:
# Print every reaction of every monster that has at least one.
# Each Series element is a list of reaction dicts, so the inner loop walks the
# whole list; indexing [0] would silently drop all but a monster's first reaction.
reactions = monster_df.reactions[monster_df.reactions.map(len) > 0]
for monster_reactions in reactions:
    for reaction in monster_reactions:
        for key, value in reaction.items():
            print("{}: {}".format(key, value))
        # '\n' plus print's own newline leaves two blank lines between reactions.
        print('\n')

name: Parry
desc: The captain adds 2 to its AC against one melee attack that would hit it. To do so, the captain must see the attacker and be wielding a melee weapon.
attack_bonus: 0


name: Split
desc: When a pudding that is Medium or larger is subjected to lightning or slashing damage, it splits into two new puddings if it has at least 10 hit points. Each new pudding has hit points equal to half the original pudding's, rounded down. New puddings are one size smaller than the original pudding.
attack_bonus: 0


name: Unnerving Mask
desc: When a creature the devil can see starts its turn within 30 feet of the devil, the devil can create the illusion that it looks like one of the creature's departed loved ones or bitter enemies. If the creature can see the devil, it must succeed on a DC 14 Wisdom saving throw or be frightened until the end of its turn.
attack_bonus: 0


name: Parry
desc: The erinyes adds 4 to its AC against one melee attack that would hit it. To do so, the erinyes must 