In [1]:
from __future__  import annotations
from collections import Counter, defaultdict, namedtuple, deque
from itertools   import permutations, combinations, cycle, product, islice, chain, repeat
from functools   import lru_cache
from typing      import Dict, Tuple, Set, List, Iterator, Optional
from sys         import maxsize

import ast
import math
import operator
import re

import numpy as np

In [2]:
# One food from the puzzle input: the set of ingredient names it lists and the
# set of allergens it is declared to contain.
Food = namedtuple("Food", ["ingredients", "allergens"])

def read_data(input: str, parser=str, sep='\n', testing=False) -> list:
    """Split puzzle input into sections and parse each one.

    Args:
        input: Path of the file to read, or the raw text itself when
            ``testing`` is true.
        parser: Callable applied to every section; defaults to ``str``.
        sep: Separator the text is split on.
        testing: Treat ``input`` as the text to parse instead of a file path.

    Returns:
        A list holding ``parser(section)`` for each section, in input order.
    """
    if testing:
        text = input
    else:
        # The context manager closes the handle even if read() raises.
        with open(input) as handle:
            text = handle.read()
    return [parser(section) for section in text.split(sep)]

In [3]:
def parse_expr(line) -> Food:
    """Parse one food line into a ``Food``.

    Example: ``mxmxvkd kfcds sqjhc nhms (contains dairy, fish)`` =>
    ``Food(ingredients={'sqjhc', 'mxmxvkd', 'nhms', 'kfcds'}, allergens={'dairy', 'fish'})``.

    Everything before the ``(contains ...)`` part is split on whitespace into
    ingredients. A line without that part (including a blank line, such as the
    one produced by a trailing newline in the input) yields an empty allergen
    set instead of raising ``IndexError``.
    """
    match = re.search(r'\(contains (.*)\)', line)
    if match:
        ingredient_text = line[:match.start()]
        # Ignore empty names so "(contains )" does not produce a '' allergen.
        allergens = [name for name in match.group(1).split(", ") if name]
    else:
        ingredient_text = line
        allergens = []
    return Food(set(ingredient_text.split()), set(allergens))

# Sanity check: parse the first sample line and display the resulting Food.
parse_expr("mxmxvkd kfcds sqjhc nhms (contains dairy, fish)")

Food(ingredients={'kfcds', 'mxmxvkd', 'nhms', 'sqjhc'}, allergens={'dairy', 'fish'})

In [4]:
# Sample input, one food per line; testing=True makes read_data parse the
# string itself instead of opening a file.
test_string = """mxmxvkd kfcds sqjhc nhms (contains dairy, fish)
trh fvjkl sbzzf mxmxvkd (contains dairy)
sqjhc fvjkl (contains soy)
sqjhc mxmxvkd sbzzf (contains fish)"""
test_ins = read_data(test_string, parser=parse_expr, sep="\n", testing=True)


Part I  

Determine which ingredients cannot possibly contain any of the allergens in your list. How many times do any of those ingredients appear?

In [5]:
def get_possible_allergens(foods: List[Food]) -> dict:
    """Map each allergen to the ingredients that could contain it.

    For every allergen, the candidates are the intersection of the ingredient
    sets of all foods that list that allergen.

    Args:
        foods: Parsed foods, each exposing ``ingredients`` and ``allergens``.

    Returns:
        A ``defaultdict(set)`` keyed by allergen name. Every value is a set
        owned by the result, so mutating it never changes a ``Food`` in
        ``foods``.
    """
    possible_allergens = defaultdict(set)

    for food in foods:
        for allergen in food.allergens:
            if allergen in possible_allergens:
                # In-place is safe: the stored set is our own copy.
                possible_allergens[allergen] &= food.ingredients
            else:
                # Copy on first sight so the result never aliases the food's
                # own ingredient set.
                possible_allergens[allergen] = set(food.ingredients)

    return possible_allergens

In [6]:
def run_part1(input: List[Food]):
    """Count appearances of ingredients that cannot contain any allergen.

    An ingredient is excluded when it is a candidate for at least one
    allergen according to ``get_possible_allergens``. Every remaining
    ingredient is counted once per food that lists it.

    Args:
        input: Parsed foods.

    Returns:
        Total number of occurrences of allergen-free ingredients.
    """
    possible_allergens = get_possible_allergens(input)
    # Loop-invariant: build the union of all candidate ingredients once
    # rather than once per food.
    suspects = set().union(*possible_allergens.values())
    return sum(len(food.ingredients - suspects) for food in input)

In [7]:
# Part I on the sample foods.
run_part1(test_ins)

5

In [8]:
# Part I on the real puzzle input; "input.txt" is resolved relative to the
# current working directory.
real_ins = read_data("input.txt", parser=parse_expr, sep="\n")
run_part1(real_ins)

2162

Part II

What is your canonical dangerous ingredient list?

In [9]:
def eliminate_ingredient(possible_allergens: dict, ingredient: str):
    """Remove ``ingredient`` from every allergen's candidate set.

    Each entry of ``possible_allergens`` is rebound to a newly built set; the
    set objects previously stored there are left unmodified. Returns ``None``.
    """
    for name, candidates in list(possible_allergens.items()):
        possible_allergens[name] = candidates - {ingredient}

def run_part2(input: List[Food]):
    """Return the canonical dangerous ingredient list.

    Repeatedly picks an allergen with exactly one candidate ingredient,
    records that pairing, and removes the ingredient from every other
    allergen's candidates until all allergens are resolved.

    Args:
        input: Parsed foods. They are not modified.

    Returns:
        The dangerous ingredients joined by commas, ordered alphabetically by
        the allergen each one contains.

    Raises:
        ValueError: If a full pass resolves nothing while allergens remain,
            i.e. the input cannot be solved by single-candidate elimination.
            Without this check the loop would never terminate.
    """
    # Private copies: resolving allergens must never touch the Food sets.
    possible_allergens = {
        allergen: set(ingredients)
        for allergen, ingredients in get_possible_allergens(input).items()
    }
    unresolved = set(possible_allergens)
    final_allergens = {}
    while unresolved:
        progressed = False
        for allergen in possible_allergens:
            ingredients = possible_allergens[allergen]
            if len(ingredients) == 1:
                (ingredient,) = ingredients
                unresolved.remove(allergen)
                final_allergens[allergen] = ingredient
                # Also empties this allergen's own set, so it is never
                # picked a second time.
                eliminate_ingredient(possible_allergens, ingredient)
                progressed = True
        if not progressed:
            raise ValueError(
                "cannot resolve allergens by elimination: "
                + ", ".join(sorted(unresolved))
            )
    return ",".join(final_allergens[allergen] for allergen in sorted(final_allergens))

In [10]:
# Part II on the sample foods.
run_part2(test_ins)

'mxmxvkd,sqjhc,fvjkl'

In [11]:
# Part II on the real puzzle input loaded earlier as real_ins.
run_part2(real_ins)

'lmzg,cxk,bsqh,bdvmx,cpbzbx,drbm,cfnt,kqprv'