# December 14, 2021

https://adventofcode.com/2021/day/14

In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict, deque

In [None]:
def format_data( data_str ):
    '''Parse raw puzzle text into a polymer template and its insertion rules.

    The text is expected to hold the template, a blank line, and then one
    rule per line written as "<pair> -> <base>".

    Parameters:
        data_str: the raw input text.

    Returns:
        A dict with keys "polymer" (the template string) and "insertions"
        (a list of [pair, base] lists, in input order).
    '''
    # Strip surrounding whitespace first: a file read from disk normally ends
    # with a newline, which would otherwise turn into a bogus [''] rule.
    poly, ins = data_str.strip().split("\n\n")
    ins = [line.split(" -> ") for line in ins.split("\n") if line.strip()]
    return {"polymer": poly.strip(), "insertions": ins}

In [None]:
# Load the full puzzle input from disk and parse it into the notebook-global `data`.
# NOTE(review): the path is relative, so this presumably expects the working
# directory to contain data/2021/14.txt — confirm.
with open("data/2021/14.txt", "r") as f:
    data_str = f.read()
data = format_data( data_str )

In [None]:
# Small sample input (presumably the worked example from the puzzle page),
# parsed with format_data; the bare `test` echoes the parsed result.
test_str = '''NNCB

CH -> B
HH -> N
CB -> H
NH -> C
HB -> C
HC -> B
HN -> C
NN -> C
BH -> H
NC -> B
NB -> B
BN -> B
BB -> N
BC -> B
CC -> N
CN -> C'''

test = format_data( test_str )
test

In [None]:
# Scratch: rebuild each rule's pair from its first two characters.
[x[0][0]+x[0][1] for x in test["insertions"]]

In [None]:
# Scratch: ceil(log2(10)) is 4, so this prints 0 through 3.
for i in range( int(np.ceil(np.log2(10))) ) :
    print(i)

In [None]:
# Scratch: map every rule's pair to a dict that stores the pair itself under key 0.
tmp = {x[0]: {0:x[0]} for x in test["insertions"]}

In [None]:
# Scratch: print each inner dict held in tmp.
for val in tmp.values():
    print(val)

In [None]:
# Scratch: check list slicing; x[3:6] is the three elements [1, 0, 9].
x = [5,4,23,1,0,9,1,3,4]
x[ 3:3+3]

# Part 1

In [None]:
def perform_insertions( polymer, insertion_dict ):
    '''Expand every adjacent pair of bases in polymer using insertion_dict.

    insertion_dict maps a two-base pair to the full string that pair expands
    into (the expansion starts with the pair's first base). Pairs without an
    entry are left as they are.

    Returns the expanded polymer as a string.
    '''
    # The first base is emitted once; every pair then contributes everything
    # after its own first base, so shared bases are never duplicated.
    pieces = [polymer[0]]

    for left, right in zip(polymer, polymer[1:]):
        pair = left + right
        if pair in insertion_dict:
            pieces.append( insertion_dict[pair][1:] )
        else:
            # no rule for this pair: only its second base carries over
            pieces.append( right )

    return "".join(pieces)

def expand_insertions( insertion_list, nsteps ):
    '''Work out what every rule pair looks like after nsteps insertion steps.

    nsteps is split into powers of two (largest first); the matching
    power-of-two tables from expand_insertions_detailed are then applied one
    after another, starting from the bare pairs.

    Returns a dict mapping each rule pair to its expansion after nsteps steps.
    '''
    # expansion tables keyed by step count (1, 2, 4, ...)
    tables = expand_insertions_detailed( insertion_list, nsteps )

    # binary decomposition of nsteps, biggest exponent first
    exponents = []
    steps_left = nsteps
    while steps_left > 0:
        exponent = int( np.log2(steps_left) )
        exponents.append( exponent )
        steps_left -= 2 ** exponent

    # zero steps applied so far: every pair is just itself
    current = {rule[0]: rule[0] for rule in insertion_list}
    for exponent in exponents:
        table = tables[2**exponent]
        current = {pair: perform_insertions( poly, table ) for pair, poly in current.items()}

    return current

def expand_insertions_detailed( insertion_list, nsteps ):
    '''Build per-pair expansion tables for every power-of-two step count up to nsteps.

    Parameters:
        insertion_list: iterable of [pair, base] rules.
        nsteps: total number of steps the caller wants to reach. Values
            below 1 produce only the 1-step table.

    Returns:
        A dict keyed by step count (1, 2, 4, ... up to the largest power of
        two not exceeding nsteps). Each value maps a rule pair to the string
        that pair has become after that many steps.
    '''
    # Map Power doesn't abbreviate! Each letter is just as important as the previous letter!
    # Maybe even more important... No. Just as important
    # log2 is undefined for 0, so anything below 2 simply needs no doubling.
    max_power = int( np.log2(nsteps) ) if nsteps > 1 else 0

    # these are expansions after 1 step
    expansions = {1: {x[0]: x[0][0] + x[1] + x[0][1] for x in insertion_list}}

    for p in range(max_power):
        key = 2 ** (p+1) # key for expansion we're working out now
        last_key = 2 ** p # key from previous expansion
        previous = expansions[last_key]

        # Applying the k-step table to a k-step result gives the 2k-step result.
        expansions[key] = {
            pair: perform_insertions( prev_result, previous )
            for pair, prev_result in previous.items()
        }

    return expansions
    
def expand_polymer( poly, insertion_list, nsteps ):
    '''Return the polymer string obtained from poly after nsteps insertion steps.'''
    # one lookup table covering all nsteps steps at once, applied in a single pass
    return perform_insertions( poly, expand_insertions( insertion_list, nsteps ) )

def count_bases( poly ):
    '''Count how often each base occurs in the polymer string poly.

    Returns a pandas Series indexed by base, holding the occurrence counts.
    '''
    # Count the argument itself rather than a notebook-global polymer.
    return pd.Series( list(poly) ).value_counts()

def score_polymer( poly ):
    '''Return the gap between the largest and smallest base counts of poly.'''
    tally = count_bases(poly)
    most_common = tally.max()
    least_common = tally.min()
    return most_common - least_common

In [None]:
# Scratch: apply a one-step expansion table built from the sample rules to "CBHH".
perform_insertions( "CBHH", {x[0]:x[0][0]+x[1]+x[0][1] for x in test["insertions"]} )

In [None]:
# Scratch: power-of-two expansion tables for the sample rules, requested for 4 steps.
expand_insertions_detailed( test["insertions"], 4 )

In [None]:
# Scratch: per-pair expansions of the sample rules after 2 steps.
expand_insertions( test["insertions"], 2 )

In [None]:
# Scratch: per-pair expansions after 3 steps (a step count that is not a power of two).
expand_insertions( test["insertions"], 3 )

In [None]:
# Print the sample polymer after 1, 2, 3 and 4 steps.
for i in range(1, 5):
    print( expand_polymer(test["polymer"], test["insertions"], i) )

In [None]:
# Expand the sample polymer by 10 steps and keep it in the notebook-global final_polymer.
final_polymer = expand_polymer(test["polymer"], test["insertions"], 10)

In [None]:
# Length of the expanded polymer.
len(final_polymer)

In [None]:
# Score of the expanded sample polymer (largest base count minus smallest).
score_polymer(final_polymer)

In [None]:
# Part 1 on the real input: expand 10 steps, then score the result.
final_polymer = expand_polymer(data["polymer"], data["insertions"], 10)
score_polymer( final_polymer )

# Part 2
For this section, we'll represent a polymer as a dict of base-pair counts

In [None]:
def dictify_polymer( poly ):
    '''Convert a polymer string into counts of its adjacent base pairs.

    Parameters:
        poly: the polymer as a string of single-character bases.

    Returns:
        A defaultdict(int) mapping each two-character pair to the number of
        times it occurs (overlapping occurrences included). A polymer shorter
        than two bases yields an empty mapping.
    '''
    poly_dict = defaultdict(int)
    # Pair every base with its right-hand neighbour; the key must be the
    # two-character pair, not a single base.
    for left, right in zip(poly, poly[1:]):
        poly_dict[left + right] += 1

    return poly_dict

def perform_insertions( base_pair_dict, insertion_dict ):
    '''Replace every counted pair by the pairs it expands into.

    base_pair_dict maps pair -> count. insertion_dict maps pair -> dict of
    {resulting pair: how many of it one such pair produces}. Pairs that have
    no entry in insertion_dict are carried over with their count unchanged.

    Returns a defaultdict(int) of pair counts after the expansion.
    '''
    totals = defaultdict(int)

    for pair, multiplicity in base_pair_dict.items():
        if pair not in insertion_dict:
            # nothing to insert: the pair survives as it is
            totals[pair] += multiplicity
            continue

        # every copy of this pair yields the same set of child pairs
        for child, child_count in insertion_dict[pair].items():
            totals[child] += multiplicity * child_count

    return totals

def expanded_insertions( insertion_list, nsteps ):
    '''Work out the pair counts each rule pair turns into after nsteps steps.

    nsteps is split into powers of two (largest first); the matching
    power-of-two tables from expand_insertions_detailed are applied in turn,
    starting from one copy of each bare pair.

    Parameters:
        insertion_list: iterable of [pair, base] rules.
        nsteps: number of insertion steps to apply.

    Returns:
        A dict mapping each rule pair to a dict of {pair: count} describing
        what one copy of it becomes after nsteps steps.
    '''
    # get all the insertions for step sizes that are powers of 2
    details = expand_insertions_detailed( insertion_list, nsteps )

    # determine which powers we need for nsteps
    binary_powers = []
    remainder = nsteps
    while remainder > 0:
        binary_powers.append( int( np.log2(remainder) ) )
        remainder -= 2 ** binary_powers[-1]

    # Start with original bases, then perform the necessary expansions.
    # (Local is not named after the function, to keep the two apart.)
    expanded = {x[0]:{x[0]:1} for x in insertion_list}

    for p in binary_powers:
        # only values of existing keys are replaced, so iterating is safe
        for pair, poly_dict in expanded.items():
            expanded[pair] = perform_insertions( poly_dict, details[2**p] )

    return expanded

# expand_polymer looks this function up as expand_insertions; without the
# alias that name still points at the string-based version defined earlier.
expand_insertions = expanded_insertions
def expand_insertions_detailed( insertion_list, nsteps ):
    '''Build per-pair count tables for every power-of-two step count up to nsteps.

    Parameters:
        insertion_list: iterable of [pair, base] rules.
        nsteps: total number of steps the caller wants to reach. Values
            below 1 produce only the 1-step table.

    Returns:
        A dict keyed by step count (1, 2, 4, ... up to the largest power of
        two not exceeding nsteps). Each value maps a rule pair to a dict of
        {pair: count} describing what one copy of it becomes after that many
        steps.
    '''
    # Map Power doesn't abbreviate! Each letter is just as important as the previous letter!
    # Maybe even more important... No. Just as important
    # log2 is undefined for 0, so anything below 2 simply needs no doubling.
    max_power = int( np.log2(nsteps) ) if nsteps > 1 else 0

    # these are expansions after 1 step: pair AB with inserted base C becomes AC + CB
    one_step = {}
    for x in insertion_list:
        pair, base = x[0], x[1]
        counts = {}
        # Accumulate rather than use a dict literal: for a rule such as
        # AA -> A both halves are the same pair and must count twice.
        for new_pair in (pair[0] + base, base + pair[1]):
            counts[new_pair] = counts.get(new_pair, 0) + 1
        one_step[pair] = counts
    expansions = {1: one_step}

    for p in range(max_power):
        key = 2 ** (p+1) # key for expansion we're working out now
        last_key = 2 ** p # key from previous expansion
        previous = expansions[last_key]

        # Applying the k-step table to a k-step result gives the 2k-step result.
        expansions[key] = {
            pair: perform_insertions( prev_result, previous )
            for pair, prev_result in previous.items()
        }

    return expansions

def expand_polymer( poly, insertion_list, nsteps ):
    '''Return the pair counts of polymer poly after nsteps insertion steps.

    Parameters:
        poly: the starting polymer string.
        insertion_list: iterable of [pair, base] rules.
        nsteps: number of insertion steps to apply.

    Returns:
        A mapping of pair -> count for the expanded polymer.
    '''
    poly_dict = dictify_polymer(poly)
    # Use the pair-count table builder from this section. The local must not
    # be called expanded_insertions, or it would shadow that function.
    pair_tables = expanded_insertions( insertion_list, nsteps )
    return perform_insertions( poly_dict, pair_tables )

def count_bases( ploy_dict, last_base=None ):
    '''Count individual bases from a dict of pair counts.

    Each pair contributes its first base only, so bases shared between
    neighbouring pairs are not counted twice. That leaves out the final base
    of the polymer, which is added once when last_base is given.

    Parameters:
        ploy_dict: mapping of pair -> count (parameter name kept as-is so the
            signature does not change).
        last_base: the final base of the polymer, or None to skip it.

    Returns:
        A pandas Series indexed by base, holding the counts.
    '''
    bases = defaultdict(int)

    for bp, count in ploy_dict.items():
        bases[bp[0]] += count

    # The last base is added exactly once, outside the loop.
    if last_base is not None:
        bases[last_base] += 1

    return pd.Series( dict(bases) )

def score_polymer( poly_dict, last_base=None ):
    '''Return the gap between the largest and smallest base counts.

    poly_dict and last_base are handed to count_bases unchanged.
    '''
    tally = count_bases( poly_dict, last_base )
    most_common = tally.max()
    least_common = tally.min()
    return most_common - least_common

def solve_polymer( poly, insertion_list, nsteps ):
    '''Expand poly by nsteps steps and return the score of the result.'''
    pair_counts = expand_polymer( poly, insertion_list, nsteps )
    # the last base of the input is passed along as last_base for the scoring
    final_base = poly[-1]
    return score_polymer( pair_counts, final_base )

In [None]:
# Scratch: pair counts of the sample polymer after 4 steps.
expand_polymer( test["polymer"], test["insertions"], 4 )

In [None]:
# Scratch: pair counts of a literal polymer string, for comparison with the cell above
# (presumably the 4-step sample expansion from the puzzle text — verify).
dictify_polymer( "NBBNBNBBCCNBCNCCNBBNBBNBBBNBBNBBCBHCBHHNHCBBCBHCB" )

In [None]:
# Score of the sample polymer after 10 steps, using the pair-count method.
solve_polymer( test["polymer"], test["insertions"], 10 )

In [None]:
# Real input, 10 steps, using the pair-count method.
solve_polymer( data["polymer"], data["insertions"], 10 )

In [None]:
# Part 2 on the real input: 40 steps.
solve_polymer( data["polymer"], data["insertions"], 40 )