In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import os, sys 
sys.path.append('..')
import collections
import copy
from functools import cache 
import itertools
import aoc_utils as au
from tqdm import tqdm as tqdm
import math

In [3]:
input_text = au.read_txt_file_lines('input2.txt')
n_rows = len(input_text)
# n_cols = len(input_text[0])
# for ii in range(1, n_rows):
#     assert len(input_text[ii]) == n_cols, f'row {ii} has {len(input_text[ii])} cols, not {n_cols}'
# print(f'input has {n_rows} rows and {n_cols} cols')
input_text[:5]

['???.### 1,1,3',
 '.??..??...?##. 1,1,3',
 '?#?#?#?#?#?#?#? 1,3,1,6',
 '????.#...#... 4,1,1',
 '????.######..#####. 1,6,5']

In [4]:
data_row = collections.namedtuple('data_row', 'locations counts')
list_data_rows = []
list_n_qs = []
for row in input_text:
    assert '?' in row
    tmp = row.split(' ')
    assert len(tmp) == 2
    locations = tmp[0] 
    counts = [int(x) for x in tmp[1].split(',')] 
    # print(locations, counts)
    list_data_rows.append(data_row(locations, counts))    
    list_n_qs.append(locations.count('?'))
list_data_rows[:5]

[data_row(locations='???.###', counts=[1, 1, 3]),
 data_row(locations='.??..??...?##.', counts=[1, 1, 3]),
 data_row(locations='?#?#?#?#?#?#?#?', counts=[1, 3, 1, 6]),
 data_row(locations='????.#...#...', counts=[4, 1, 1]),
 data_row(locations='????.######..#####.', counts=[1, 6, 5])]

In [45]:
def place_hashes(size, count):
    '''with help from https://stackoverflow.com/questions/43816965/permutation-without-duplicates-in-python'''
    for positions in itertools.combinations(range(size), count):
        p = ['.'] * size
        for i in positions:
            p[i] = '#'
        yield p

def place_hashes_with_replacement(size, count):
    '''with help from https://stackoverflow.com/questions/43816965/permutation-without-duplicates-in-python'''
    for positions in itertools.combinations_with_replacement(range(size), count):
        p = ['.'] * size
        for i in positions:
            p[i] = '#'
        yield p

def place_hashes_with_replacement_without_duplicates(size, max_count=5):
    for count in range(np.minimum(size + 1, max_count)):
        for positions in itertools.combinations(range(size), count):
            p = ['.'] * size
            for i in positions:
                p[i] = '#'
            yield p

def check_sequence_correct(seq, counts):
    ## assert seq is a string of .s and #s 
    assert type(seq) == str
    assert set(seq).issubset(set('.#'))

    ## check that the number of #s in seq matches the counts
    seq_split = seq.split('.') 
    seq_count = [len(x) for x in seq_split if len(x) > 0]
    return seq_count == counts

def check_sequence_correct_fast(seq, counts):
    ## check that the number of #s in seq matches the counts
    seq_split = seq.split('.') 
    seq_count = [len(x) for x in seq_split if len(x) > 0]
    return seq_count == counts

def check_sequence_correct_faster(seq, counts):
    seq_split = seq.split('.')
    ii = 0
    for x in seq_split:
        if len(x) == 0:
            continue 
        if ii >= len(counts) or len(x) != counts[ii]:
            return False
        ii += 1
    return True

# def check_sequence_correct_list(seq, counts):
#     '''list of 0s and 1s'''
#     group_counts = [sum(1 for _ in group) for key, group in itertools.groupby(seq) if key == '#']
#     return group_counts == counts

def check_sequence_correct_01(seq, counts):
    '''list of 0s and 1s'''
    group_counts = [sum(1 for _ in group) for key, group in itertools.groupby(seq) if key == 1]
    return group_counts == counts

def get_n_perms_per_seq(seq, counts, verbose=0):
    assert '?' in seq
    assert type(seq) == str
    assert set(seq).issubset(set('.#?'))

    n_hash = seq.count('#')
    n_q = seq.count('?')
    n_hash_counts = sum(counts) - n_hash  ## number of #s that need to be placed
    n_dot_counts = n_q - n_hash_counts  ## number of .s that need to be placed
    assert n_dot_counts >= 0, f'{n_dot_counts} < 0, {seq}, {counts}, {n_q}, {n_hash_counts}'
    n_perms = 0
    if verbose:
        print(f'{n_dot_counts} dots, {n_hash_counts} hashes, {n_q} qs')
    
    for new_elements in place_hashes(n_q, n_hash_counts):  # generator of all possible combinations of #s and .s without duplicates 
        seq_try = list(seq)
        for ii, char in enumerate(new_elements):
            seq_try[seq_try.index('?')] = char
        seq_try = ''.join(seq_try)
        if check_sequence_correct_fast(seq_try, counts):
            n_perms += 1
    return n_perms

def get_n_perms_per_row(row):
    assert type(row) == data_row
    return get_n_perms_per_seq(row.locations, row.counts)

n_perm_total = 0 
for row in list_data_rows:
    # print(n_perm_total)
    n_perm_total += get_n_perms_per_row(row)
print(n_perm_total)
# get_n_perms_per_seq(list_data_rows[0].locations, list_data_rows[0].counts)  

21


In [5]:
check_sequence_correct_fast('##..#....###...##....###.##.##', [2, 1])

False

In [6]:
check_sequence_correct_faster('##..#....###...##....###.##.##', [2, 1])

False

In [5]:
# @cache
def check_sequence_correct_fast_set(seq, counts):
    ## check that the number of #s in seq matches the counts
    seq_split = seq.split('.') 
    seq_count = set([len(x) for x in seq_split if len(x) > 0])
    return seq_count == counts

# part 2 
tricky .. 
**Could start going through special cases? **
- (Note that the method above could be a lot faster if special cases were included via if statements, just to get down number of perms.. but is considerable effort)
- If sequence ends with two `.`s, then the added `?` will always be a `.`, and hence, the total number of sequence perms will be the original number of perms `** 5`. 
- If not, then there will still be repetition! 4 times the same thing, plus an ending without extra `?` (plus spill-over into next sequence). 
- So maybe we can split up the new combined sequence in 2 parts: the first sequence + spill-over, and the remainder of the fifth part. Solve both. Then `perm_total = perm_1 ** 4 * perm_2`


Okay plan is:
1. Create new data structure, of `seq_1`, `count_1`, `seq_2`, `count_2`. (Merge, split past spill-over. Maybe use special cases of whether start/ends are `?` or not?)
2. Solve both 
3. Combine perms 

**Update:**
- Actually this is not perfect. Eg `..?..?..?.. [1,1]` first has 3 combinations, but in part 2 would have to be re-evaluated as a whole... (leading to (19, 10) permutations)

Plan 2:
- Could we consider trying out all connecting `?` possibilities separately? Those are 16 combinations. 
- I think the issue is the same; we cannot shortcut by considering original sequences separately and combining (the example above still fails). Maybe that example is too extreme.. but it seems kind of fruitless to code this up and run into some annoying sequence like that. 

Plan 3: 
- Going back to the start; can we start by filling in all the spots we know for sure? Ie one forward pass through all (combined) sequences and filling in all `?` that have just one possibility. That might massively bring down number of perms.
- Secondly, if there are (large) contiguous groups of `?`, where we know for sure they must accomodate a large number of `#`, it would be brilliant if we could just take the (n, k) perm number and multiply instead of evaluating every combination of entire sequence .. 

Plan 4:
- Go through the combined seq, holding ONLY 3 counts in memory. (Starting with the first 3). You only need to consider `sum(counts[:3])` existing `#` in seq. 
- See how many combis work.
- Save those (number of perms) and go through sequence, multiplying cached n perms as you go. 

In [6]:
list_data_rows_part2 = []
n_repeats = 5
for row in list_data_rows:
    locations = '?'.join([row.locations] * n_repeats)
    counts = row.counts * n_repeats 
    list_data_rows_part2.append(data_row(locations, counts))
list_data_rows_part2[:5]


[data_row(locations='???.###????.###????.###????.###????.###', counts=[1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3]),
 data_row(locations='.??..??...?##.?.??..??...?##.?.??..??...?##.?.??..??...?##.?.??..??...?##.', counts=[1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3]),
 data_row(locations='?#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#?', counts=[1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6]),
 data_row(locations='????.#...#...?????.#...#...?????.#...#...?????.#...#...?????.#...#...', counts=[4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1]),
 data_row(locations='????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.', counts=[1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5])]

In [7]:
# data_row_part2 = collections.namedtuple('data_row_part2', 'locations_1 counts_1 locations_2 counts_2')
# list_data_rows_part2 = [] 

# for row in list_data_rows:
#     print(row)
#     start_char = row.locations[0]
#     end_char = row.locations[-1] 
#     if start_char != '?' and end_char != '?': 
#         seq_1 = row.locations + '?'
    

#     break

In [8]:

def get_n_perms_per_seq_part2(seq, counts, verbose=0):
    assert '?' in seq
    assert type(seq) == str
    assert set(seq).issubset(set('.#?'))

    n_hash = seq.count('#')
    n_q = seq.count('?')
    n_hash_counts = sum(counts) - n_hash  ## number of #s that need to be placed
    n_dot_counts = n_q - n_hash_counts  ## number of .s that need to be placed
    assert n_dot_counts >= 0, f'{n_dot_counts} < 0, {seq}, {counts}, {n_q}, {n_hash_counts}'
    n_perms = 0
    if verbose:
        print(f'{n_dot_counts} dots, {n_hash_counts} hashes, {n_q} qs')
    n_combis = math.factorial(n_q) / (math.factorial(n_hash_counts) * math.factorial(n_dot_counts))
    print(f'{n_combis:.2e} combinations')
    if n_combis > 1e8:
        print('too many combinations')
        return None
    for new_elements in place_hashes(n_q, n_hash_counts):  # generator of all possible combinations of #s and .s without duplicates 
        seq_try = list(seq)
        for ii, char in enumerate(new_elements):
            seq_try[seq_try.index('?')] = char
        seq_try = ''.join(seq_try)
        if check_sequence_correct_fast(seq_try, counts):
            n_perms += 1
    return n_perms

In [7]:
list_data_rows_part2[:5]

[data_row(locations='???.###????.###????.###????.###????.###', counts=[1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3]),
 data_row(locations='.??..??...?##.?.??..??...?##.?.??..??...?##.?.??..??...?##.?.??..??...?##.', counts=[1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3, 1, 1, 3]),
 data_row(locations='?#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#???#?#?#?#?#?#?#?', counts=[1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6, 1, 3, 1, 6]),
 data_row(locations='????.#...#...?????.#...#...?????.#...#...?????.#...#...?????.#...#...', counts=[4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1, 4, 1, 1]),
 data_row(locations='????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.', counts=[1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5])]

In [11]:
get_n_perms_per_seq_part2(list_data_rows_part2[2].locations, list_data_rows_part2[2].counts)    

1.76e+12 combinations
too many combinations


In [20]:
for ii in place_hashes_with_replacement(5, 5):
    print(ii)
    

['#', '.', '.', '.', '.']
['#', '#', '.', '.', '.']
['#', '.', '#', '.', '.']
['#', '.', '.', '#', '.']
['#', '.', '.', '.', '#']
['#', '#', '.', '.', '.']
['#', '#', '#', '.', '.']
['#', '#', '.', '#', '.']
['#', '#', '.', '.', '#']
['#', '.', '#', '.', '.']
['#', '.', '#', '#', '.']
['#', '.', '#', '.', '#']
['#', '.', '.', '#', '.']
['#', '.', '.', '#', '#']
['#', '.', '.', '.', '#']
['#', '#', '.', '.', '.']
['#', '#', '#', '.', '.']
['#', '#', '.', '#', '.']
['#', '#', '.', '.', '#']
['#', '#', '#', '.', '.']
['#', '#', '#', '#', '.']
['#', '#', '#', '.', '#']
['#', '#', '.', '#', '.']
['#', '#', '.', '#', '#']
['#', '#', '.', '.', '#']
['#', '.', '#', '.', '.']
['#', '.', '#', '#', '.']
['#', '.', '#', '.', '#']
['#', '.', '#', '#', '.']
['#', '.', '#', '#', '#']
['#', '.', '#', '.', '#']
['#', '.', '.', '#', '.']
['#', '.', '.', '#', '#']
['#', '.', '.', '#', '#']
['#', '.', '.', '.', '#']
['#', '#', '.', '.', '.']
['#', '#', '#', '.', '.']
['#', '#', '.', '#', '.']
['#', '#', '

- split combined seq up in segments split by `.`
- for each, compute possible list of `#` counts (with cache) => create a namedtuple that has `(counts, n_combs)` (eg both `#..#` and `#.#.` make `counts=[1,1], n_combs=2)`)
- discard any of these where `max(counts) > max(seq_counts)`
- (maybe: discard if pattern does NOT occur in seq)
- Then, start combining all possible segment counts and see which ones work. Multiply `n_combs` of each. (In some kind of tree/recursive manner: start from front and discard each segment once its a mismatch). Maybe there is a quick look up table? Something like, once you have matched the first 3, pop them from the front of the seg_count list, and then match the next first elements. (so `seg_count == seq_count[:len(seg_count)]`)
- 

In [46]:
counts_possibilities = collections.namedtuple('counts_possibilities', 'counts n_possible')

def lookup_all_count_possibilities_sequence(seq, max_total_count=5):
    seq_split = seq.split('.') 
    list_seg_counts = []
    n_hash = seq.count('#')
    for segment in seq_split:
        if len(segment) > 0:
            list_possible_counts_segment = possible_counts_segment(segment, max_count=max_total_count - n_hash)
            # print(list_possible_counts_segment)
            list_seg_counts.append(list_possible_counts_segment)
    return list_seg_counts

@cache        
def possible_counts_segment(segment, max_count=5):
    n_hash = segment.count('#')
    n_q = segment.count('?')
    dict_pos = {}
    
    for new_elements in place_hashes_with_replacement_without_duplicates(n_q, max_count=max_count):
        segment_try = list(segment)
        for ii, char in enumerate(new_elements):
            segment_try[segment_try.index('?')] = char
        segment_try = ''.join(segment_try)
        n_hash_counts = [len(x) for x in segment_try.split('.') if len(x) > 0]
        n_hash_counts = '-'.join([str(x) for x in n_hash_counts])
        if n_hash_counts not in dict_pos:
            dict_pos[n_hash_counts] = 1 
        else:
            dict_pos[n_hash_counts] += 1 
    seg_count = []
    for key in dict_pos:
        if key == '':
            counts = []
        else:
            counts = [int(x) for x in key.split('-')]
        seg_count.append(counts_possibilities(counts, dict_pos[key]))
    return seg_count
    
def calculate_possibilities_count_sequence(seq, count):
    list_seg_counts = lookup_all_count_possibilities_sequence(seq)
    n_possibilities = 0 
    n_ops = np.prod([len(x) for x in list_seg_counts])


    return n_possibilities

In [54]:
# list_n_ops = []
# for row in tqdm(list_data_rows_part2):
#     tmp = lookup_all_count_possibilities_sequence(row.locations)
#     n_ops = np.prod([len(x) for x in tmp])
#     list_n_ops.append(n_ops)
i_row = 4
tmp = lookup_all_count_possibilities_sequence(list_data_rows_part2[i_row].locations, 
                                              max_total_count=sum(list_data_rows_part2[i_row].counts))

In [56]:
print([len(x) for x in tmp], np.prod([len(x) for x in tmp]))

[8, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1, 12, 1, 1] 165888


In [57]:
list_data_rows_part2[i_row]

data_row(locations='????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.?????.######..#####.', counts=[1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5, 1, 6, 5])

In [58]:
possible_counts_segment('????')

[counts_possibilities(counts=[], n_possible=1),
 counts_possibilities(counts=[1], n_possible=4),
 counts_possibilities(counts=[2], n_possible=3),
 counts_possibilities(counts=[1, 1], n_possible=3),
 counts_possibilities(counts=[3], n_possible=2),
 counts_possibilities(counts=[2, 1], n_possible=1),
 counts_possibilities(counts=[1, 2], n_possible=1),
 counts_possibilities(counts=[4], n_possible=1)]