In [24]:
%load_ext autoreload
%autoreload 2

import numpy as np
import os, sys 
sys.path.append('..')
import collections
import copy
import itertools
import aoc_utils as au
from tqdm import tqdm as tqdm


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [25]:
# Load the puzzle input. `au.read_txt_file_lines` is a project helper; judging by the
# preview below it presumably returns one newline-free string per line — TODO confirm.
input_text = au.read_txt_file_lines('input.txt')
n_rows = len(input_text)
# Disabled equal-width check: the rows previewed below have different lengths,
# so asserting a single column count would not hold for this input.
# n_cols = len(input_text[0])
# for ii in range(1, n_rows):
#     assert len(input_text[ii]) == n_cols, f'row {ii} has {len(input_text[ii])} cols, not {n_cols}'
# print(f'input has {n_rows} rows and {n_cols} cols')
# Preview the first five rows as the cell output.
input_text[:5]

['??#.?#?#??? 1,3,1',
 '?????.??.???. 1,1,1',
 '??????.??..? 2,1,2',
 '.??#???.??? 3,1,1',
 '??##?#?????.. 5,1']

In [26]:
# One parsed input line: `locations` is the pattern of '.', '#' and '?' characters,
# `counts` is the list of integers that followed it on the same line.
data_row = collections.namedtuple('data_row', ['locations', 'counts'])

list_data_rows, list_n_qs = [], []
for row in input_text:
    # Every line is expected to hold at least one unknown and exactly two
    # space-separated fields: the pattern and the comma-separated counts.
    assert '?' in row
    fields = row.split(' ')
    assert len(fields) == 2
    locations, counts_text = fields
    counts = list(map(int, counts_text.split(',')))
    list_data_rows.append(data_row(locations=locations, counts=counts))
    # Number of '?' per row, kept alongside the parsed rows.
    list_n_qs.append(locations.count('?'))
list_data_rows[:5]

[data_row(locations='??#.?#?#???', counts=[1, 3, 1]),
 data_row(locations='?????.??.???.', counts=[1, 1, 1]),
 data_row(locations='??????.??..?', counts=[2, 1, 2]),
 data_row(locations='.??#???.???', counts=[3, 1, 1]),
 data_row(locations='??##?#?????..', counts=[5, 1])]

In [30]:
def place_hashes(size, count):
    '''Yield every list of length `size` holding exactly `count` '#' and '.' elsewhere.

    Patterns come out in the order of itertools.combinations over the '#' positions.
    Each yielded value is a fresh list, so callers may mutate it freely.

    with help from https://stackoverflow.com/questions/43816965/permutation-without-duplicates-in-python
    '''
    for hash_positions in itertools.combinations(range(size), count):
        chosen = set(hash_positions)
        yield ['#' if idx in chosen else '.' for idx in range(size)]

def check_sequence_correct(seq, counts):
    '''Return True when the lengths of the '#' runs in `seq` equal `counts`.

    `seq` must be a str made only of '.' and '#' (enforced by assertions).
    `counts` is compared with a list using ==, so it has to be a list of ints
    for the check to succeed.
    '''
    assert type(seq) == str
    assert set(seq).issubset(set('.#'))

    ## walk the sequence once, closing a run every time a '.' is met
    run_lengths = []
    current_run = 0
    for char in seq:
        if char == '.':
            if current_run > 0:
                run_lengths.append(current_run)
            current_run = 0
        else:
            current_run += 1
    ## a run touching the end of the sequence still has to be recorded
    if current_run > 0:
        run_lengths.append(current_run)
    return run_lengths == counts

def get_n_perms_per_seq(seq, counts, verbose=0):
    '''Count the ways of replacing every '?' in `seq` so that the '#' runs match `counts`.

    Brute force: the number of '#' still missing is sum(counts) minus the '#'
    already present; every way of distributing them over the '?' slots (from
    place_hashes) is written into the sequence and tested with
    check_sequence_correct.

    Parameters
    ----------
    seq : str
        Pattern made of '.', '#' and '?'; must contain at least one '?'.
    counts : list of int
        Expected lengths of the '#' runs, left to right.
    verbose : int or bool, default 0
        When truthy, print the slot summary, every candidate tried and every match.

    Returns
    -------
    int
        Number of candidate sequences that satisfy `counts`.

    Raises
    ------
    AssertionError
        If `seq` has no '?', is not a str, contains other characters, or has
        fewer '?' than '#' still to be placed.
    '''
    assert '?' in seq
    assert isinstance(seq, str)
    assert set(seq).issubset(set('.#?'))

    n_hash = seq.count('#')
    n_q = seq.count('?')
    n_hash_counts = sum(counts) - n_hash   # '#' that still have to go on '?' slots
    n_dot_counts = n_q - n_hash_counts     # the remaining '?' slots become '.'
    assert n_dot_counts >= 0, f'{n_dot_counts} < 0, {seq}, {counts}, {n_q}, {n_hash_counts}'
    if verbose:
        print(f'{n_dot_counts} dots, {n_hash_counts} hashes, {n_q} qs')

    ## locate the '?' slots once, instead of re-scanning the sequence with
    ## list.index for every single substitution of every candidate
    q_positions = [idx for idx, char in enumerate(seq) if char == '?']
    template = list(seq)

    n_perms = 0
    n_loop = 0
    for new_elements in place_hashes(n_q, n_hash_counts):
        n_loop += 1
        ## fill the ?s, left to right, with the candidate characters
        candidate = template.copy()
        for position, char in zip(q_positions, new_elements):
            candidate[position] = char
        seq_try = ''.join(candidate)
        if verbose:
            print(new_elements)
        if check_sequence_correct(seq_try, counts):
            if verbose:
                print(seq_try, seq, counts)
            n_perms += 1
    if verbose:
        print(f'done in {n_loop} loops, {n_perms} perms')
    return n_perms

def get_n_perms_per_row(row):
    '''Return get_n_perms_per_seq for the locations and counts of one data_row.

    Asserts that `row` is exactly a data_row instance before unpacking it.
    '''
    assert type(row) == data_row
    locations, counts = row.locations, row.counts
    return get_n_perms_per_seq(locations, counts)

# Part 1 answer: add up the arrangement counts of all parsed rows.
n_perm_total = sum(get_n_perms_per_row(data) for data in list_data_rows)
print(n_perm_total)
# get_n_perms_per_seq(list_data_rows[0].locations, list_data_rows[0].counts)  

6935


# part 2 
tricky .. 
**Could we start by going through special cases?**
- (Note that the method above could be a lot faster if special cases were handled via `if` statements, to cut down the number of permutations tried, but that is considerable effort.)
- If sequence ends with two `.`s, then the added `?` will always be a `.`, and hence, the total number of sequence perms will be the original number of perms `** 5`. 
- If not, then there will still be repetition! 4 times the same thing, plus an ending without extra `?` (plus spill-over into next sequence). 
- So maybe we can split up the new combined sequence in 2 parts: the first sequence + spill-over, and the remainder of the fifth part. Solve both. Then `perm_total = perm_1 ** 4 * perm_2`


Okay plan is:
1. Create new data structure, of `seq_1`, `count_1`, `seq_2`, `count_2`. (Merge, split past spill-over. Maybe use special cases of whether start/ends are `?` or not?)
2. Solve both 
3. Combine perms 

**Update:**
- Actually this is not perfect. E.g. `..?..?..?.. [1,1]` has 3 arrangements on its own, but in part 2 it would have to be re-evaluated as a whole: all 19 `?` (15 original + 4 added) are isolated and 10 of them must become `#`, leading to $\binom{19}{10}$ arrangements.

Plan 2:
- Could we consider trying out all connecting `?` possibilities separately? Those are 16 combinations. 
- I think the issue is the same; we cannot shortcut by considering original sequences separately and combining (the example above still fails). Maybe that example is too extreme.. but it seems kind of fruitless to code this up and run into some annoying sequence like that. 

Plan 3: 
- Going back to the start; can we start by filling in all the spots we know for sure? Ie one forward pass through all (combined) sequences and filling in all `?` that have just one possibility. That might massively bring down number of perms.
- Secondly, if there are (large) contiguous groups of `?`, where we know for sure they must accommodate a large number of `#`, it would be brilliant if we could just take the binomial coefficient $\binom{n}{k}$ and multiply, instead of evaluating every combination of the entire sequence.

In [31]:
# NOTE(review): unfinished prototype for part 2. It only inspects the first row
# (unconditional `break` below) and never appends to `list_data_rows_part2`.
# Container for a row split into two (sequence, counts) pairs, following step 1
# of the plan written in the notes above.
data_row_part2 = collections.namedtuple('data_row_part2', 'locations_1 counts_1 locations_2 counts_2')
list_data_rows_part2 = [] 

for row in list_data_rows:
    print(row)
    start_char = row.locations[0]
    end_char = row.locations[-1] 
    # Only the case where neither end of the row is '?' is handled so far;
    # `seq_1` is the row with one '?' appended and is not used yet.
    if start_char != '?' and end_char != '?': 
        seq_1 = row.locations + '?'
    

    # TODO: remaining cases and the second part of the split are not implemented.
    break

data_row(locations='??#.?#?#???', counts=[1, 3, 1])


In [32]:
# Scratch check: plain string concatenation appends the extra '?' to a pattern.
'...' + '?'

'...?'