# Advent of Code 2022: Day 7

In [1]:
import re

import pprint

## Part 1

In [2]:
# Sample terminal transcript used to check the solution before running it on the real input.
test_input='''$ cd /
$ ls
dir a
14848514 b.txt
8504156 c.dat
dir d
$ cd a
$ ls
dir e
29116 f
2557 g
62596 h.lst
$ cd e
$ ls
584 i
$ cd ..
$ cd ..
$ cd d
$ ls
4060174 j
8033020 d.log
5626152 d.ext
7214296 k'''


In [3]:
# Read the whole puzzle input into one string; the path is relative to the working directory.
with open('data/day07.txt') as fIn:
    puzzle_input=fIn.read()

I note that both the test input and my puzzle input begin with `$ cd /`, so I'll just assume that we always start in the root directory.

I'm going to make a couple of simplifying assumptions:
1. Files and directories are only ever listed directly after an `ls` command, so we don't need to parse the `$ ls` lines themselves; we only need to follow the `cd`s to know which directory the listed entries belong to. That's the case in the test input, and I'll assume that it's also true in the puzzle input.
2. Directories only exist if they've been previously identified with `ls`.

In [4]:
def build_file_system(str_in):
    """Reconstruct the directory tree described by a terminal transcript.

    Every directory is a dict of the form
    ``{'files': {name: {'size': int}}, 'dirs': {name: directory}}``.
    Each ``dirs`` mapping also holds a ``'..'`` entry that points at the
    parent directory (``None`` for the root).

    Parameters
    ----------
    str_in : str
        Transcript made of ``$ cd <name>`` / ``$ ls`` commands plus the
        ``dir <name>`` and ``<size> <name>`` lines printed by ``ls``.
        Traversal starts at the root; ``$ cd /`` returns to the root from
        anywhere and ``$ cd ..`` at the root stays at the root.

    Returns
    -------
    dict
        The root directory.

    Raises
    ------
    KeyError
        If the transcript changes into a directory that has not yet been
        listed by ``ls`` in the current directory.
    """
    # Raw strings: '\S', '\d' and '\$' are invalid escapes in plain literals.
    dir_pattern=re.compile(r'dir (\S+)')
    file_pattern=re.compile(r'(\d+)\s+(\S+)')
    cd_pattern=re.compile(r'\$ cd (\S+)')

    file_system={'files':{}, 'dirs':{'..':None}}
    cwd=file_system

    for nl in str_in.splitlines():

        # The three line kinds are mutually exclusive, so stop at the first hit.
        cd_check=cd_pattern.match(nl)
        if cd_check:
            target=cd_check.group(1)
            if target=='/':
                cwd=file_system
            elif target=='..':
                parent=cwd['dirs']['..']
                # The root has no parent; behave like a shell and stay put.
                if parent is not None:
                    cwd=parent
            else:
                cwd=cwd['dirs'][target]
            continue

        dir_check=dir_pattern.match(nl)
        if dir_check:
            name=dir_check.group(1)
            # A repeated `ls` must not wipe out an already-populated directory.
            if name not in cwd['dirs']:
                cwd['dirs'][name]={'files':{}, 'dirs':{'..':cwd}}
            continue

        file_check=file_pattern.match(nl)
        if file_check:
            cwd['files'][file_check.group(2)]={'size':int(file_check.group(1))}

    return file_system

In [5]:
# Build the tree for the sample transcript and pretty-print it as a sanity check.
fs=build_file_system(test_input)
pprint.pprint(fs)

{'dirs': {'..': None,
          'a': {'dirs': {'..': <Recursion on dict with id=4572975488>,
                         'e': {'dirs': {'..': <Recursion on dict with id=4572975744>},
                               'files': {'i': {'size': 584}}}},
                'files': {'f': {'size': 29116},
                          'g': {'size': 2557},
                          'h.lst': {'size': 62596}}},
          'd': {'dirs': {'..': <Recursion on dict with id=4572975488>},
                'files': {'d.ext': {'size': 5626152},
                          'd.log': {'size': 8033020},
                          'j': {'size': 4060174},
                          'k': {'size': 7214296}}}},
 'files': {'b.txt': {'size': 14848514}, 'c.dat': {'size': 8504156}}}


Now I want to calculate the total size of each directory (its own files plus everything nested beneath it):

In [6]:
def calculate_directory_sizes(file_system_in):
    """Compute the total size of a directory and of every directory below it.

    Parameters
    ----------
    file_system_in : dict
        A directory of the form
        ``{'files': {name: {'size': int}}, 'dirs': {name: directory}}``.
        A ``'..'`` entry in ``dirs`` (the parent link) is ignored.

    Returns
    -------
    dict
        ``{'dirs': {name: <same structure>}, 'size': int}`` where ``size`` is
        the sum of the directory's own files plus the sizes of all its
        sub-directories.
    """
    # Skip the parent link, otherwise the recursion would climb back up the tree.
    sub_dirs={name: calculate_directory_sizes(sub_dir)
              for name, sub_dir in file_system_in['dirs'].items()
              if name != '..'}

    # Total once, after all children are known, so the result does not depend
    # on the 'dirs' mapping being non-empty.
    own_files=sum(f['size'] for f in file_system_in['files'].values())
    nested=sum(d['size'] for d in sub_dirs.values())

    return {'dirs':sub_dirs, 'size':own_files+nested}

In [7]:
# Show the per-directory totals for the sample tree.
pprint.pprint(calculate_directory_sizes(fs))

{'dirs': {'a': {'dirs': {'e': {'dirs': {}, 'size': 584}}, 'size': 94853},
          'd': {'dirs': {}, 'size': 24933642}},
 'size': 48381165}


Now walk the tree, collect the total size of every directory, and add up the sizes of the small ones (the puzzle's threshold is 100,000).

In [8]:
def dir_size_walk(dir_sizes):
    """Flatten a nested size tree into a list of directory sizes.

    ``dir_sizes`` is a ``{'dirs': {...}, 'size': int}`` node. The result lists
    the node's own size first, followed by the sizes found in each
    sub-directory in turn (depth-first, parents before children).
    """
    flattened=[]
    pending=[dir_sizes]

    while pending:
        node=pending.pop()
        flattened.append(node['size'])
        # Push children in reverse so they are popped in their original order.
        pending.extend(reversed(list(node['dirs'].values())))

    return flattened

In [9]:
# Flattened list of every directory's total size for the sample tree.
dir_size_walk(calculate_directory_sizes(fs))

[48381165, 94853, 584, 24933642]

In [10]:
def day07_a(str_in, max_size=100000):
    """Solve Part 1: sum the total sizes of all sufficiently small directories.

    Parameters
    ----------
    str_in : str
        Terminal transcript describing the file system.
    max_size : int, optional
        Largest total size a directory may have and still be counted. The
        bound is inclusive, because the puzzle asks for directories with a
        total size of *at most* 100000.

    Returns
    -------
    int
        Sum of the total sizes of every directory whose size is
        ``<= max_size``. Nested directories are counted independently, so a
        file may contribute to the sum more than once.
    """
    ds=dir_size_walk(calculate_directory_sizes(build_file_system(str_in)))

    return sum(size for size in ds if size<=max_size)

In [11]:
# Check Part 1 against the expected answer for the sample input.
assert day07_a(test_input)==95437

In [12]:
# Part 1 answer for the real puzzle input.
day07_a(puzzle_input)

1845346

## Part 2

Should be OK; I can just reuse the list of values from `dir_size_walk`:

In [13]:
def day07_b(str_in, total_space=70000000, needed_space=30000000):
    """Solve Part 2: size of the smallest directory whose deletion frees enough space.

    Parameters
    ----------
    str_in : str
        Terminal transcript describing the file system.
    total_space : int, optional
        Total capacity of the disk.
    needed_space : int, optional
        Amount of unused space that must be available after the deletion.

    Returns
    -------
    int
        Total size of the smallest directory that is at least as large as the
        shortfall ``needed_space - (total_space - used)``.
    """
    sizes=dir_size_walk(calculate_directory_sizes(build_file_system(str_in)))

    # The root directory contains everything else, so it is the largest entry.
    total_used=max(sizes)
    required_space=needed_space-(total_space-total_used)

    # Freeing exactly the shortfall is enough, so the comparison is inclusive.
    return min(s for s in sizes if s>=required_space)

In [14]:
# Check Part 2 against the expected answer for the sample input.
assert day07_b(test_input)==24933642

In [15]:
# Part 2 answer for the real puzzle input.
day07_b(puzzle_input)

3636703