# 2024 Day 9

https://adventofcode.com/2024/day/9

https://adventofcode.com/2024/day/9/input

In [1]:
import re

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

In [2]:
# Small sample disk map used to sanity-check both parts before running the real input.
test_text = '2333133121414131402'

In [3]:
# Load the puzzle input. The context manager closes the file handle as soon
# as the contents are read instead of leaving it to the garbage collector.
with open('input-09.txt', encoding='utf-8') as input_file:
    text = input_file.read().strip()

In [4]:
class Disk:
    """
    Block-level model of a disk described by a dense "disk map" string.

    The digits of the map alternate between the length of a file and the
    length of the free gap that follows it.  Files are numbered 0, 1, 2, ...
    in the order they appear.

    Attributes:
        values:     every digit of the map as an int64 array.
        file_sizes: the even-indexed digits (file lengths).
        gaps:       the odd-indexed digits (free-gap lengths).
        start_size: total number of blocks on the disk.
        file_ids:   one float per block: the owning file id, or NaN if free.
                    This is the array the compress methods mutate.
        start_ids:  untouched copy of the initial layout.
    """

    def __init__(self, text: str):
        """
        Expand the disk map `text` into a per-block layout.

        Raises ValueError if `text` contains a non-digit character.
        """
        # Signed 64-bit on purpose: unsigned 64-bit scalars mixed with Python
        # ints are promoted to float64 by NumPy < 2, which breaks slicing.
        self.values = values = np.array([int(ch) for ch in text], dtype=np.int64)

        self.file_sizes = file_sizes = values[::2]
        self.gaps = gaps = values[1::2]
        self.start_size = int(values.sum())

        # A map with an odd number of digits has no gap after the last file;
        # pad with a zero-length gap so every file is handled the same way
        # (this also covers single-file and empty maps).
        n_files = len(file_sizes)
        gap_after = np.zeros(n_files, dtype=np.int64)
        gap_after[:len(gaps)] = gaps

        # Interleave "file i" / "free" labels with their run lengths and let
        # np.repeat expand them into one entry per block.
        labels = np.empty(2 * n_files)
        labels[0::2] = np.arange(n_files)
        labels[1::2] = np.nan
        run_lengths = np.empty(2 * n_files, dtype=np.int64)
        run_lengths[0::2] = file_sizes
        run_lengths[1::2] = gap_after
        file_ids = np.repeat(labels, run_lengths)

        self.file_ids = file_ids.copy()
        self.start_ids = file_ids.copy()

    def compress(self) -> "Disk":
        """
        Move single blocks from the right end into the leftmost free blocks.

        The i-th free block from the left is paired with the i-th filled block
        from the right; blocks are moved for as long as the filled block lies
        to the right of the free one.  Positions are taken from the current
        layout, so calling this again on a compacted disk is a no-op.

        Returns self so calls can be chained.
        """
        file_ids = self.file_ids
        free_mask = np.isnan(file_ids)
        idx_avail = np.where(free_mask)[0]
        idx_filled = np.where(~free_mask)[0][::-1]

        n_pairs = min(len(idx_avail), len(idx_filled))
        targets = idx_avail[:n_pairs]
        sources = idx_filled[:n_pairs]

        # targets increase while sources decrease, so "source is right of
        # target" holds for a prefix of the pairs and then never again.
        n_moves = int((sources > targets).sum())
        targets = targets[:n_moves]
        sources = sources[:n_moves]

        file_ids[targets] = file_ids[sources]
        file_ids[sources] = np.nan
        return self

    def compress_defrag(self) -> "Disk":
        """
        Move whole files, highest id first, into the leftmost free range that
        is large enough and ends at or before the file's first block.

        Each file is considered exactly once.  File extents come from
        get_filled_ranges(), so every file is expected to occupy one
        contiguous run of blocks when this is called.

        Returns self so calls can be chained.
        """
        file_ids = self.file_ids

        # Track free ranges incrementally instead of rescanning the whole
        # array for every file.  Space vacated by a moved file is not added
        # back: it lies to the right of every file still to be processed, so
        # it could never be selected anyway.
        free = [[a0, a1] for (a0, a1) in self.get_avail_ranges()]

        for (f0, f1) in reversed(self.get_filled_ranges()):
            need = f1 - f0
            for (pos, span) in enumerate(free):
                a0, a1 = span
                if a1 > f0:
                    # Ranges are sorted, so nothing further left remains.
                    break
                if a1 - a0 >= need:
                    file_ids[a0:a0 + need] = file_ids[f0:f1]
                    file_ids[f0:f1] = np.nan
                    if a0 + need == a1:
                        del free[pos]  # safe: we stop iterating right away
                    else:
                        span[0] = a0 + need
                    break
        return self

    def checksum(self) -> int:
        """
        Return the sum of (block position * file id) over all filled blocks.
        """
        file_ids = self.file_ids

        mask = ~np.isnan(file_ids)
        positions = np.where(mask)[0]
        # Integer arithmetic keeps the result exact; a float64 accumulator
        # would start rounding above 2**53.
        ids = file_ids[mask].astype(np.int64)
        return int(np.dot(ids, positions))

    def get_avail_ranges(self):
        """
        Get each (a0, a1) such that all i in range(a0, a1) are nan.

        Ranges are maximal, non-empty and returned in ascending order.
        """
        nan_mask = np.isnan(self.file_ids)
        # Pad with zeros so runs touching either end still produce an edge.
        edges = np.diff(np.r_[0, nan_mask.astype(int), 0])

        nan_starts = np.where(edges == 1)[0]
        nan_ends = np.where(edges == -1)[0]
        return list(zip(nan_starts.tolist(), nan_ends.tolist()))

    def get_filled_ranges(self):
        """
        Get (f0, f1) for each file id present on the disk, in ascending id
        order, where f0 is the first block holding that id and f1 is one past
        the last.  range(f0, f1) holds only that id when the file is
        contiguous.

        Ids that own no blocks (zero-length files) are skipped; an empty or
        entirely free disk yields an empty list.
        """
        filled = np.where(~np.isnan(self.file_ids))[0]
        if filled.size == 0:
            return []
        ids = self.file_ids[filled]

        # np.unique reports the first occurrence of each id; running it on
        # the reversed array gives the last occurrence.
        _, first = np.unique(ids, return_index=True)
        _, first_from_end = np.unique(ids[::-1], return_index=True)
        last = ids.size - 1 - first_from_end

        return [
            (int(filled[lo]), int(filled[hi]) + 1)
            for (lo, hi) in zip(first, last)
        ]

## Part 1

In [5]:
# Part 1 on the sample map: compact block by block, then show the checksum.
d = Disk(test_text)
d.compress()
d.checksum()

1928

In [6]:
# Part 1 on the real puzzle input: compact block by block, then show the checksum.
d = Disk(text)
d.compress()
d.checksum()

6367087064415

## Part 2

In [7]:
# Part 2 on the sample map: move whole files only, then show the checksum.
d = Disk(test_text)
d.compress_defrag()
d.checksum()

2858

In [8]:
# Part 2 on the real puzzle input: move whole files only, then show the checksum.
d = Disk(text)
d.compress_defrag()
d.checksum()

6390781891880