# Utils

> Various useful pieces

In [None]:
#| default_exp utils

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()
from nbdev.showdoc import *

In [None]:
#| export
import os
from pydantic_settings import BaseSettings
from pathlib import Path

# Load environment variables from .env file
from dotenv import load_dotenv; load_dotenv();

## Set up logging

In [None]:
#| export

import logging
import time
import colorlog

In [None]:
#| exporti

# Monotonic timestamp of the most recently formatted log record, shared by
# every TimeDeltaLogFormatter instance in this module.
_last_log_time = time.monotonic()

class TimeDeltaLogFormatter(colorlog.ColoredFormatter):
    """Colored formatter that reports the time elapsed since the previous log message.

    Each call to `format` stores the elapsed seconds (three decimals, as a
    string) on the record as ``record.delta`` so the format string can use
    ``%(delta)s`` in place of an absolute timestamp.
    """

    def format(self, record: logging.LogRecord) -> str:
        """Attach ``delta`` to `record`, shorten its pathname and format it.

        Args:
            record: The log record to format. Its ``delta`` attribute is set and
                its ``pathname`` attribute may be rewritten in place.

        Returns:
            The formatted message produced by the parent formatter.
        """
        global _last_log_time
        # Use a monotonic clock: wall-clock adjustments (NTP, DST, manual
        # changes) must not produce negative or inflated deltas.
        now = time.monotonic()
        delta = now - _last_log_time
        _last_log_time = now

        # Add the delta as a field to the record
        record.delta = f"{delta:.3f}"

        # Convert the pathname to be relative to lovely-docs.
        if hasattr(record, "pathname"):
            if "ipykernel" in record.pathname:
                record.pathname = "<ipykernel>"
            if "lovely-docs/" in record.pathname:
                # maxsplit=1 keeps everything after the first marker; without
                # it, a path containing "lovely-docs/" twice would be cut down
                # to the (possibly empty) fragment between the two markers.
                record.pathname = record.pathname.split("lovely-docs/", 1)[1]

        return super().format(record)


def setup_logging() -> None:
    """Configure the root logger with a single colored console handler.

    The "httpx" and "httpcore" loggers are raised to WARNING, every handler
    currently attached to the root logger is removed, and one StreamHandler
    using TimeDeltaLogFormatter is attached. The root logger's level is left
    unchanged.
    """
    # Known flooders; add e.g. "anthropic" here if it becomes too chatty.
    for noisy_name in ("httpx", "httpcore"):
        logging.getLogger(noisy_name).setLevel(logging.WARNING)

    root = logging.getLogger()

    # Drop whatever is already attached so repeated calls do not duplicate
    # output. Iterate over a snapshot because removeHandler mutates the list.
    for existing in list(root.handlers):
        root.removeHandler(existing)

    # root.setLevel(logging.DEBUG)  # enable for verbose output

    # Level name -> colorlog color spec.
    level_colors = {
        'DEBUG': 'cyan',
        'INFO': 'green',
        'WARNING': 'yellow',
        'ERROR': 'red',
        'CRITICAL': 'red,bg_white',
    }

    # "+<delta>s LEVEL path:line function message", with level and location colored.
    delta_formatter = TimeDeltaLogFormatter(
        "+%(delta)ss %(log_color)s%(levelname)s%(reset)s %(blue)s%(pathname)s:"
        "%(lineno)d%(reset)s %(funcName)s %(message)s",
        log_colors=level_colors,
    )

    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(delta_formatter)
    root.addHandler(stream_handler)

In [None]:
#| exporti

# Configure logging once, as a side effect of running/importing this module.
setup_logging()

## Settings

In [None]:
#| export

class Source(BaseSettings):
    """Base description of a documentation source; the fields shared by every source kind."""
    # Identifier of the source; clone_repo uses it as the directory name under settings.git_dir.
    name: str
    # Location of the documentation files.
    # NOTE(review): presumably relative to the root of the fetched source - confirm against callers.
    doc_dir: Path


class GitSource(Source):
    """A documentation source that lives in a git repository."""
    # Repository URL; passed to Repo.clone_from by clone_repo.
    repo: str
    # Revision handed to `git reset --hard` by clone_repo (the default settings use the branch name "main").
    commit: str

class WebSource(Source):
    """A source that adds no fields of its own beyond those inherited from Source.

    NOTE(review): presumably meant for documentation fetched from the web - confirm.
    """


class Settings(BaseSettings):
    """Application-wide configuration with built-in defaults.

    NOTE(review): as a BaseSettings subclass, field values can presumably also be
    supplied through environment variables - confirm against pydantic-settings.
    """
    # Model identifier string.
    model: str = "claude-haiku-4.5"
    # Cloned git repos will be here. The path is relative.
    # NOTE(review): presumably resolved against the current working directory - confirm.
    git_dir: Path = Path("../git_dir") # Cloned git repos will be here
    # Relative path for processed documents.
    output_dir: Path = Path("../processed_documents/")
    # Relative path for templates.
    templates_dir: Path = Path("../templates")
    # Two levels above this file when __file__ is defined (i.e. the parent of the
    # directory containing it); otherwise the current working directory.
    project_root: Path = Path(__file__).parent.parent if "__file__" in globals() else Path.cwd()
    # Known documentation sources, keyed by a string identifier.
    sources: dict[str, Source] = {
        "sveltejs/svelte": GitSource(
            name="sveltejs-svelte",
            doc_dir="documentation/docs",
            repo="https://github.com/sveltejs/svelte",
            commit="main")
    }
    # The default is read from ANTHROPIC_API_KEY once, when this class body is
    # executed; it is an empty string if that variable is unset.
    api_key: str = os.getenv("ANTHROPIC_API_KEY", "")


# Module-level settings instance used by the rest of this module (e.g. clone_repo).
settings = Settings()

## Working with git

In [None]:
#| export

from git import Repo, InvalidGitRepositoryError, NoSuchPathError

In [None]:
#| export

def git_progress(op_code, cur_count, max_count=None, message=''):
    """Print a single-line progress update, overwriting the previous one.

    The line starts with a carriage return and has no trailing newline, so
    successive calls redraw the same terminal line. It is passed as the
    `progress` callback to `Repo.clone_from` in `clone_repo`.

    Args:
        op_code: Operation identifier, printed as-is.
        cur_count: Current progress count.
        max_count: Total count; when falsy (None or 0) only `cur_count` is shown.
        message: Optional text appended after the counts.
    """
    status_line = (
        f"\r{op_code}: {cur_count}/{max_count} {message}"
        if max_count
        else f"\r{op_code}: {cur_count} {message}"
    )
    print(status_line, end='', flush=True)


def clone_repo(source: GitSource):
    """Make sure a local checkout of `source` exists and is hard-reset to `source.commit`.

    The checkout lives at `settings.git_dir / source.name`. If a repository can
    be opened there, it is cleaned with `git clean -fdq`; otherwise the
    repository is cloned from `source.repo`, reporting progress through
    `git_progress`. In both cases the working tree is then reset with
    `git reset --hard <source.commit>`.

    Args:
        source (GitSource): Provides the repository URL (`repo`), the checkout
            directory name (`name`) and the target revision (`commit`).

    Returns:
        The hex SHA of the commit HEAD points to after the reset.

    Only the "invalid repository" / "no such path" errors that trigger a clone
    are handled; any other exception propagates to the caller.
    """
    checkout_path = settings.git_dir / source.name

    try:
        checkout = Repo(checkout_path)
        checkout.git.clean('-fdq')  # Just in case something was left behind
    except (InvalidGitRepositoryError, NoSuchPathError):
        # Nothing usable at checkout_path yet: clone from the remote.
        checkout = Repo.clone_from(source.repo, checkout_path, progress=git_progress)

    # NOTE(review): no fetch is performed for an existing checkout, so
    # source.commit is resolved against what the local repository already has.
    checkout.git.reset('--hard', source.commit)
    head_sha = checkout.head.commit.hexsha
    return head_sha