# sveltejs/svelte

> Summarize the sveltejs/svelte documentation tree with an LLM and save the results to disk.

In [None]:
#| default_exp sveltejs-svelte

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()
from nbdev.showdoc import *

In [None]:
#| export
from lovely_docs.utils import settings, GitSource, clone_repo
import llm
from llm.models import Usage
from pydantic import BaseModel
from jinja2 import Environment, FileSystemLoader
import asyncio
import json
import shutil
from pathlib import Path as PathLib

In [None]:
# Look up the configured source for this notebook; the isinstance assert makes
# the cell fail early if the entry is not a GitSource.
source = settings.sources["sveltejs/svelte"]
assert isinstance(source, GitSource)

# clone_repo comes from lovely_docs.utils; presumably it checks the repository
# out under settings.git_dir / source.name — TODO confirm against the helper.
clone_repo(source)
clone_dir = settings.git_dir / source.name

In [None]:
# Root of the documentation files inside the cloned repository.
doc_dir_path = clone_dir / source.doc_dir

def get_files_from_fs(root):
    """Return every file below ``root`` as a path string relative to ``root``.

    Files are ordered by nesting depth first (files directly in ``root`` come
    first) and alphabetically by relative path within the same depth.
    """
    relative_paths = [
        entry.relative_to(root)
        for entry in root.rglob('*')
        if entry.is_file()
    ]
    # Depth is the number of directories between root and the file.
    relative_paths.sort(key=lambda rel: (len(rel.parts) - 1, str(rel)))
    return [str(rel) for rel in relative_paths]

# List every file under the docs root (shallowest first) and preview the first ten.
files = get_files_from_fs(doc_dir_path)
files[:10]

['index.md',
 '01-introduction/01-overview.md',
 '01-introduction/02-getting-started.md',
 '01-introduction/03-svelte-files.md',
 '01-introduction/04-svelte-js-files.md',
 '01-introduction/index.md',
 '02-runes/01-what-are-runes.md',
 '02-runes/02-$state.md',
 '02-runes/03-$derived.md',
 '02-runes/04-$effect.md']

In [None]:
# Async LLM client for the model named in settings; the API key is assigned on
# the model object itself.
model = llm.get_async_model(settings.model)
model.key = settings.api_key

# await (await model.prompt("Reply with ok")).text()

In [None]:
# def wrap_doc(doc: str, name: str):
#     return f"""
# <page {name}>
# {doc}
# </page>
# """.strip()

In [None]:
# with open(clone_dir / source.doc_dir / files[2]) as f:
#     doc = f.read()
#     wrapped = wrap_doc(doc, files[2])
# # print(wrapped)

In [None]:
class ReplySchema(BaseModel):
    # Structured reply requested from the model for a single documentation
    # file; it is passed as `schema=` to model.prompt and the JSON answer is
    # validated against it.
    # NOTE(review): field meanings below are taken from the commented-out
    # prompt in this notebook; the live prompt is in process_doc.j2 — confirm.
    better_filename: str  # suggested short filename for the page
    summary_markdown: str  # programmer-oriented summary
    short_summary_markdown: str  # shorter summary with only the key points
    one_line_summary_txt: str  # one-line summary
    relevant: bool  # whether the page matters to someone already familiar with the library


# prompt = f"""
# Read this file, it's part of the documentation for a library. Then:
# - Come up with a short filename that perfectly encompasses the contents. It's ok to keep the original if it's good already.
# - Then write 3 versions of the summary:
#   1. Extract bits that are important to a programmer. Don't include any extra notes. Be concise. One example per use case max.
#   2. Shorter summary, keep only the most important bits.
#   3. One-line summary of the file.
# - Decide if the content of this file is relevant to a programmer who is already somewhat familiar with the library in question.

# \n{wrapped}
# """


In [None]:
# res = await (await model.prompt(prompt=prompt, schema=ReplySchema))
# r = ReplySchema.model_validate_json(await res.text())
# print(r.better_filename)
# print(r.summary_markdown)
# print(r.short_summary_markdown)
# print(r.one_line_summary_txt)
# print(r.relevant)

In [None]:
# await res.usage()

In [None]:

# async def process_doc(filename: str) -> tuple[ReplySchema, Usage]:
#     with open(clone_dir / source.doc_dir / filename) as f:
#         doc = f.read()
#         wrapped = wrap_doc(doc, filename)

#     prompt = f"""
#     Read this file, it's part of the documentation for a library. Then:
#     - Come up with a short filename that perfectly encompasses the contents. It's ok to keep the original if it's good already. Don't include the name of the library in the name.
#     - Then write 3 versions of the summary:
#     1. Extract bits that are important to a programmer. Don't include any extra notes. Be concise. One example per use case max.
#     2. Shorter summary, keep only the most important bits.
#     3. One-line summary of the file.
#     - Decide if the content of this file is relevant to a programmer who is already somewhat familiar with the library in question.

#     \n{wrapped}
#     """

#     res = await (await model.prompt(prompt=prompt, schema=ReplySchema))
#     reply = ReplySchema.model_validate_json(await res.text())
#     usage = await res.usage()
#     return reply, usage

In [None]:
# r, u = await process_doc(files[3])
# print(" ---\n", r.better_filename)
# print(" ---\n", r.summary_markdown)
# print(" ---\n", r.short_summary_markdown)
# print(" ---\n", r.one_line_summary_txt)
# print(r.relevant)

In [None]:

# Jinja environment that loads prompt templates (process_doc.j2,
# summarize_directory.j2) from settings.templates_dir.
jinja_env = Environment(loader=FileSystemLoader(settings.templates_dir))

async def process_doc_jinja(filename: str) -> tuple[ReplySchema, Usage]:
    """Summarize one documentation file with the LLM.

    Args:
        filename: Path of the file relative to the source's documentation
            directory (``clone_dir / source.doc_dir``).

    Returns:
        A tuple of the reply validated against ``ReplySchema`` and the usage
        reported by the response.
    """
    # Decode explicitly as UTF-8 so the text sent to the model does not depend
    # on the platform's locale encoding.
    with open(clone_dir / source.doc_dir / filename, encoding="utf-8") as f:
        doc = f.read()

    # Render the prompt from the template, passing the file text and its name.
    template = jinja_env.get_template("process_doc.j2")
    prompt = template.render(text=doc, filename=filename)

    res = await (await model.prompt(prompt=prompt, schema=ReplySchema))
    reply = ReplySchema.model_validate_json(await res.text())
    usage = await res.usage()
    return reply, usage

In [None]:
# Smoke-test the Jinja-based version on a single file (files[3]) and print
# each field of the structured reply.
r_jinja, u_jinja = await process_doc_jinja(files[3])
print(" ---\n", r_jinja.better_filename)
print(" ---\n", r_jinja.summary_markdown)
print(" ---\n", r_jinja.short_summary_markdown)
print(" ---\n", r_jinja.one_line_summary_txt)
print(r_jinja.relevant)

 ---
 svelte-file-structure.md
 ---
 ## Svelte File Structure

`.svelte` files contain components written as a superset of HTML with three optional sections: `<script>`, `<style>`, and markup.

### `<script>` Block
- Contains JavaScript or TypeScript (add `lang="ts"` attribute)
- Runs when a component instance is created
- Top-level variables are accessible in markup
- Use runes to declare component props and add reactivity

### `<script module>` Block
- Runs once when the module first evaluates, not per instance
- Variables declared here can be referenced elsewhere in the component (one-way)
- Can `export` bindings from this block; these become module exports
- Cannot use `export default` (reserved for the component itself)
- TypeScript support: ensure editor setup recognizes exports (supported by Svelte VS Code extension and IntelliJ plugin)

### `<style>` Block
- CSS is scoped to the component only
- Selectors only affect elements within that component
- Example:
```svelte
<style>
 

In [None]:

class DocPage(BaseModel):
    """Represents a single documentation page and the model's summaries of it"""
    filename: str  # file name only, e.g. "index.md"
    original_path: str  # path relative to the documentation root
    # The next five fields mirror ReplySchema; build_doc_tree leaves them
    # empty/False and process_tree_depth_first fills them from the model reply.
    better_filename: str
    summary_markdown: str
    short_summary_markdown: str
    one_line_summary_txt: str
    relevant: bool
    usage: Usage  # usage reported for this page's request (zeros until processed)

class DocDirectory(BaseModel):
    """Represents a directory in the documentation structure"""
    name: str  # directory name; build_doc_tree uses "root" for the top level
    path: str  # path relative to the documentation root ("" for the root)
    better_name: str|None = None  # set from the directory summary, if one was produced
    pages: list[DocPage] = []  # markdown pages directly in this directory
    subdirs: dict[str, 'DocDirectory'] = {}  # child directories keyed by directory name
    short_summary: str|None = None  # stays None when the directory was never summarized
    one_line_summary: str|None = None
    usage: Usage|None = None  # Usage for directory summarization only

def build_doc_tree(root_path: PathLib) -> DocDirectory:
    """Build a tree structure mirroring the documentation directory.

    Only ``.md`` files become pages; every directory becomes a subdirectory
    node, even if it ends up without pages. Pages are created as unprocessed
    placeholders (empty summaries, ``relevant=False``, zero usage).

    Args:
        root_path: Directory containing the documentation files.

    Returns:
        The root ``DocDirectory``, named "root" with an empty ``path``.
    """
    # pathlib.Path is imported in this file as PathLib, so that name is used
    # here instead of the unbound `Path`.
    def build_tree(path: PathLib, rel_path: str = "") -> DocDirectory:
        dir_name = path.name if rel_path else "root"
        doc_dir = DocDirectory(name=dir_name, path=rel_path)

        # Immediate children only; sorting keeps the order deterministic.
        for item in sorted(path.iterdir()):
            # Path of the child relative to the documentation root.
            item_rel = str(PathLib(rel_path) / item.name) if rel_path else item.name

            if item.is_file() and item.suffix == '.md':
                # Placeholder entry; the summaries are filled in later.
                doc_dir.pages.append(DocPage(
                    filename=item.name,
                    original_path=item_rel,
                    better_filename="",
                    summary_markdown="",
                    short_summary_markdown="",
                    one_line_summary_txt="",
                    relevant=False,
                    usage=Usage(input=0, output=0)
                ))
            elif item.is_dir():
                doc_dir.subdirs[item.name] = build_tree(item, item_rel)

        return doc_dir

    return build_tree(root_path)

def calculate_total_usage(doc_dir: DocDirectory) -> Usage:
    """Sum token usage over a whole directory tree.

    Counts the usage of every page in ``doc_dir``, the directory's own
    summarization usage (when present), and the totals of all subdirectories.
    Missing (``None``) counts are treated as zero.
    """
    # Gather every usage record that contributes to this directory's total.
    records = [page.usage for page in doc_dir.pages]
    if doc_dir.usage:
        records.append(doc_dir.usage)
    records.extend(
        calculate_total_usage(child) for child in doc_dir.subdirs.values()
    )

    tokens_in = sum(record.input or 0 for record in records)
    tokens_out = sum(record.output or 0 for record in records)
    return Usage(input=tokens_in, output=tokens_out)

# Build the (still unprocessed) tree for the cloned docs and dump it for inspection.
doc_tree = build_doc_tree(doc_dir_path)
print(doc_tree.model_dump())



In [None]:
class DirectorySummarySchema(BaseModel):
    # Structured reply requested from the model when summarizing a directory;
    # the three fields are copied onto the matching DocDirectory attributes.
    better_name: str
    short_summary: str
    one_line_summary: str

async def summarize_directory(dir_name: str, pages: list[DocPage], subdirs: list[DocDirectory]) -> tuple[DirectorySummarySchema, Usage]:
    """Ask the model for a summary of one directory.

    The prompt is rendered from ``summarize_directory.j2`` with the directory
    name and the pages and subdirectories supplied by the caller. Returns the
    validated summary together with the usage reported for the request.
    """
    prompt = jinja_env.get_template("summarize_directory.j2").render(
        dir_name=dir_name,
        pages=pages,
        subdirs=subdirs,
    )

    pending = await model.prompt(prompt=prompt, schema=DirectorySummarySchema)
    response = await pending
    raw_json = await response.text()
    summary = DirectorySummarySchema.model_validate_json(raw_json)
    return summary, await response.usage()

In [None]:

async def process_tree_depth_first(doc_dir: DocDirectory, root_path: PathLib) -> tuple[int, int]:
    """
    Process documentation tree depth-first with parallel processing.

    Subdirectories are processed first (concurrently), then the pages of this
    directory (concurrently), and finally the directory itself is summarized
    from its relevant pages and already-summarized subdirectories. ``doc_dir``
    is updated in place: pages are replaced by their processed versions and
    the directory summary fields are filled in.

    Args:
        doc_dir: Directory node to process.
        root_path: Not read by this function; it is only forwarded to the
            recursive calls and kept so existing callers keep working.

    Returns (total_pages_processed, total_relevant_pages)
    """
    total_processed = 0
    total_relevant = 0

    # First, recursively process all subdirectories in parallel
    if doc_dir.subdirs:
        subdir_results = await asyncio.gather(*(
            process_tree_depth_first(subdir, root_path)
            for subdir in doc_dir.subdirs.values()
        ))

        for sub_processed, sub_relevant in subdir_results:
            total_processed += sub_processed
            total_relevant += sub_relevant

    # Then process all pages in this directory in parallel
    if doc_dir.pages:
        async def process_single_page(page: DocPage) -> DocPage:
            print(f"Processing: {page.original_path}")
            reply, usage = await process_doc_jinja(page.original_path)

            return DocPage(
                filename=page.filename,
                original_path=page.original_path,
                better_filename=reply.better_filename,
                summary_markdown=reply.summary_markdown,
                short_summary_markdown=reply.short_summary_markdown,
                one_line_summary_txt=reply.one_line_summary_txt,
                relevant=reply.relevant,
                usage=usage
            )

        # gather() returns results in the order the awaitables were passed,
        # so results line up with doc_dir.pages position by position.
        processed_pages = await asyncio.gather(*(
            process_single_page(page) for page in doc_dir.pages
        ))

        for index, processed_page in enumerate(processed_pages):
            doc_dir.pages[index] = processed_page
            total_processed += 1
            if processed_page.relevant:
                total_relevant += 1

    # After processing all pages and subdirs, create directory summary
    relevant_pages = [p for p in doc_dir.pages if p.relevant]
    relevant_subdirs = [s for s in doc_dir.subdirs.values() if s.short_summary is not None]

    # A directory with nothing relevant below it is left unsummarized
    # (short_summary stays None).
    if relevant_pages or relevant_subdirs:
        print(f"Summarizing directory: {doc_dir.path or 'root'} ({len(relevant_pages)} pages, {len(relevant_subdirs)} subdirs)")
        dir_summary, usage = await summarize_directory(doc_dir.name, relevant_pages, relevant_subdirs)
        doc_dir.better_name = dir_summary.better_name
        doc_dir.short_summary = dir_summary.short_summary
        doc_dir.one_line_summary = dir_summary.one_line_summary
        doc_dir.usage = usage

    return total_processed, total_relevant

In [None]:
# Debug aid: keep only the "02-runes" subdirectory so the processing cells
# below run on a small subset of the docs. Remove this cell to process
# everything.
doc_tree.subdirs = {
    "02-runes" : doc_tree.subdirs["02-runes"]
}
doc_tree.model_dump()

{'name': 'root',
 'path': '',
 'better_name': None,
 'pages': [{'filename': 'index.md',
   'original_path': 'index.md',
   'better_filename': '',
   'summary_markdown': '',
   'short_summary_markdown': '',
   'one_line_summary_txt': '',
   'relevant': False,
   'usage': {'input': 0, 'output': 0, 'details': None}}],
 'subdirs': {'02-runes': {'name': '02-runes',
   'path': '02-runes',
   'better_name': None,
   'pages': [{'filename': '01-what-are-runes.md',
     'original_path': '02-runes/01-what-are-runes.md',
     'better_filename': '',
     'summary_markdown': '',
     'short_summary_markdown': '',
     'one_line_summary_txt': '',
     'relevant': False,
     'usage': {'input': 0, 'output': 0, 'details': None}},
    {'filename': '02-$state.md',
     'original_path': '02-runes/02-$state.md',
     'better_filename': '',
     'summary_markdown': '',
     'short_summary_markdown': '',
     'one_line_summary_txt': '',
     'relevant': False,
     'usage': {'input': 0, 'output': 0, 'details

In [None]:
# Process whatever is currently in doc_tree (an earlier cell narrows its
# subdirs to "02-runes", so this is not the full documentation set).
processed, relevant = await process_tree_depth_first(doc_tree, doc_dir_path)
print(f"\n\nTotal pages processed: {processed}")
print(f"Total relevant pages: {relevant}")

Processing: 02-runes/01-what-are-runes.md
Processing: 02-runes/02-$state.md
Processing: 02-runes/03-$derived.md
Processing: 02-runes/04-$effect.md
Processing: 02-runes/05-$props.md
Processing: 02-runes/06-$bindable.md
Processing: 02-runes/07-$inspect.md
Processing: 02-runes/08-$host.md
Processing: 02-runes/index.md
Summarizing directory: 02-runes (9 pages, 0 subdirs)
Processing: index.md
Summarizing directory: root (0 pages, 1 subdirs)


Total pages processed: 10
Total relevant pages: 9


In [None]:
# Calculate total usage for the entire tree (page requests plus directory
# summarization requests) and print it with thousands separators.
total_usage = calculate_total_usage(doc_tree)
print(f"\nTotal Usage:")
print(f"  Input tokens: {total_usage.input:,}")
print(f"  Output tokens: {total_usage.output:,}")
print(f"  Total tokens: {(total_usage.input + total_usage.output):,}")


Total Usage:
  Input tokens: 28,530
  Output tokens: 5,536
  Total tokens: 34,066


In [None]:
doc_tree.model_dump()

{'name': 'root',
 'path': '',
 'better_name': 'svelte-reactivity-system',
 'pages': [{'filename': 'index.md',
   'original_path': 'index.md',
   'better_filename': 'index.md',
   'summary_markdown': 'This file is the documentation index/landing page for Svelte. It appears to be a mostly empty template page with only a title defined in the frontmatter. No substantive content about Svelte features, API, guides, or other documentation material is present in this file.',
   'short_summary_markdown': 'Empty documentation index page with only a title specified in frontmatter.',
   'one_line_summary_txt': 'Empty documentation landing page.',
   'relevant': False,
   'usage': {'input': 971, 'output': 188, 'details': None}}],
 'subdirs': {'02-runes': {'name': '02-runes',
   'path': '02-runes',
   'better_name': 'svelte-runes-reference',
   'pages': [{'filename': '01-what-are-runes.md',
     'original_path': '02-runes/01-what-are-runes.md',
     'better_filename': 'runes-overview.md',
     'summ

In [None]:


def save_doc_tree(doc_dir: DocDirectory, doc_source_root: PathLib, output_root: PathLib, source_name: str):
    """
    Save the processed documentation tree to disk.

    Any existing ``output_root/source_name`` directory is deleted first. Only
    pages with ``relevant=True`` and directories with a non-empty
    ``short_summary`` are written.

    Structure:
    - output_root/source_name/index.json - high-level metadata
    - output_root/source_name/<dir_name>/metadata.json - directory metadata
    - output_root/source_name/<dir_name>/<page_name>/metadata.json - page metadata
    - output_root/source_name/<dir_name>/<page_name>/summary.md
    - output_root/source_name/<dir_name>/<page_name>/summary_short.md
    - output_root/source_name/<dir_name>/<page_name>/original.md
    - output_root/source_name/<dir_name>/summary.md (for directories)
    - output_root/source_name/<dir_name>/summary_short.md (for directories)

    ``<dir_name>`` and ``<page_name>`` come from the model-suggested
    ``better_name`` / ``better_filename`` (extension removed for pages), with
    spaces and path separators replaced by underscores. The root directory is
    written straight into ``output_root/source_name`` when its name resolves
    to "root", otherwise into a subdirectory named after it.

    Args:
        doc_dir: Root of the processed tree.
        doc_source_root: Directory holding the original documentation files;
            ``page.original_path`` is resolved against it.
        output_root: Parent directory for all saved sources.
        source_name: Name of the folder created under ``output_root``.

    NOTE: two pages in the same directory whose names resolve to the same
    ``<page_name>`` share one folder, so the later page overwrites the earlier.
    """

    def safe_component(raw: str | None, fallback: str) -> str:
        """Turn a model-suggested name into one non-empty path component."""
        for candidate in (raw, fallback):
            cleaned = (candidate or "").replace(' ', '_')
            # The name comes from model output: never let it introduce extra
            # path levels or climb out of the target directory.
            cleaned = cleaned.replace('/', '_').replace('\\', '_')
            if cleaned.strip('.'):
                return cleaned
        return "unnamed"

    def write_utf8(path: PathLib, text: str) -> None:
        """Write text as UTF-8 regardless of the platform's locale encoding."""
        path.write_text(text, encoding="utf-8")

    def save_directory(doc_dir: DocDirectory, current_path: PathLib):
        """Recursively save directory structure"""
        relevant_pages = [page for page in doc_dir.pages if page.relevant]
        # Same condition as the recursion below, so metadata.json only lists
        # subdirectories that actually get written.
        saved_subdirs = [subdir for subdir in doc_dir.subdirs.values() if subdir.short_summary]

        # Save relevant pages in this directory
        for page in relevant_pages:
            # Folder name: better_filename without its extension; falls back
            # to the original file name if the suggestion is unusable.
            page_name = safe_component(
                page.better_filename.rsplit('.', 1)[0],
                page.filename.rsplit('.', 1)[0],
            )
            page_dir = current_path / page_name
            page_dir.mkdir(parents=True, exist_ok=True)

            # Write summary files
            write_utf8(page_dir / "summary.md", page.summary_markdown)
            write_utf8(page_dir / "summary_short.md", page.short_summary_markdown)

            # Copy original file
            original_file = doc_source_root / page.original_path
            if original_file.exists():
                shutil.copy2(original_file, page_dir / "original.md")

            # Save page metadata (only one-line summary)
            page_metadata = {
                "filename": page.filename,
                "original_path": page.original_path,
                "better_filename": page.better_filename,
                "one_line_summary": page.one_line_summary_txt,
                "usage": {
                    "input": page.usage.input,
                    "output": page.usage.output,
                }
            }
            write_utf8(page_dir / "metadata.json", json.dumps(page_metadata, indent=2))

        # Save directory summaries and metadata if this directory has relevant content
        if doc_dir.short_summary:
            write_utf8(current_path / "summary.md", doc_dir.short_summary)
            write_utf8(current_path / "summary_short.md", doc_dir.one_line_summary or "")

            # Save directory metadata (only one-line summary and relevant pages)
            dir_metadata = {
                "name": doc_dir.name,
                "path": doc_dir.path,
                "better_name": doc_dir.better_name,
                "one_line_summary": doc_dir.one_line_summary,
                "usage": {
                    "input": doc_dir.usage.input,
                    "output": doc_dir.usage.output,
                } if doc_dir.usage else None,
                "pages": [
                    {
                        "filename": page.filename,
                        "better_filename": page.better_filename,
                    }
                    for page in relevant_pages
                ],
                "subdirs": [
                    {
                        "name": subdir.name,
                        "better_name": subdir.better_name,
                    }
                    for subdir in saved_subdirs
                ]
            }
            write_utf8(current_path / "metadata.json", json.dumps(dir_metadata, indent=2))

        # Recursively save subdirectories (those without a summary have no
        # relevant content and are skipped)
        for subdir in saved_subdirs:
            subdir_path = current_path / safe_component(subdir.better_name, subdir.name)
            subdir_path.mkdir(parents=True, exist_ok=True)
            save_directory(subdir, subdir_path)

    # Create output directory (clear it first if it exists)
    output_dir = output_root / source_name
    if output_dir.exists():
        shutil.rmtree(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Save the directory structure
    root_name = safe_component(doc_dir.better_name, "root")
    root_path = output_dir if root_name == "root" else output_dir / root_name
    root_path.mkdir(parents=True, exist_ok=True)
    save_directory(doc_dir, root_path)

    # Create index.json with high-level metadata only
    total_usage = calculate_total_usage(doc_dir)
    # Walk the tree once per kind instead of once per statistic.
    all_pages = list(iter_pages(doc_dir))
    all_directories = list(iter_directories(doc_dir))

    index_data = {
        "source_name": source_name,
        "root_directory": {
            "name": doc_dir.name,
            "better_name": doc_dir.better_name,
            "path": doc_dir.path,
            "one_line_summary": doc_dir.one_line_summary,
        },
        "usage": {
            "input_tokens": total_usage.input,
            "output_tokens": total_usage.output,
            "total_tokens": total_usage.input + total_usage.output,
        },
        "statistics": {
            "total_pages": len(all_pages),
            "relevant_pages": sum(1 for p in all_pages if p.relevant),
            "total_directories": len(all_directories),
            "relevant_directories": sum(1 for d in all_directories if d.short_summary),
        }
    }

    write_utf8(output_dir / "index.json", json.dumps(index_data, indent=2))

    print(f"Saved documentation to: {output_dir}")
    print(f"  Total pages: {index_data['statistics']['total_pages']}")
    print(f"  Relevant pages: {index_data['statistics']['relevant_pages']}")
    print(f"  Total directories: {index_data['statistics']['total_directories']}")
    print(f"  Relevant directories: {index_data['statistics']['relevant_directories']}")

def iter_pages(doc_dir: DocDirectory):
    """Yield every page in the tree: a directory's own pages first, then each
    subdirectory's pages in turn (pre-order)."""
    pending = [doc_dir]
    while pending:
        current = pending.pop()
        yield from current.pages
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(list(current.subdirs.values())))

def iter_directories(doc_dir: DocDirectory):
    """Yield every directory below ``doc_dir`` (``doc_dir`` itself is not
    included), each one before its own descendants (pre-order)."""
    pending = list(reversed(list(doc_dir.subdirs.values())))
    while pending:
        current = pending.pop()
        yield current
        # Push children reversed so they are popped in their original order.
        pending.extend(reversed(list(current.subdirs.values())))

In [None]:
# Save the processed documentation tree under settings.output_dir / source.name
# (save_doc_tree removes any previous output in that folder first).
save_doc_tree(doc_tree, doc_dir_path, settings.output_dir, source.name)

Saved documentation to: ../processed_documents/sveltejs-svelte
  Total pages: 10
  Relevant pages: 9
  Total directories: 1
  Relevant directories: 1


In [None]:
# Preview what save_doc_tree wrote: index.json plus one page-level metadata.json
index_path = settings.output_dir / source.name / "index.json"
if index_path.exists():
    with open(index_path, encoding="utf-8") as f:
        index_data = json.load(f)

    # Show a preview of the structure
    print("Index.json structure preview:")
    print(f"Source: {index_data['source_name']}")
    print(f"\nRoot directory: {index_data['root_directory'].get('better_name')}")
    print(f"One-line summary: {index_data['root_directory'].get('one_line_summary')}")
    print("\nStatistics:")
    for key, value in index_data.get('statistics', {}).items():
        print(f"  {key}: {value}")
    print("\nUsage:")
    for key, value in index_data.get('usage', {}).items():
        print(f"  {key}: {value:,}")

    # index.json carries no per-page or per-subdirectory data, so the sample
    # page is taken from the metadata.json files saved next to it. Page-level
    # files are the ones with an "original_path" key (directory-level files
    # have "path" instead). Only relevant pages are saved, so there is no
    # "relevant" flag to show.
    for meta_path in sorted(index_path.parent.rglob("metadata.json")):
        with open(meta_path, encoding="utf-8") as f:
            page_meta = json.load(f)
        if "original_path" not in page_meta:
            continue
        print("\nSample page metadata:")
        print(f"  Original: {page_meta.get('filename')} -> {page_meta.get('better_filename')}")
        print(f"  Summary: {page_meta.get('one_line_summary')}")
        print(f"  Usage: {page_meta.get('usage')}")
        break

Index.json structure preview:
Source: sveltejs-svelte

Root directory: svelte-reactivity-system
One-line summary: Svelte 5 runes are $ -prefixed compiler keywords that control reactivity, state management, derived values, effects, and component binding.

Statistics:
  total_pages: 10
  relevant_pages: 9
  total_directories: 1
  relevant_directories: 1

Usage:
  input_tokens: 28,530
  output_tokens: 5,536
  total_tokens: 34,066


In [None]:
# # View a sample directory summary
# def print_tree_summaries(doc_dir: DocDirectory, indent=0):
#     prefix = "  " * indent
#     display_name = doc_dir.better_name or doc_dir.name
#     print(f"{prefix}📁 {display_name} ({doc_dir.path or 'root'})")

#     if doc_dir.one_line_summary:
#         print(f"{prefix}   → {doc_dir.one_line_summary}")

#     # Print relevant pages
#     relevant_pages = [p for p in doc_dir.pages if p.relevant]
#     if relevant_pages:
#         print(f"{prefix}   Pages: {len(relevant_pages)} relevant / {len(doc_dir.pages)} total")

#     # Recursively print subdirectories
#     for subdir in doc_dir.subdirs.values():
#         print_tree_summaries(subdir, indent + 1)

# print_tree_summaries(doc_tree)