# imports

In [None]:
import re
import requests
import IPython.display

from redlines import Redlines

# utils

In [None]:
def markdown(s):
    """Render the string `s` as Markdown in the notebook output."""
    rendered = IPython.display.Markdown(s)
    display(rendered)

In [None]:
def split_markdown_sections(markdown_text, heading_level=1):
    """
    Cut a markdown string into sections, one per heading of the given level.

    Each section starts at a matching heading line and runs up to the next
    matching heading (or the end of the text), with surrounding whitespace
    stripped. Text that precedes the first matching heading is not part of
    any returned section. When no heading matches, the whole text is
    returned as a single-element list.
    """
    heading_pattern = re.compile(rf'^({"#" * heading_level}) (.+)', re.MULTILINE)
    starts = [m.start() for m in heading_pattern.finditer(markdown_text)]

    if not starts:
        # No headings at this level: treat the entire text as one section.
        return [markdown_text]

    # Each section ends where the next one begins; the last ends at EOF.
    boundaries = starts + [len(markdown_text)]
    return [
        markdown_text[begin:end].strip()
        for begin, end in zip(boundaries, boundaries[1:])
    ]


In [None]:
def split_into_paragraphs(section_text):
    """
    Splits a markdown section into paragraphs.

    Paragraphs are runs of non-blank lines separated by one or more blank
    (whitespace-only) lines. Blank lines inside a fenced code block
    (``` or ~~~) do not end the paragraph, so a fenced block is always
    returned in one piece. Bullet lists and block quotes get no special
    treatment: a blank line between their items starts a new paragraph.

    Returns a list of paragraph strings, each stripped of surrounding
    whitespace. An empty or all-blank input yields an empty list.
    """
    paragraphs = []
    buffer = []
    open_fence = None  # '```' or '~~~' while inside a fenced code block

    for line in section_text.splitlines():
        stripped = line.strip()
        marker = stripped[:3]

        if open_fence is not None:
            # Inside a fence every line (blank or not) belongs to the block.
            buffer.append(line)
            if marker == open_fence:
                open_fence = None
            continue

        if marker in ('```', '~~~'):
            open_fence = marker
            buffer.append(line)
        elif stripped:
            buffer.append(line)
        elif buffer:
            # End of a paragraph
            paragraphs.append('\n'.join(buffer).strip())
            buffer = []

    # Catch any remaining paragraph (including an unterminated fence)
    if buffer:
        paragraphs.append('\n'.join(buffer).strip())

    return paragraphs


In [None]:
def get_ollama_models(host="http://localhost:11434", timeout=10):
    """
    Returns a list of installed Ollama model names.

    Queries `{host}/api/tags` and collects the "name" field of every entry
    under "models" in the JSON reply.

    host: base URL of the Ollama server.
    timeout: seconds to wait for the server before giving up; without it
        the request could block indefinitely on an unresponsive host.

    On any requests-level failure (connection error, timeout, HTTP error
    status) the error is printed and an empty list is returned.
    """
    try:
        response = requests.get(f"{host}/api/tags", timeout=timeout)
        response.raise_for_status()
        data = response.json()
        models = [model["name"] for model in data.get("models", [])]
        return models
    except requests.RequestException as e:
        print("Error connecting to Ollama:", e)
        return []

In [None]:
def edit_text(text, system_prompt=None, model='gemma3:4b-it-qat'):
    """
    Sends `text` to a local Ollama model for editing and returns its reply.

    The system prompt and the text are joined with a blank line and posted
    as a single non-streaming prompt to the /api/generate endpoint.

    text: the passage to edit.
    system_prompt: instructions placed before the text; when None a generic
        book-editor instruction is used.
    model: name of the Ollama model to run.

    Returns the 'response' field of the JSON reply.
    Raises requests.HTTPError if the server answers with an error status.
    """
    url = 'http://localhost:11434/api/generate'
    if system_prompt is None:
        system_prompt = 'You are a professional book editor. Edit the following text for clarity, grammar, and style:'
    payload = {
        'model': model,
        # Use the caller's system prompt; previously a hard-coded prompt was
        # sent here and the argument was silently ignored.
        'prompt': f"{system_prompt}\n\n{text}",
        'stream': False
    }
    response = requests.post(url, json=payload)
    # Surface HTTP failures directly instead of as a KeyError on 'response'.
    response.raise_for_status()
    return response.json()['response']

In [None]:
def increase_heading_levels_simple(markdown_text, prefix='#'):
    """
    Prepends `prefix` to the marker of every Markdown heading line.

    A heading line is one that starts with 1-6 '#' characters followed by a
    whitespace character; that whitespace character is replaced by a single
    space. With the default prefix each heading goes one level deeper
    (e.g., # → ##); prefix='##' pushes headings two levels deeper.
    """
    heading_marker = re.compile(r'^(#{1,6})\s', flags=re.MULTILINE)
    replacement = prefix + r'\1 '
    return heading_marker.sub(replacement, markdown_text)


In [None]:
def display_inline_markdown_diff(old_md, new_md):
    """Show the Redlines diff between `old_md` and `new_md` as Markdown in the notebook."""
    redline_markdown = Redlines(old_md, new_md).output_markdown
    display(IPython.display.Markdown(redline_markdown))

In [None]:
def capture_think_and_rest(text):
    """
    Separate a <think>...</think> block from the rest of a model reply.

    Returns a (thinking, rest) tuple: `thinking` is the text inside the
    matched <think> block and `rest` is whatever came before the block
    joined with whatever came after it. If the text has no <think> block,
    returns ('', text).
    """
    # Groups: (1) before <think>, (2) inside <think>, (3) after </think>
    found = re.match(r'(.*)<think>(.*?)</think>(.*)', text, flags=re.DOTALL)
    if found is None:
        # No <think> block present: everything is 'rest'.
        return '', text

    before, thinking, after = found.groups()
    return thinking, before + after

# main

# ollama_models

In [None]:
ollama_models = get_ollama_models()
# Installed models to leave out of the run.
# Each entry needs its own trailing comma: adjacent string literals are
# silently concatenated into one name that matches no installed model.
ignore_list = [
    'deepseek-r1:32b',
    'deepseek-r1:14b',
    'gemma3:27b-it-qat',
    'gemma3:12b-it-qat',
    'gemma3:12b',
    'qwen3:14b',
]
ollama_models = [x for x in ollama_models if x not in ignore_list]
ollama_models

In [None]:
# Load the whole manuscript into memory as one UTF-8 string.
with open('book.md', encoding='utf-8') as book_file:
    content = book_file.read()

In [None]:
# Split the manuscript at top-level ('#') headings and show how many sections resulted.
sections = split_markdown_sections(content, heading_level=1)
len(sections)

# system_prompt

In [None]:
# Editing instructions for the model. Kept as a raw triple-quoted string so the
# backslash-escaped brackets in the "Output Format" item reach the model verbatim.
system_prompt = r'''
You are an expert editor refining an adult fantasy novel written in British English, set in a modern-day world where gods, monsters, and mythical creatures coexist with humans. The target audience is adults, and the book includes swearing and sexual references, which are acceptable but must remain tasteful and contextually appropriate. The story follows Aisling, an Investigative agent; Eric, who communicates with spirits; and Maeve, a resurrected woman confronting a demon. Your goal is to enhance the manuscript while preserving its tone, style, and narrative voice. Follow these guidelines:

1. **Language and Style**:

   - Adhere to British English conventions (e.g., "colour" not "color", "organise" not "organize").
   - Don't insert a dot after a title, (e.g. "Mr" not "Mr.")
2. **Editing Tasks**:

   - Correct grammar, punctuation, and spelling per British English standards (e.g., single quotes for dialogue, consistent Oxford comma use).
   - Do not comment on or flag issues related to the plot, including inconsistencies or pacing.
   - Do not change the tone.

3. **Culture and Genre**:

   - Respect British cultural nuances, including modern idioms suitable for an adult audience.
   - Ensure reimagined folklore creatures and fantasy elements align with the modern setting and feel fresh, avoiding clichéd tropes.

4. **Output Format**:

   - Provide edited text with changes marked (e.g., \[original\] → \[edited\]).
   - Include a separate section with concise comments explaining major changes or suggestions, excluding any plot-related feedback.
   - For significant rewrites, propose revised text with a brief rationale, focusing on style, language, or setting.

5. **Additional Instruction**:

   - If no manuscript excerpt is provided, do not generate or edit a sample passage. Wait for further input.

Begin editing the provided excerpt, aligning with these guidelines. If no excerpt is provided, do not proceed until further instructions are given.
'''
# Render the prompt as Markdown in the cell output.
markdown(system_prompt)

# section

In [None]:
# Take the third section (index 2) and push its headings one level deeper.
section = increase_heading_levels_simple(sections[2], prefix='#')
# markdown(section)

# iter

In [None]:
# Build the work queue: every paragraph of the selected section(s), in order.
queue = []

i = 0
for section in sections[2:3]:
    paragraphs = split_into_paragraphs(section)
    queue.extend(paragraphs)

# Drop the first two paragraphs before editing
# (presumably heading/front matter of the section; confirm against book.md).
del queue[:2]
queue

In [None]:
# markdown(system_prompt)

In [None]:
%%time

# markdown(f'## input\n\n{increase_heading_levels_simple(section)}')

# Run every queued paragraph through the first available model and show,
# per paragraph, the model's reasoning (if any) followed by a redline diff.
for model in ollama_models[:1]:
    markdown(f'## {model=}')

    for t in queue:
        r = edit_text(t, system_prompt, model)

        # Separate any <think>...</think> reasoning from the actual answer.
        thinking, rest = capture_think_and_rest(r)

        # Push headings in the reply two levels down so they nest under the model heading.
        rr = increase_heading_levels_simple(rest, prefix='##')

        if thinking:
            markdown(f'## thinking\n\n{thinking}\n\n## end thinking')

        # markdown('## output')
        display_inline_markdown_diff(t, rr)
        # markdown('## end output')