# imports

In [34]:
!python3 --version

Python 3.11.4


In [2]:
# !pip install redlines

In [61]:
import re
import requests
import json
import IPython.display
import pydantic
import random
import logging

from redlines import Redlines

# settings

In [26]:
if True:
    # host = 'http://192.168.0.59:11434'
    host = 'http://192.168.0.211:11434'
else:
    host = 'http://localhost:11434'
host

'http://192.168.0.211:11434'

# utils

In [5]:
class book_edit(pydantic.BaseModel):
    edited_excerpt: str
    comments: str
    rationale: str

book_edit.model_json_schema()

{'properties': {'edited_excerpt': {'title': 'Edited Excerpt',
   'type': 'string'},
  'comments': {'title': 'Comments', 'type': 'string'},
  'rationale': {'title': 'Rationale', 'type': 'string'}},
 'required': ['edited_excerpt', 'comments', 'rationale'],
 'title': 'book_edit',
 'type': 'object'}

In [6]:
def markdown(s):
    display(IPython.display.Markdown(s))

In [7]:
def split_markdown_sections(markdown_text, heading_level=1):
    """
    Split a markdown string into sections based on heading level.
    Returns a list of section strings.
    """
    heading_pattern = re.compile(rf'^({"#" * heading_level}) (.+)', re.MULTILINE)
    matches = list(heading_pattern.finditer(markdown_text))

    if not matches:
        return [markdown_text]  # No headings, return entire content as one section

    sections = []

    for i, match in enumerate(matches):
        start = match.start()
        end = matches[i + 1].start() if i + 1 < len(matches) else len(markdown_text)
        section = markdown_text[start:end].strip()
        sections.append(section)

    return sections


In [8]:
def split_into_paragraphs(section_text):
    """
    Splits a markdown section into paragraphs.
    Ignores blank lines inside bullet points, quotes, or code blocks.
    Returns a list of paragraph strings.
    """
    lines = section_text.splitlines()
    paragraphs = []
    buffer = []

    for line in lines:
        stripped = line.strip()

        if stripped:
            buffer.append(line)
        elif buffer:
            # End of a paragraph
            paragraphs.append('\n'.join(buffer).strip())
            buffer = []

    # Catch any remaining paragraph
    if buffer:
        paragraphs.append('\n'.join(buffer).strip())

    return paragraphs


In [27]:
def get_ollama_models(host=host):
    """
    Returns a list of installed Ollama model names.
    """
    try:
        response = requests.get(f"{host}/api/tags")
        response.raise_for_status()
        data = response.json()
        models = [model["name"] for model in data.get("models", [])]
        return models
    except requests.RequestException as e:
        print("Error connecting to Ollama:", e)
        return []

get_ollama_models()

['deepseek-r1:14b',
 'gemma3:12b',
 'deepseek-r1:latest',
 'gemma3:latest',
 'qwen3:latest',
 'qwen2.5vl:latest']

'qwen3:latest'

In [60]:
def edit_text(text, system_prompt=None, model=None, host='http://localhost:11434'):
    url = f'{host}/api/generate'
    schema = book_edit.model_json_schema()
    if system_prompt is None:
        system_prompt = (
            'You are a professional book editor. Edit the following text for clarity, grammar, and style, '
            'and provide comments and rationale for your changes. Return the response in JSON format '
            'conforming to this schema:\n' + json.dumps(schema, indent=2)
        )

    if model is None:
        model = random.choice(get_ollama_models())
        
    prompt = f"{system_prompt}\n\nText to edit:\n{text}"
    payload = {
        'model': model,
        'prompt': prompt,
        'stream': False,
        'format': schema
    }
    response = requests.post(url, json=payload)
    if response.status_code != 200:
        print(response.text)
        assert False
    result = response.json()['response']
    if isinstance(result, str):
        result = json.loads(result)
    # print(result)
    return result

# t = queue[0]
# edit_text(t, system_prompt, 'qwen2.5vl:7b')

edit_text('i like big butts', model='deepseek-r1', host=host)

{'edited_excerpt': 'I enjoy large buttocks.',
 'comments': "Original text was informal and contained slang terms. I replaced 'big' with 'large' for a more neutral tone, and added proper nouns (buttocks) instead of the colloquial term to make it sound less casual while still conveying the same meaning without being crude.",
 'rationale': "The original phrase is too direct and informal for a professional book editing context. Using standard English terms like 'large buttocks' maintains clarity but elevates the language appropriately."}

In [12]:
def increase_heading_levels_simple(markdown_text, prefix='#'):
    """
    Adds one '#' to the beginning of every Markdown heading line (e.g., # → ##).
    """
    return re.sub(r'^(#{1,6})\s', prefix + r'\1 ', markdown_text, flags=re.MULTILINE)


In [13]:
def display_inline_markdown_diff(old_md, new_md):
    diff = Redlines(old_md, new_md)
    display(IPython.display.Markdown(diff.output_markdown))

In [14]:
def capture_think_and_rest(text):
    # This pattern captures: (1) before <think>, (2) inside <think>, (3) after </think>
    match = re.match(r'(.*)<think>(.*?)</think>(.*)', text, flags=re.DOTALL)
    if match:
        # Combine the parts before and after <think> as 'rest'
        rest = match.group(1) + match.group(3)
        thinking = match.group(2)
        return thinking, rest
    else:
        # If no <think> block is present
        return '', text

# main

# ollama_models

In [29]:
ollama_models = get_ollama_models()
ignore_list = [
    'deepseek-r1:32b',
    # 'deepseek-r1:14b',
    'deepseek-r1:7b',
    'deepseek-r1:1.5b',
    'gemma3:27b-it-qat',
    'gemma3:12b-it-qat',
    # 'gemma3:12b',
    'gemma3:1b-it-qat',
    'gemma3:1b-it-qat',
    'gemma3:latest',
    'qwen3:14b',
    'hf.co/OuteAI/OuteTTS-0.2-500M-GGUF:Q2_K',
    'gemma3:1b-it-qat',
]
ollama_models = [x for x in ollama_models if x not in ignore_list]
ollama_models

['deepseek-r1:14b',
 'gemma3:12b',
 'deepseek-r1:latest',
 'qwen3:latest',
 'qwen2.5vl:latest']

# system_prompt

In [16]:
system_prompt = r'''
You are an expert editor refining an adult fantasy novel written in British English, set in a modern-day world where gods, monsters, and mythical creatures coexist with humans. The target audience is adults, and the book includes swearing and sexual references, which are acceptable but must remain tasteful and contextually appropriate. The story follows Aisling, an Investigative agent; Eric, who communicates with spirits; and Maeve, a resurrected woman confronting a demon. Your goal is to enhance the manuscript while preserving its tone, style, and narrative voice. Follow these guidelines:

1. **Language and Style**:

   - Adhere to British English conventions (e.g., "colour" not "color", "organise" not "organize").
   - Don't insert a dot after a title, (e.g. "Mr" not "Mr.")
2. **Editing Tasks**:

   - Correct grammar, punctuation, and spelling per British English standards (e.g., single quotes for dialogue, consistent Oxford comma use).
   - Do not comment on or flag issues related to the plot, including inconsistencies or pacing.
   - Do not change the tone.

3. **Culture and Genre**:

   - Respect British cultural nuances, including modern idioms suitable for an adult audience.
   - Ensure reimagined folklore creatures and fantasy elements align with the modern setting and feel fresh, avoiding clichéd tropes.

4. **Output Format**:

   - Provide edited text with changes marked (e.g., \[original\] → \[edited\]).
   - Include a separate section with concise comments explaining major changes or suggestions, excluding any plot-related feedback.
   - For significant rewrites, propose revised text with a brief rationale, focusing on style, language, or setting.

5. **Additional Instruction**:

   - If no manuscript excerpt is provided, do not generate or edit a sample passage. Wait for further input.

Begin editing the provided excerpt, aligning with these guidelines. If no excerpt is provided, do not proceed until further instructions are given.
'''
# markdown(system_prompt)

In [17]:
system_prompt = '''
You are an expert editor specialising in British English.  
Your task is to review the user’s text for correct spelling, grammar according to British English conventions.  
Make only the necessary corrections—do not alter the tone, style, or meaning of the original text.  
Don't add extra commas.
Don't change quotation marks.

Return the corrected version only, without explanations or additional comments.

---

**Example Usage:**

> **User:**  
> The colour of the neighbours car is grey, but it's tires are flat.

> **AI Output:**  
> The colour of the neighbour’s car is grey, but its tyres are flat.


'''
markdown(system_prompt)


You are an expert editor specialising in British English.  
Your task is to review the user’s text for correct spelling, grammar according to British English conventions.  
Make only the necessary corrections—do not alter the tone, style, or meaning of the original text.  
Don't add extra commas.
Don't change quotation marks.

Return the corrected version only, without explanations or additional comments.

---

**Example Usage:**

> **User:**  
> The colour of the neighbours car is grey, but it's tires are flat.

> **AI Output:**  
> The colour of the neighbour’s car is grey, but its tyres are flat.




# read book

In [18]:
with open('book.md', 'r', encoding='utf-8') as f:
    content = f.read()

In [30]:
sections = split_markdown_sections(content)
len(sections)

46

# iter

In [20]:
queue = []

i = 0
for section in sections[:3]:
    paragraphs = split_into_paragraphs(section)
    for paragraph in paragraphs:
        queue.append(paragraph)

queue = queue[9:13]
len(queue)
queue

['While waiting for her name to be called for her job interview, Aisling Kane pulled out her phone and watched a video of the man who might be her next boss. The video was of a television show that had aired the previous night.',
 'The audience clapped as the show started, and the host greeted the viewers. "Good evening, ladies and gentlemen and welcome to Questions Tonight. I am your host, Phillip Newman, and my guests tonight are, on my left, the leader of the Britain First Party, Mr David Smith." There was a short pause for applause as a middle-aged obese man dressed in an ill-fitting dark blue suit with a Union Jack tie smiled and nodded at the audience. His short, bristly grey hair and five o\'clock shadow made him look dirty and unwashed. The host continued, "On my right, Sir Frederic Allen of the Department of Education." More applause this time for a thin, spectacled gentleman with neat, tidy black hair and a blue tie. He was clean-shaven and smiled uncomfortably. He was, in es

In [21]:
# queue

In [22]:
# t = queue[0]
# edit_text(t, system_prompt, 'qwen2.5vl:7b')




In [23]:
# edit_text(queue[0], system_prompt, 'gemma3:4b-it-qat')

In [24]:
model = random.choice(ollama_models)
model

'gemma3:4b-it-qat'

In [51]:
s = sections[0]
s


'# Introduction\n\nTo my clearly intelligent and undoubtedly attractive reader. I just wanted to say a quick thank you for taking the time to read my work. This is the first book of what will hopefully be a long and (most importantly) fun career.\n\nWhile I cannot personally thank all my readers, I can express my gratitude here for giving me your attention for a brief moment in time. Thank you. I sincerely mean that.\n\nI’d also like to say thank you to everyone who encouraged and supported me when writing this book. They had to wait many years for me to stop talking about it and actually do it.\n\nFinally, I’d like to dedicate this book to my wife. Without her, I wouldn’t even be here at all.\n\nThanks again, everyone. Please enjoy.\n\nSincerely  \nTimothy Stormcrow\n\nP.S. My wife has just told me that I will be sleeping on the sofa unless I also dedicate the book to my cat Bellatrix. She was no help and hindered my attempts to write this book. But I love her.'

In [52]:
system_prompt

"\nYou are an expert editor specialising in British English.  \nYour task is to review the user’s text for correct spelling, grammar according to British English conventions.  \nMake only the necessary corrections—do not alter the tone, style, or meaning of the original text.  \nDon't add extra commas.\nDon't change quotation marks.\n\nReturn the corrected version only, without explanations or additional comments.\n\n---\n\n**Example Usage:**\n\n> **User:**  \n> The colour of the neighbours car is grey, but it's tires are flat.\n\n> **AI Output:**  \n> The colour of the neighbour’s car is grey, but its tyres are flat.\n\n\n"

In [63]:
edit_text(s, host=host)

{'edited_excerpt': '# Introduction\n\nTo my clearly intelligent and undoubtedly attractive reader, I just wanted to say a quick thank you for taking the time to read my work. This is the first book of what will hopefully be a long and (most importantly) fun career.\n\nWhile I cannot personally thank all my readers, I can express my gratitude here for giving me your attention for a brief moment in time. Thank you. I sincerely mean that.\n\nI’d also like to say thank you to everyone who encouraged and supported me while writing this book. They had to wait many years for me to stop talking about it and actually do it.\n\nFinally, I’d like to dedicate this book to my wife. Without her, I wouldn’t even be here at all.\n\nThanks again, everyone. Please enjoy.\n\nSincerely,\nTimothy Stormcrow\n\nP.S. My wife has just told me that I will be sleeping on the sofa unless I also dedicate the book to my cat Bellatrix. She was no help and hindered my attempts to write this book. But I love her.',
 '

In [67]:
def review_text(text, host, system_prompt=None, model=None):
    r = edit_text(text, system_prompt, model, host=host)
    edited_excerpt = r.get('edited_excerpt', None)
    comments = r.get('comments', None)
    rationale = r.get('rationale', None)

    edit = increase_heading_levels_simple(edited_excerpt, prefix='##')

    markdown('### edit')
    display_inline_markdown_diff(t, edit)

    if comments:
        markdown(f'### comments\n\n{comments}')

    if rationale:
        markdown(f'### rationale\n\n{rationale}')


review_text(s, host=host, system_prompt=system_prompt, model='deepseek-r1:14b')

### edit

<span style='color:red;font-weight:700;text-decoration:line-through;'>While waiting for her name to be called for her job interview, Aisling Kane pulled out her phone and watched a video </span><span style='color:green;font-weight:700;'>The colour </span>of the <span style='color:red;font-weight:700;text-decoration:line-through;'>man who might be her next boss. The video was of a television show that had aired the previous night.</span><span style='color:green;font-weight:700;'>neighbours car is grey, but it's tires are flat.</span>

### comments

 tyre’s spelling as ‘tyres’ which conforms to British English conventions rather than using American spelling like ‘tire’.

### rationale

 As an editor specialising in British English, I noticed that the user wrote 'tires' instead of 'tyres'. In British English, we use 'tyres' for the plural form. So I changed it accordingly.

In [25]:
markdown(f'## {model=}')

for t in queue:
    r = edit_text(t, system_prompt, model)
    edited_excerpt = r.get('edited_excerpt', None)
    comments = r.get('comments', None)
    rationale = r.get('rationale', None)

    edit = increase_heading_levels_simple(edited_excerpt, prefix='##')

    markdown('### edit')
    display_inline_markdown_diff(t, edit)

    if comments:
        markdown(f'### comments\n\n{comments}')

    if rationale:
        markdown(f'### rationale\n\n{rationale}')

    markdown('### end')

## model='gemma3:4b-it-qat'

JSONDecodeError: Unterminated string starting at: line 2 column 21 (char 22)

In [None]:
%%time

# markdown(f'## input\n\n{increase_heading_levels_simple(section)}')

for model in ollama_models[2:]:
    markdown(f'## {model=}')

    for t in queue:
        r = edit_text(t, system_prompt, model)
        edited_excerpt = r.get('edited_excerpt', None)
        comments = r.get('comments', None)
        rationale = r.get('rationale', None)

        edit = increase_heading_levels_simple(edited_excerpt, prefix='##')

        markdown('### edit')
        display_inline_markdown_diff(t, edit)

        if comments:
            markdown(f'### comments\n\n{comments}')

        if rationale:
            markdown(f'### rationale\n\n{rationale}')

        markdown('### end')
        
        # display(r)