In [1]:
from llm import LLM
from db import Database

# LLM client; later cells call client.generate(...) on it.
client = LLM()
# Database handle; later cells run SQL through db.execute(...) / db.fetchall().
db = Database()

In [2]:
# Make sure the letters table has a TEXT column named markdown to hold the
# converted output. The IF EXISTS / IF NOT EXISTS guards are there so that
# re-running this cell does not fail when the column is already present.
db.execute("""
    ALTER TABLE IF EXISTS letters
    ADD COLUMN IF NOT EXISTS markdown TEXT
""")

In [3]:
def convert_to_markdown(text):
    """Ask the LLM client to reformat one letter as markdown.

    A system message and a user message are built; the user message holds
    the formatting rules followed by ``text`` interpolated verbatim.

    Args:
        text: The raw letter text to convert.

    Returns:
        Whatever ``client.generate`` returns for the two-message conversation.
    """
    # NOTE: both literals are sent exactly as written, including the leading
    # newline and the four-space indentation on every line.
    persona = """
    You are an expert in markdown and have been asked to convert a series of letters to markdown.
    """

    instructions = f"""
    Convert the following text to markdown using these rules:
    - Who the letter is from should be a level 2 header (##) only if explicitly stated in the original text. Keep the name and any other information in the same line.
    - When the letter was written should be a level 3 header (###) only if explicitly stated in the original text. Keep the date and any other information in the same line. Don't mention if the date is not present.
    - No changes should be made to the body of the letter.
    - Don't add any text that is not present in the original.
    - Don't remove any information that is present in the original text.
    - Do not add headers for the sender or date if this information is not explicitly provided in the original text.
    - Do not add code block markers (```) to the response.
    - Present the converted text directly without any additional formatting or explanations.
    
    Do this for the following text:

    {text}
    """

    system_message = {'role': 'system', 'content': persona}
    user_message = {'role': 'user', 'content': instructions}

    # System message first, then the user request.
    return client.generate(messages=[system_message, user_message])
    

# Load the id and raw text of every letter, ordered by ascending id.
db.execute("""
    SELECT id, raw
    FROM letters
    ORDER BY id ASC
""")

rows = db.fetchall()

print('Retrieved letters...', len(rows))
print('Letter ids:', [record[0] for record in rows])

# Convert each letter in turn and store the result in its markdown column.
for letter_id, raw_text in rows:
    converted = convert_to_markdown(raw_text)
    update_params = (converted, letter_id)

    # Values are passed as query parameters rather than formatted into the SQL.
    db.execute("""
        UPDATE letters
        SET markdown = %s
        WHERE id = %s
    """, update_params)

    print(f'Processed letter {letter_id}.')

print('Done!')



Retrieved letters... 5
Letter ids: [9, 10, 11, 12, 13]
Generating response with model gpt-4o-2024-05-13
Processed letter 9.
Generating response with model gpt-4o-2024-05-13
Processed letter 10.
Generating response with model gpt-4o-2024-05-13
Processed letter 11.
Generating response with model gpt-4o-2024-05-13
Processed letter 12.
Generating response with model gpt-4o-2024-05-13
Processed letter 13.
Done!
