In [4]:
from llm import LLM
from db import Database

# Shared handles for the cells below: `client` is used to send chat-style
# requests via `client.generate(...)`, and `db` runs SQL via `db.execute(...)`
# / `db.fetchall()`.
# NOTE(review): connection settings and model selection are presumably
# configured inside the `llm` / `db` modules -- confirm there.
client = LLM()
db = Database()

In [5]:
# Schema step: make sure `letters` has a `markdown` TEXT column to receive the
# converted output. The IF EXISTS / IF NOT EXISTS guards are there so the
# statement does not fail when the table is missing or the column has already
# been added, which keeps this cell re-runnable.
# NOTE(review): this guard syntax is PostgreSQL-style -- confirm the backend.
db.execute("""
    ALTER TABLE IF EXISTS letters
    ADD COLUMN IF NOT EXISTS markdown TEXT
""")

In [6]:
def convert_to_markdown(text):
    """Ask the LLM to re-express one letter as markdown.

    The prompt instructs the model to promote the sender to a level 2 header
    and the date to a level 3 header -- only when the original states them
    explicitly -- and to leave the body of the letter untouched.

    Args:
        text: Raw letter text. ``None``, empty, or whitespace-only input is
            returned unchanged and the model is not called.

    Returns:
        Whatever ``client.generate`` returns for the conversion request
        (presumably the markdown string -- confirm against the ``LLM``
        wrapper), or ``text`` itself when there is nothing to convert.
    """
    # Guard: interpolating None would send the literal word "None" to the
    # model, and a blank letter invites it to invent content that would then
    # be stored as if it were the converted letter.
    if text is None or not text.strip():
        return text

    system = """
    You are an expert in markdown and have been asked to convert a series of letters to markdown.
    """

    # The wording and whitespace of the prompt are part of the request sent to
    # the model, so they are kept exactly as they were.
    prompt = f"""
    Convert the following text to markdown using these rules:
    - Who the letter is from should be a level 2 header (##) only if explicitly stated in the original text. Keep the name and any other information in the same line.
    - When the letter was written should be a level 3 header (###) only if explicitly stated in the original text. Keep the date and any other information in the same line. Don't mention if the date is not present.
    - No changes should be made to the body of the letter.
    - Don't add any text that is not present in the original.
    - Don't remove any information that is present in the original text.
    - Do not add headers for the sender or date if this information is not explicitly provided in the original text.
    - Do not add code block markers (```) to the response.
    - Present the converted text directly without any additional formatting or explanations.
    
    Do this for the following text:

    {text}
    """

    return client.generate(
        messages=[
            {
                'role': 'system',
                'content': system
            },
            {
                'role': 'user',
                'content': prompt
            }
        ],
    )
    

# Batch job: read every letter, convert it with the LLM, and write the result
# back into the `markdown` column of the same row.

# Letters are fetched in ascending id order, so progress messages come out in
# that order too.
select_letters_sql = """
    SELECT id, raw
    FROM letters
    ORDER BY id ASC
"""

# Parameterised update; values are passed separately from the SQL text.
store_markdown_sql = """
        UPDATE letters
        SET markdown = %s
        WHERE id = %s
    """

db.execute(select_letters_sql)
rows = db.fetchall()

letter_ids = [record[0] for record in rows]
print('Retrieved letters...', len(rows))
print('Letter ids:', letter_ids)

# One LLM request and one UPDATE per letter, strictly sequential.
for row_id, raw in rows:
    markdown = convert_to_markdown(raw)
    db.execute(store_markdown_sql, (markdown, row_id))
    print(f'Processed letter {row_id}.')

print('Done!')



Retrieved letters... 84
Letter ids: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84]
Generating response with model gpt-4o-2024-05-13
Processed letter 1.
Generating response with model gpt-4o-2024-05-13
Processed letter 2.
Generating response with model gpt-4o-2024-05-13
Processed letter 3.
Generating response with model gpt-4o-2024-05-13
Processed letter 4.
Generating response with model gpt-4o-2024-05-13
Processed letter 5.
Generating response with model gpt-4o-2024-05-13
Processed letter 6.
Generating response with model gpt-4o-2024-05-13
Processed letter 7.
Generating response with model gpt-4o-2024-05-13
Processed letter 8.
Generating response with model gpt-4o-2024-05-13
Processed letter 9.
Generating resp