# Database Query Notebook

This notebook is a scratchpad for querying the email processing database with pandas.

In [1]:
import pandas as pd
import sys
import os

# Make the project package importable from this notebook.
# NOTE: this uses the current working directory, so it assumes the kernel was
# started from the project root -- TODO confirm for your setup.
project_root = os.getcwd()
# Guard the append so re-running this cell does not accumulate duplicate
# entries in sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)

from email_processing.database.db_manager import EmailDatabaseManager

In [2]:
# Initialize database manager
# `db_manager` and `connection` are notebook-level names; the query cells
# below pass `connection` to pd.read_sql.
db_manager = EmailDatabaseManager()
connection = db_manager.get_connection()

# Echo the manager's db_path so it is visible which database is being queried.
print(f"Connected to database: {db_manager.db_path}")

Connected to database: data/email_processing.db


In [5]:
# Load every row and column of the `emails` table into a DataFrame.
query = "SELECT * FROM emails"
emails_df = pd.read_sql(sql=query, con=connection)

# Report how many rows came back, then preview the first five as the cell output.
print(f"Fetched {len(emails_df)} emails from database")
emails_df.head(5)

Fetched 71 emails from database


Unnamed: 0,email_id,date,sender,subject,body_markdown,body_clean,pdf_text,raw_email,category_id
0,194a8aefa4f1a8cb,2025-01-27 16:53:45.000000,Autosport <news@e.autosport.com>,Newey expects F1 2026 to be engine-dominated,https://www.autosport.com/ [All Series](https:...,https://www.autosport.com/ [All Series](https:...,,"{""id"": ""194a8aefa4f1a8cb"", ""subject"": ""Newey e...",
1,194a81de444c5c2d,2025-01-27 14:15:18.000000,"""Kjell & Company"" <noreply@medlem.kjell.com>",Din order har skickats! 9639607,En leveransbekräftelse från www.kjell.com Plea...,En leveransbekräftelse från www.kjell.com Plea...,,"{""id"": ""194a81de444c5c2d"", ""subject"": ""Din ord...",
2,194a81d483db0c3b,2025-01-27 14:14:38.000000,Ben Thompson <email@stratechery.com>,DeepSeek FAQ (Stratechery Article 1-27-2025),\n\n,,,"{""id"": ""194a81d483db0c3b"", ""subject"": ""DeepSee...",
3,194a8095607c6b96,2025-01-27 13:52:51.000000,Dropbox <no-reply@em-s.dropbox.com>,"Christian, your storage space if filling up - ...","Get more storage for your files, photos, and p...","Get more storage for your files, photos, and p...",,"{""id"": ""194a8095607c6b96"", ""subject"": ""Christi...",
4,194a7e7f3dde141d,2025-01-27 13:16:22.000000,"""Kjell & Company"" <noreply@medlem.kjell.com>",Din order har skickats! 9639607,En leveransbekräftelse från www.kjell.com Plea...,En leveransbekräftelse från www.kjell.com Plea...,,"{""id"": ""194a7e7f3dde141d"", ""subject"": ""Din ord...",


In [7]:
# Example of a narrower query: pull only the identifying/header columns
# instead of the whole row.
query = "SELECT email_id, sender, subject, date FROM emails"
emails_summary_df = pd.read_sql(sql=query, con=connection)

# Label the output, then preview the first five rows.
print("Email Summary:")
emails_summary_df.head(5)

Email Summary:


Unnamed: 0,email_id,sender,subject,date
0,194a8aefa4f1a8cb,Autosport <news@e.autosport.com>,Newey expects F1 2026 to be engine-dominated,2025-01-27 16:53:45.000000
1,194a81de444c5c2d,"""Kjell & Company"" <noreply@medlem.kjell.com>",Din order har skickats! 9639607,2025-01-27 14:15:18.000000
2,194a81d483db0c3b,Ben Thompson <email@stratechery.com>,DeepSeek FAQ (Stratechery Article 1-27-2025),2025-01-27 14:14:38.000000
3,194a8095607c6b96,Dropbox <no-reply@em-s.dropbox.com>,"Christian, your storage space if filling up - ...",2025-01-27 13:52:51.000000
4,194a7e7f3dde141d,"""Kjell & Company"" <noreply@medlem.kjell.com>",Din order har skickats! 9639607,2025-01-27 13:16:22.000000


In [None]:
# Scratch area for ad-hoc SQL -- edit the statement below as needed.
# As written it returns at most 10 rows whose sender contains "@",
# ordered by the `date` column in descending order.
custom_query = """
SELECT 
    email_id,
    sender,
    subject,
    date,
    category_id
FROM emails 
WHERE sender LIKE '%@%'
ORDER BY date DESC
LIMIT 10
"""

# Run the query on the shared connection and show the whole (small) result.
result_df = pd.read_sql(sql=custom_query, con=connection)
print("Custom Query Results:")
result_df