In [6]:
# Setup
from opensearchpy import OpenSearch, helpers
from sentence_transformers import SentenceTransformer
import os

INDEX_NAME = "enron-semantic-2"
DIMENSION = 384  # For 'all-MiniLM-L6-v2'
EMBEDDING_MODEL = 'all-MiniLM-L6-v2'

# Connect to OpenSearch
client = OpenSearch(
    hosts=[{'host': 'localhost', 'port': 9200}],
    #http_auth=('admin', 'admin'),  # adjust if needed
    use_ssl=False
)

# Load embedding model
model = SentenceTransformer(EMBEDDING_MODEL)

In [7]:
# Semantic Search
query_text = "What’s our plan for the next quarter?"
query_vector = model.encode(query_text).tolist()

search_body = {
    "size": 3,
    "query": {
        "knn": {
            "body_vector": {
                "vector": query_vector,
                "k": 3
            }
        }
    }
}

results = client.search(index=INDEX_NAME, body=search_body)

print(f"\nTop matches for: \"{query_text}\"\n")
for hit in results["hits"]["hits"]:
    print(f"- {hit['_source']['message']} (score: {hit['_score']:.4f})")



Top matches for: "What’s our plan for the next quarter?"

- {'file': 'bass-e/inbox/55.', 'message-id': '<14068132.1075840320520.JavaMail.evans@thyme>', 'from': 'timothy.blanchard@enron.com', 'to': 'eric.bass@enron.com', 'x-to': 'Bass, Eric </O=ENRON/OU=NA/CN=RECIPIENTS/CN=Ebass>', 'x-cc': '', 'x-bcc': '', 'subject': 'Super Bowl', 'date': 'Wed, 30 Jan 2002 08:53:42 -0800', 'body': "What's the plan?"} (score: 0.5916)
- {'file': 'hayslett-r/projects/portland_general/35.', 'message-id': '<13358943.1075862295606.JavaMail.evans@thyme>', 'from': 'kirk.stevens@enron.com', 'to': 'rod.hayslett@enron.com, geaccone.enron.ene.tracy@enron.com', 'x-to': 'Hayslett, Rod </O=ENRON/OU=NA/CN=RECIPIENTS/CN=RHAYSLE>, Enron.ENE.Tracy Geaccone <??EEnron.ENE.Tracy Geaccone>', 'x-cc': 'Piro, Jim </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GWADDR/CN=HQ3.EM5.JIM PIRO>, FOWLER, PEGGY </O=ENRON/OU=NA/CN=RECIPIENTS/CN=GWADDR/CN=HQ3.EM5.PEGGY FOWLER>', 'x-bcc': '', 'subject': 'Q3 Earnings - PGG', 'date': 'Tue, 21 Aug 2001 12:5

In [8]:
# ----------- Summarize with Gemini ----------- #
import google.generativeai as genai
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

messages = [hit["_source"]["message"] for hit in results["hits"]["hits"]]

prompt = "Summarize the following email messages:\n\n"
for i, msg in enumerate(messages, 1):
    prompt += f"{i}. {msg}\n"
prompt += "\nSummary:"

GEMINI_API_KEY=os.getenv("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)
gemini_model = genai.GenerativeModel('gemini-2.5-flash')


response = gemini_model.generate_content({
    "text": prompt
})

print("\n🧠 Gemini Summary:")
print(response.text)


🧠 Gemini Summary:
The emails cover two main topics:

*   Two emails are simple inquiries about plans: one asks about "Super Bowl" plans, and another asks for details (day, time, where) for "our plan later this week."
*   The third email provides a financial analysis and a plan to cover a $19.5 million gap in PGG's expected Q3 financial results, noting that some items require auditor approval and mentioning other risks/opportunities.
