In [1]:
# Cell 1: Import necessary modules and set up environment variables
import os
import sys
import json
import base64
from unittest.mock import Mock # Used to simulate Flask request object
from datetime import datetime
import uuid

# Locate the project root (the directory that holds the 'backend' package) so that
# 'backend.services.process_emails.main' can be imported regardless of whether Jupyter
# was launched from the project root or from 'backend/services/process_emails/'.
def find_project_root(start_dir: str,
                      marker: str = os.path.join('backend', 'services', 'process_emails', 'main.py')) -> str:
    """Return the nearest ancestor of *start_dir* (inclusive) that contains *marker*.

    Args:
        start_dir: Directory from which the upward search begins.
        marker: Path, relative to the project root, of a file that identifies it.

    Returns:
        The absolute path of the first directory, walking upwards from
        *start_dir*, in which *marker* exists. If no ancestor contains it, the
        directory three levels above *start_dir* is returned, which is the
        layout the notebook originally assumed.
    """
    start_dir = os.path.abspath(start_dir)
    candidate = start_dir
    while True:
        if os.path.isfile(os.path.join(candidate, marker)):
            return candidate
        parent = os.path.dirname(candidate)
        if parent == candidate:  # reached the filesystem root without a match
            break
        candidate = parent
    # Fallback: keep the original behaviour (notebook located three levels below the root).
    return os.path.abspath(os.path.join(start_dir, '..', '..', '..'))


current_dir = os.getcwd()
project_root = find_project_root(current_dir)

# Add the project root to sys.path so Python can find 'backend' as a module
if project_root not in sys.path:
    sys.path.append(project_root)

print(f"Added '{project_root}' to sys.path for module discovery.")

# Set environment variables that main.py expects.
# IMPORTANT: These values are specific to one GCP project; replace them with your own
#            project ID, region and BigQuery dataset/table names (see config.py / GCP console).
# NOTE(review): the recorded run of this notebook failed with
#   "Not found: Dataset onlyjobs-465420:job_applications" (table 'raw_job_applications'),
#   i.e. the service did not use the dataset/table set below. Confirm which variable
#   names (or hard-coded defaults) config.py/main.py actually read.
os.environ["PROJECT_ID"] = "onlyjobs-465420"
os.environ["LOCATION"] = "us-central1"
os.environ["BQ_DATASET_ID"] = "user_data"
os.environ["BQ_RAW_TABLE_ID"] = "job_applications"

# For local development outside a GCP VM you may also need explicit credentials:
# os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/your/service-account-key.json"
# On a Jupyter VM the VM's service account is likely used by default.

print("Environment variables set for local testing.")

# Cell 2: Import the main processing logic (Flask app)
# Importing the module may print "Running Cloud Run service locally..." but should not
# start the server, provided main.py keeps app.run() inside an
# `if __name__ == '__main__':` block (the original note states that it already does).
# NOTE(review): presumably main.py/config.py read the environment variables set above at
# import time, in which case this import must run after them — confirm.
# bigquery_client and firestore_client are imported together with the app; they are not
# referenced in the cells visible here.
from backend.services.process_emails.main import app, bigquery_client, firestore_client # Import the Flask app instance and clients

# Cell 3: Sample e-mails and the inner Pub/Sub payloads built from them
# Each payload is the JSON document that gets base64-encoded into the "data"
# field of a Pub/Sub message before it is posted to the service.

# Job-related: interview invitation.
test_email_content_1 = """Subject: Interview Invitation - ExampleCorp

Dear Jane,

We were impressed by your resume and would like to invite you to a first-round Zoom interview for the Software Engineer role at ExampleCorp.

Please let us know your availability.

Best,
ExampleCorp Talent Team
"""

# Job-related: application acknowledgement.
test_email_content_2 = """Subject: Application Received - Cloud Solutions Inc.

Hi John,

Thank you for applying to the Cloud Architect position at Cloud Solutions Inc. We have received your application and will review it shortly.

Best,
Cloud Solutions HR
"""

# Not job-related: internal meeting reminder.
test_email_content_3 = """Subject: Important Meeting Reminder

Hi Team,

Just a reminder about our stand-up meeting at 9 AM today.

Thanks,
Manager
"""

# (user id, email-id prefix, raw e-mail text) for every payload to send.
# In a real deployment each entry would arrive as its own Pub/Sub message.
_payload_specs = [
    ("test_user_from_notebook_001", "notebook-app", test_email_content_1),
    ("test_user_from_notebook_001", "notebook-app", test_email_content_2),
    ("test_user_from_notebook_002", "notebook-nonjob", test_email_content_3),
]

# A fresh UUID suffix keeps every email_id unique across notebook runs.
test_email_payloads = [
    {
        "user_id": owner,
        "email_id": f"{id_prefix}-{uuid.uuid4()}",
        "email_content": raw_email,
    }
    for owner, id_prefix, raw_email in _payload_specs
]

# Cell 4: Simulate a Pub/Sub POST request and call the Flask route
# Every payload is wrapped in a Pub/Sub push envelope and posted to '/' through
# Flask's in-process test client, so no server or network listener is needed.
from datetime import timezone # Needed for a timezone-aware "now" (datetime.utcnow() is deprecated)
from flask import testing # Import for creating test requests (not referenced directly below)

client = app.test_client() # Create a test client for your Flask app

# Derive the subscription path from the configured project instead of repeating the
# project ID as a literal; the fallback is the ID this notebook was written for.
subscription_path = f"projects/{os.environ.get('PROJECT_ID', 'onlyjobs-465420')}/subscriptions/test-subscription"

for i, payload in enumerate(test_email_payloads):
    print(f"\n--- Processing Test Email {i+1} ---")

    # Base64 encode the inner JSON payload, as it appears in the envelope's "data" field
    encoded_data = base64.b64encode(json.dumps(payload).encode('utf-8')).decode('utf-8')

    # Timezone-aware UTC timestamp rendered with a trailing "Z", e.g. 2024-01-01T12:00:00.123456Z
    publish_time = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

    # Construct the full Pub/Sub message envelope
    pubsub_message_envelope = {
        "message": {
            "data": encoded_data,
            "messageId": f"test-message-{i}-{uuid.uuid4()}",
            "publishTime": publish_time,
            "attributes": {
                "some-attribute": "test"
            }
        },
        "subscription": subscription_path
    }

    # Send the POST request to the Flask app
    response = client.post('/', json=pubsub_message_envelope)

    print(f"Response Status: {response.status_code}")
    print(f"Response Data: {response.data.decode('utf-8')}")

    # You can add assertions here to check expected behavior
    # For example:
    # assert response.status_code == 200
    # assert "processed successfully" in response.data.decode('utf-8')

print("\n--- Local Testing Complete ---")

Added '/home/jupyter/onlyjobs' to sys.path for module discovery.
Environment variables set for local testing.

--- Processing Test Email 1 ---
Processing email for user: test_user_from_notebook_001, ID: notebook-app-a960fe3b-930d-455e-b9b7-35d35c7580bf


NotFound: 404 POST https://bigquery.googleapis.com/bigquery/v2/projects/onlyjobs-465420/datasets/job_applications/tables/raw_job_applications/insertAll?prettyPrint=false: Not found: Dataset onlyjobs-465420:job_applications