# 1. backend/services/process_emails

In [4]:
# Cell 1: Setup Environment Variables and Python Path
# This ensures your gcloud commands use the right project and region,
# and your Python imports work correctly for local testing/validation.

import os
import sys

# --- Project Configuration ---
PROJECT_ID = "onlyjobs-465420" # <<-- IMPORTANT: Replace with your actual GCP Project ID
LOCATION = "us-central1"     # <<-- IMPORTANT: Ensure this matches your desired region for Cloud Run/BQ/Firestore

# --- FIX APPLIED HERE: Ensure working directory is the project root ---
# If your JupyterLab starts at /home/jupyter/onlyjobs/, os.getcwd() will be that.
# os.chdir() is defensive.
PROJECT_ROOT_DIR = "/home/jupyter/onlyjobs/" # This should be your actual project root path
os.chdir(PROJECT_ROOT_DIR)
print(f"Changed current working directory to: {os.getcwd()}")


# Set gcloud project for subsequent commands
!gcloud config set project {PROJECT_ID}
!gcloud config set run/region {LOCATION}

# --- Add Project Root to Python Path for Local Imports ---
# os.getcwd() is now the PROJECT_ROOT_DIR
project_root_for_sys_path = os.getcwd()
if project_root_for_sys_path not in sys.path:
    sys.path.append(project_root_for_sys_path)

print(f"Project ID set to: {PROJECT_ID}")
print(f"Cloud Run region set to: {LOCATION}")
print(f"Added '{project_root_for_sys_path}' to sys.path for module discovery.")

# Set environment variables for the deployed Cloud Run service
# These will be passed to Cloud Run by the gcloud run deploy command.
os.environ["PROJECT_ID"] = PROJECT_ID
os.environ["LOCATION"] = LOCATION
os.environ["BQ_DATASET_ID"] = "user_data"         # Your BigQuery Dataset ID
os.environ["BQ_RAW_TABLE_ID"] = "job_applications" # Your BigQuery Table ID
os.environ["FIRESTORE_DATABASE_ID"] = "emails-firestore" # Your Named Firestore Database ID

print("\nEnvironment variables for Cloud Run service set in current session.")

Changed current working directory to: /home/jupyter/onlyjobs
Updated property [core/project].
Updated property [run/region].
Project ID set to: onlyjobs-465420
Cloud Run region set to: us-central1
Added '/home/jupyter/onlyjobs' to sys.path for module discovery.

Environment variables for Cloud Run service set in current session.


In [5]:
# Cell 2: Build the Docker Image for process_emails
# This will use Cloud Build to create your container image and push it to Google Container Registry.

# Define the path to your process_emails service directory relative to this notebook's location
# --- FIX APPLIED HERE: Path is now relative from project root ---
SERVICE_DIR = "./backend/services/process_emails"
IMAGE_NAME = f"gcr.io/{PROJECT_ID}/process-emails"

print(f"Building Docker image from: {SERVICE_DIR}")
print(f"Image will be tagged as: {IMAGE_NAME}")

# The '!' prefix runs the command in the shell. The '.' indicates the build context is SERVICE_DIR.
# We explicitly specify SERVICE_DIR as the build context.
!gcloud builds submit --tag {IMAGE_NAME} {SERVICE_DIR}

Building Docker image from: ./backend/services/process_emails
Image will be tagged as: gcr.io/onlyjobs-465420/process-emails
Creating temporary archive of 16 file(s) totalling 84.0 KiB before compression.
Uploading tarball of [./backend/services/process_emails] to [gs://onlyjobs-465420_cloudbuild/source/1753135323.450823-7c04e36b13d649e4a615ec16e01d0501.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/onlyjobs-465420/locations/global/builds/6e620397-336d-4803-88cc-21a8fa193be4].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/6e620397-336d-4803-88cc-21a8fa193be4?project=12002195951 ].
Waiting for build to complete. Polling interval: 1 second(s).
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "6e620397-336d-4803-88cc-21a8fa193be4"

FETCHSOURCE
Fetching storage object: gs://onlyjobs-465420_cloudbuild/source/1753135323.450823-7c04e36b13d649e4a615ec16e01d0501.tgz#1753135323750206
Copying gs://onlyjobs-4

In [6]:
# Cell 3: Deploy the process_emails service to Cloud Run
# This will deploy the container image you just built as a new Cloud Run service.

SERVICE_NAME = "process-emails"

print(f"\nDeploying Cloud Run service: {SERVICE_NAME} in region {LOCATION}")

deploy_command = (
    f"gcloud run deploy {SERVICE_NAME} "
    f"--image {IMAGE_NAME} "
    f"--platform managed "
    f"--region {LOCATION} "
    f"--no-allow-unauthenticated "
    f"--cpu 1 --memory 2Gi --min-instances 0 --max-instances 1 "
    f"--set-env-vars PROJECT_ID={PROJECT_ID},LOCATION={LOCATION},"
    f"BQ_DATASET_ID={os.environ['BQ_DATASET_ID']},"
    f"BQ_RAW_TABLE_ID={os.environ['BQ_RAW_TABLE_ID']},"
    f"FIRESTORE_DATABASE_ID={os.environ['FIRESTORE_DATABASE_ID']} " # Note the space before --quiet
    f"--quiet" # <--- ADDED THIS FLAG
)

# Execute the command
!{deploy_command}


Deploying Cloud Run service: process-emails in region us-central1
Deploying container to Cloud Run service [process-emails] in project [onlyjobs-465420] region [us-central1]
Deploying new service...                                                       
  . Creating Revision...                                                       
  . Routing traffic...                                                         
  Deploying new service...                                                     


⠛ Deploying new service...                                                     


⠹ Deploying new service...                                                     


⠼ Deploying new service...                                                     


⠶ Deploying new service...                                                     


⠧ Deploying new service...                                                     
  ⠧ Creating Revision...                                                  

# 2. backend/functions/manage_tokens

# 3. backend/functions/gmail_fetch

In [10]:
from google.cloud import firestore

# Reset the `last_fetched` field to 0 on each listed user document
# (presumably so the next gmail_fetch run starts from the beginning -- confirm).
# NOTE(review): this client is created without a database argument, while Cell 1
# stores a named database ID ("emails-firestore") in FIRESTORE_DATABASE_ID --
# confirm this cell targets the database gmail_fetch actually reads.
db = firestore.Client(project='onlyjobs-465420')

# Document IDs in the "users" collection to reset.
uids = [
    'cXq5vE4vtDWigFnuBPWFX3eDd8D3',
    'Fmlvn574OzbleWP5DPhYF5AEaHB3',
    'test-user',
]

users_collection = db.collection("users")
for uid in uids:
    print(f"Resetting {uid}...")
    # merge=True writes only this field instead of replacing the whole document.
    users_collection.document(uid).set({"last_fetched": 0}, merge=True)


Resetting cXq5vE4vtDWigFnuBPWFX3eDd8D3...
Resetting Fmlvn574OzbleWP5DPhYF5AEaHB3...
Resetting test-user...


# 4. DBT_Trigger

In [None]:
%%bash
# Publish a test message to the applications-ready topic, then stream the logs
# of the most recent Cloud Build run in the project (presumably the build the
# message triggers -- confirm against the trigger configuration).
#
# The %%bash cell magic is required: this cell is shell, not Python, and would
# raise a SyntaxError if the Python kernel tried to execute it directly.
set -euo pipefail

PROJECT_ID="onlyjobs-465420"

# 1) Publish your test message
gcloud pubsub topics publish applications-ready-topic \
  --project="$PROJECT_ID" \
  --message="{}"

# 2) Wait a few seconds so the new build has time to be created, then grab the
#    latest build ID. Without the pause the list can still return the previous build.
sleep 10
BUILD_ID=$(gcloud builds list \
  --project="$PROJECT_ID" \
  --sort-by="~CREATE_TIME" \
  --limit=1 \
  --format="value(ID)")

# Fail with a clear message instead of calling `builds log` with an empty ID.
if [ -z "$BUILD_ID" ]; then
  echo "No Cloud Build runs found in project $PROJECT_ID" >&2
  exit 1
fi

echo "Latest build ID: $BUILD_ID"

# 3) Stream its logs
gcloud beta builds log "$BUILD_ID" --project="$PROJECT_ID" --stream
