<a href="https://colab.research.google.com/github/upashanadutta23/gmail-triage-bot/blob/main/gmail_triage_bot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install into the running kernel's environment (%pip targets the kernel; !pip may not).
%pip install -q -U google-generativeai google-api-python-client google-auth-oauthlib
# bs4 and pandas are imported by the pipeline cell; installed without -U so a
# preinstalled copy (e.g. on Colab) is left untouched and no runtime restart is needed.
%pip install -q beautifulsoup4 pandas

In [None]:
%%writefile emails.json
[
  {
    "body": "Hi team, please remember to submit the Q3 financial summary by this Friday, October 10th, 2025. This is a high priority. Thanks, Management."
  },
  {
    "body": "Thank you for your order from The Coffee Shop. Your credit card has been charged $12.50 on October 4th, 2025. Order #12345."
  },
  {
    "body": "Hey, are you free to meet for lunch next Tuesday at 1pm to discuss the project launch? Let me know. Best, Alex."
  }
]

Writing emails.json


In [None]:
import os
import sys
import json
import base64
import datetime
import pandas as pd
from bs4 import BeautifulSoup

import google.generativeai as genai
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from IPython.display import display

# ==============================================================================
# --- 1. USER CONFIGURATION ---
# ==============================================================================

def _load_secret(name, default=""):
    """Return a configuration value without hardcoding it in the notebook.

    Lookup order:
      1. the process environment (``os.environ[name]``);
      2. the Colab "Secrets" panel, when running inside Colab;
      3. ``default``.
    """
    value = os.environ.get(name)
    if value:
        return value
    try:
        from google.colab import userdata  # only importable inside Colab
    except ImportError:
        return default
    try:
        return userdata.get(name) or default
    except Exception:
        # Secret not defined, or the notebook was not granted access to it.
        return default


# SECURITY: never commit API keys. A key that was previously committed in this
# notebook must be treated as leaked and revoked in the provider console.
GEMINI_API_KEY = _load_secret("GEMINI_API_KEY")
# The spreadsheet ID is not a credential (access still requires OAuth), so the
# previous value is kept as an overridable default.
GOOGLE_SHEET_ID = _load_secret("GOOGLE_SHEET_ID", "1cKUCuYb9VY6NS8kvAHx_Cf_PHURWgfVthS8Ev95waVQ")

if not GEMINI_API_KEY:
    print("🔴 GEMINI_API_KEY is not set. Define it as an environment variable or a Colab secret.")

# ==============================================================================
# --- 2. AUTHENTICATION MODULE ---
# ==============================================================================

# OAuth scopes requested for the signed-in user. authenticate() passes this list
# both when loading the cached token and when starting a new consent flow.
# NOTE(review): gmail.readonly looks redundant next to gmail.modify — confirm
# before removing; presumably a scope change requires re-consent.
SCOPES = [
    "https://www.googleapis.com/auth/gmail.readonly",  # Gmail: list/fetch messages (get_unread_emails)
    "https://www.googleapis.com/auth/gmail.modify",    # Gmail: label changes (mark_email_as_read)
    "https://www.googleapis.com/auth/tasks",           # Google Tasks (create_google_task)
    "https://www.googleapis.com/auth/calendar",        # Google Calendar (create_calendar_event)
    "https://www.googleapis.com/auth/spreadsheets",    # Google Sheets (log_expense_to_sheet)
]
# OAuth client secrets file read when a new consent flow is started.
CREDENTIALS_PATH = 'credentials.json'
# Cached user token: read at the start of authenticate() and rewritten at its end.
TOKEN_PATH = 'token.json'

def authenticate():
    """Obtain Google OAuth credentials covering every scope in SCOPES.

    Steps, in order:
      1. Load cached credentials from TOKEN_PATH if the file exists.
      2. If they are expired and carry a refresh token, refresh them.
      3. Otherwise run the interactive installed-app flow using the client
         secrets in CREDENTIALS_PATH.

    The resulting credentials are written back to TOKEN_PATH.

    Returns:
        The credentials object, or None when CREDENTIALS_PATH does not exist
        and no usable cached token is available.
    """
    creds = None
    if os.path.exists(TOKEN_PATH):
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_PATH, SCOPES)
        except ValueError as e:
            # Corrupt or incompatible token file: ignore it and sign in again.
            print(f"Cached token could not be loaded: {e}. Re-authenticating...")
            creds = None
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            try:
                creds.refresh(Request())
            except Exception as e:
                print(f"Token refresh failed: {e}. Re-authenticating...")
                creds = None

    if not creds or not creds.valid:
        if not os.path.exists(CREDENTIALS_PATH):
            print(f"🔴 FATAL: '{CREDENTIALS_PATH}' not found. Please upload it to Colab.")
            return None
        flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_PATH, SCOPES)
        if hasattr(flow, "run_console"):
            # Older google-auth-oauthlib releases: copy/paste console flow.
            creds = flow.run_console()
        else:
            # run_console() was removed from newer google-auth-oauthlib releases.
            # The loopback flow prints the consent URL instead of opening a
            # browser, which suits headless kernels.
            # NOTE(review): on hosted runtimes the loopback redirect may not be
            # reachable from the user's browser — verify in the target environment.
            creds = flow.run_local_server(port=0, open_browser=False)

    with open(TOKEN_PATH, "w") as token:
        token.write(creds.to_json())
    print("✅ Authentication successful.")
    return creds

# ==============================================================================
# --- 3. GMAIL CLIENT MODULE ---
# ==============================================================================

def _decode_body_data(data):
    """Decode a Gmail base64url ``body.data`` string into text."""
    if not data:
        return ""
    # Restore any stripped '=' padding; undecodable bytes are replaced rather
    # than aborting the whole fetch.
    padded = data + "=" * (-len(data) % 4)
    return base64.urlsafe_b64decode(padded).decode("utf-8", errors="replace")


def _iter_leaf_parts(part):
    """Yield every non-container MIME part under ``part``, depth first."""
    children = part.get("parts")
    if not children:
        yield part
        return
    for child in children:
        yield from _iter_leaf_parts(child)


def _extract_body(payload):
    """Return the message text: first text/plain part, else first text/html part as text."""
    html_data = None
    for leaf in _iter_leaf_parts(payload):
        data = leaf.get("body", {}).get("data", "")
        if not data:
            continue
        mime_type = leaf.get("mimeType", "")
        if mime_type == "text/html":
            if html_data is None:
                html_data = data  # keep as fallback; plain text is preferred
        elif mime_type == "text/plain" or leaf is payload:
            # A single-part message is decoded whatever its (non-HTML) type.
            return _decode_body_data(data)
    if html_data is None:
        return ""
    soup = BeautifulSoup(_decode_body_data(html_data), "html.parser")
    return soup.get_text(separator='\n').strip()


def _header_value(headers, name):
    """Return the value of header ``name`` (case-insensitive), or "" if absent."""
    wanted = name.lower()
    return next((h.get("value", "") for h in headers if h.get("name", "").lower() == wanted), "")


def get_unread_emails(creds, max_results=50):
    """Fetch and parse up to ``max_results`` unread Gmail messages.

    Args:
        creds: Google OAuth credentials passed to the Gmail API client.
        max_results: Maximum number of unread messages to request.

    Returns:
        A list of dicts with keys "id", "subject", "sender" and "body".
        The body is taken from the first text/plain part found anywhere in the
        MIME tree (nested multiparts included), falling back to the text of the
        first text/html part. Returns [] when there are no unread messages or
        the Gmail API raises HttpError.
    """
    try:
        service = build("gmail", "v1", credentials=creds)
        print(f"Fetching up to {max_results} unread emails...")
        results = service.users().messages().list(userId="me", q="is:unread", maxResults=max_results).execute()
        messages = results.get("messages", [])

        if not messages:
            print("✅ No unread messages found.")
            return []

        emails = []
        for msg in messages:
            msg_data = service.users().messages().get(userId="me", id=msg["id"], format="full").execute()
            payload = msg_data.get("payload", {})
            headers = payload.get("headers", [])
            emails.append({
                "id": msg_data.get("id"),
                "subject": _header_value(headers, "Subject"),
                "sender": _header_value(headers, "From"),
                "body": _extract_body(payload),
            })

        print(f"Successfully fetched {len(emails)} emails.")
        return emails
    except HttpError as error:
        print(f"🔴 An error occurred with the Gmail API: {error}")
        return []

def mark_email_as_read(creds, msg_id):
    """Strip the UNREAD label from message ``msg_id`` so Gmail shows it as read.

    Any failure (client construction or API call) is reported on stdout and
    swallowed; nothing is returned.
    """
    label_change = {'removeLabelIds': ['UNREAD']}
    try:
        gmail = build("gmail", "v1", credentials=creds)
        request = gmail.users().messages().modify(userId='me', id=msg_id, body=label_change)
        request.execute()
        print(f"  -> Marked email {msg_id[:10]}... as read.")
    except Exception as exc:
        print(f"  -> 🔴 Error marking email as read: {exc}")

# ==============================================================================
# --- 4. COGNITIVE & WORKSPACE MODULE (RUNNER) ---
# ==============================================================================

def configure_gemini(api_key):
    """Register ``api_key`` with the Gemini client and build the model handle.

    Returns the GenerativeModel for 'models/gemini-pro-latest', or None if
    configuration or model construction raises (the error is printed).
    """
    try:
        genai.configure(api_key=api_key)
        gemini = genai.GenerativeModel('models/gemini-pro-latest')
    except Exception as exc:
        print(f"🔴 Error configuring Gemini: {exc}")
        return None
    print("✅ Gemini API configured successfully.")
    return gemini

def get_email_intent(model, email_body):
    """Ask Gemini to classify an email into one of the pipeline's intents.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``.text`` attribute.
        email_body: Plain-text body of the email.

    Returns:
        The intent label, normalised to lower case with surrounding whitespace,
        quotes, backticks, asterisks and periods removed, so that replies such
        as "`Task_Delegation`." still compare equal to "task_delegation".
        Returns "error" if the model call fails or the reply is empty.
    """
    prompt = f"""Analyze email content and classify its intent. Choose from: task_delegation, scheduling_request, receipt_invoice, information_query, no_action_needed. Email: --- {email_body} --- Return only the intent name."""
    try:
        response = model.generate_content(prompt)
        raw_label = response.text
    except Exception as e:
        print(f"  -> 🔴 Error classifying intent: {e}")
        return "error"
    # Models often wrap the label in markdown or punctuation; callers compare
    # against exact lower-case names.
    label = raw_label.strip().strip("`'\"*. \n\t").lower()
    if not label:
        print("  -> 🔴 Error classifying intent: empty response")
        return "error"
    return label

def extract_information(model, email_body, intent):
    """Ask Gemini for structured details matching the email's intent.

    Args:
        model: Object exposing ``generate_content(prompt)`` whose result has a
            ``.text`` attribute.
        email_body: Plain-text body of the email.
        intent: One of "task_delegation", "scheduling_request" or
            "receipt_invoice"; any other value yields None without a model call.

    Returns:
        A dict parsed from the model's JSON reply, or None when the intent is
        not extractable, the model call fails, the reply is not valid JSON, or
        the JSON is not an object (callers rely on dict access).
    """
    if intent == "task_delegation":
        prompt = f"""Extract task details from the email. Return JSON with keys: "task_description", "due_date" (YYYY-MM-DD or null), "priority" ("High", "Medium", "Low"). Email: --- {email_body} ---"""
    elif intent == "scheduling_request":
        prompt = f"""Extract event details from the email. Return JSON with keys: "event_title", "attendees" (list of emails), "proposed_time" (YYYY-MM-DDTHH:MM:SS or null). Email: --- {email_body} ---"""
    elif intent == "receipt_invoice":
        prompt = f"""Extract expense details from the email. Return JSON with keys: "vendor_name", "total_amount" (float), "purchase_date" (YYYY-MM-DD). Email: --- {email_body} ---"""
    else:
        return None
    try:
        response = model.generate_content(prompt)
        cleaned_response = response.text.strip().replace("```json", "").replace("```", "")
        # Tolerate prose around the object by parsing only the outermost {...}.
        first_brace = cleaned_response.find("{")
        last_brace = cleaned_response.rfind("}")
        if first_brace != -1 and last_brace > first_brace:
            cleaned_response = cleaned_response[first_brace:last_brace + 1]
        parsed = json.loads(cleaned_response)
    except Exception as e:
        print(f"  -> 🔴 Error extracting information: {e}")
        return None
    if not isinstance(parsed, dict):
        print(f"  -> 🔴 Error extracting information: expected a JSON object, got {type(parsed).__name__}")
        return None
    return parsed

def create_google_task(creds, task_details):
    """Insert a task into the user's default Google Tasks list.

    ``task_details`` supplies "task_description" (used as the title; the call
    is a no-op when it is missing or empty) and optionally "due_date", which is
    sent as midnight UTC of that date. HttpError from the API is printed, not
    raised.
    """
    tasks_api = build("tasks", "v1", credentials=creds)
    title = task_details.get("task_description")
    if not title:
        return
    payload = {'title': title}
    due_date = task_details.get("due_date")
    if due_date:
        payload['due'] = f'{due_date}T00:00:00.000Z'
    try:
        created = tasks_api.tasks().insert(tasklist='@default', body=payload).execute()
        print(f"  -> ✅ Successfully created task: '{created['title']}'")
    except HttpError as e:
        print(f"  -> 🔴 Error creating Google Task: {e}")

def create_calendar_event(creds, event_details, timezone='America/Chicago', duration_hours=1):
    """Create an event on the user's primary Google Calendar.

    Args:
        creds: Google OAuth credentials passed to the Calendar API client.
        event_details: Dict with "event_title", "proposed_time" (ISO 8601
            string) and optionally "attendees" (list of email addresses).
        timezone: IANA time zone name attached to the start and end times.
        duration_hours: Length of the event in hours.

    Returns:
        None. Nothing is created when the title or time is missing/null or the
        time is not a valid ISO 8601 string; these cases and API HttpErrors are
        reported on stdout instead of raising.
    """
    title = event_details.get("event_title")
    proposed_time = event_details.get("proposed_time")
    # The extraction prompt allows a null proposed_time, so a present key is
    # not enough: the value itself must be usable.
    if not title or not proposed_time:
        return
    try:
        if not isinstance(proposed_time, str):
            raise ValueError("not a string")
        iso_text = proposed_time.strip()
        if iso_text.endswith("Z"):
            # fromisoformat() rejects the 'Z' suffix on older Python versions.
            iso_text = iso_text[:-1] + "+00:00"
        start_time = datetime.datetime.fromisoformat(iso_text)
    except ValueError as e:
        print(f"  -> 🔴 Error creating Google Calendar event: invalid proposed_time {proposed_time!r}: {e}")
        return
    end_time = start_time + datetime.timedelta(hours=duration_hours)
    # "attendees" may be null or contain bare names; keep only address-like strings.
    attendees = [
        {'email': address}
        for address in (event_details.get("attendees") or [])
        if isinstance(address, str) and "@" in address
    ]
    event = {
        'summary': title,
        'start': {'dateTime': start_time.isoformat(), 'timeZone': timezone},
        'end': {'dateTime': end_time.isoformat(), 'timeZone': timezone},
        'attendees': attendees,
    }
    service = build("calendar", "v3", credentials=creds)
    try:
        result = service.events().insert(calendarId='primary', body=event).execute()
        print(f"  -> ✅ Successfully created event: '{result['summary']}'")
    except HttpError as e:
        print(f"  -> 🔴 Error creating Google Calendar event: {e}")

def log_expense_to_sheet(creds, sheet_id, expense_details):
    """Append one expense row (date, vendor, amount) to Sheet1 of ``sheet_id``.

    Does nothing unless ``expense_details`` contains all of "purchase_date",
    "vendor_name" and "total_amount". HttpError from the API is printed, not
    raised.
    """
    sheets_api = build("sheets", "v4", credentials=creds)
    columns = ("purchase_date", "vendor_name", "total_amount")
    for column in columns:
        if column not in expense_details:
            return
    row = [expense_details[column] for column in columns]
    try:
        request = sheets_api.spreadsheets().values().append(
            spreadsheetId=sheet_id,
            range="Sheet1!A1",
            valueInputOption="USER_ENTERED",
            body={'values': [row]},
        )
        request.execute()
        print(f"  -> ✅ Successfully logged expense for '{expense_details['vendor_name']}'")
    except HttpError as e:
        print(f"  -> 🔴 Error logging expense: {e}")

# ==============================================================================
# --- 5. MAIN PIPELINE EXECUTION ---
# ==============================================================================

def run_pipeline():
    """Run the full triage pass over unread mail and display a summary table.

    For each unread email with a body: classify its intent, extract structured
    details for actionable intents, perform the matching Workspace action
    (task, calendar event, or expense row), and record the outcome.

    An email is marked as read only when it was actually handled. It is left
    unread, so a later run can retry it, when classification failed, when
    extraction returned nothing for an actionable intent, or when the action
    raised. A failure on one email never aborts the remaining ones.
    """
    creds = authenticate()
    if not creds:
        return
    gemini_model = configure_gemini(GEMINI_API_KEY)
    if not gemini_model:
        return
    emails = get_unread_emails(creds, max_results=50)
    if not emails:
        return

    actionable_intents = ("task_delegation", "scheduling_request", "receipt_invoice")
    results = []
    print("\n--- 🚀 Starting Email Processing Pipeline ---")
    for email in emails:
        print(f"\nProcessing: '{email['subject']}' from {email['sender']}")
        if not email["body"]:
            continue
        intent = get_email_intent(gemini_model, email["body"])
        print(f"  - Intent: {intent}")
        action_taken, details = "None", {}
        # Non-actionable intents count as handled; a failed classification does not.
        handled = bool(intent) and intent != "error"
        if intent in actionable_intents:
            handled = False
            extracted_data = extract_information(gemini_model, email["body"], intent)
            print(f"  - Extracted: {extracted_data}")
            if extracted_data:
                details = extracted_data
                try:
                    if intent == "task_delegation":
                        create_google_task(creds, extracted_data)
                        action_taken = "Created Google Task"
                    elif intent == "scheduling_request":
                        create_calendar_event(creds, extracted_data)
                        action_taken = "Created Calendar Event"
                    else:
                        log_expense_to_sheet(creds, GOOGLE_SHEET_ID, extracted_data)
                        action_taken = "Logged Expense to Sheet"
                    handled = True
                except Exception as e:
                    # Keep going: one malformed extraction must not lose the summary.
                    print(f"  -> 🔴 Error while acting on '{intent}': {e}")
                    action_taken = "Failed"
        if handled:
            mark_email_as_read(creds, email["id"])
        else:
            print("  -> Left unread so it can be retried.")
        results.append({'subject': email['subject'], 'intent': intent, 'action': action_taken, 'details': json.dumps(details)})
    print("\n--- ✅ Pipeline Summary ---")
    if results:
        display(pd.DataFrame(results))

# --- Run the agent ---
# Executing this cell runs the whole pipeline immediately. It has side effects on
# the signed-in account: run_pipeline() calls mark_email_as_read() on processed
# messages and may create tasks, calendar events, or spreadsheet rows.
run_pipeline()

✅ Authentication successful.
✅ Gemini API configured successfully.
Fetching up to 50 unread emails...
Successfully fetched 50 emails.

--- 🚀 Starting Email Processing Pipeline ---

Processing: 'Mocha made for poolside chillin'☀️😎☀️' from Dutch Bros Coffee <goodvibes@members2.dutchbros.com>
  - Intent: no_action_needed
  -> Marked email 1983ec5e7a... as read.

Processing: 'Stop watching tutorials, start coding with the pros.' from Tanya Chitranshi <tanya@newsletters.analyticsvidhya.com>
  - Intent: no_action_needed
  -> Marked email 1976ca6120... as read.

Processing: 'Dad’s Cookbook 👨‍🍳 Recipe Contest & More 🍽️' from bigbasket <alert@info.bigbasket.com>
  - Intent: no_action_needed
  -> Marked email 19766b8e13... as read.

Processing: 'Let’s be friends (forever). Here’s 21% Off' from "Aéropostale" <aeropostale@em.aeropostale.com>
  - Intent: no_action_needed
  -> Marked email 197651ad0e... as read.

Processing: 'Vegan vs Plant-based - what’s the difference? 🧐' from bigbasket <noreply@u

Unnamed: 0,subject,intent,action,details
0,Mocha made for poolside chillin'☀️😎☀️,no_action_needed,,{}
1,"Stop watching tutorials, start coding with the...",no_action_needed,,{}
2,Dad’s Cookbook 👨‍🍳 Recipe Contest & More 🍽️,no_action_needed,,{}
3,Let’s be friends (forever). Here’s 21% Off,no_action_needed,,{}
4,Vegan vs Plant-based - what’s the difference? 🧐,no_action_needed,,{}
5,Sign up for a chance to win a US$100 coupon fo...,no_action_needed,,{}
6,Action Required:,no_action_needed,,{}
7,Competition Launch: Make Data Count - Finding ...,no_action_needed,,{}
8,💐 Welcome to the Super Value Store 🏬,no_action_needed,,{}
9,Difficult periods? Try yoga! 🧘🏻‍♀️,no_action_needed,,{}


In [None]:
# Mounts Google Drive at /content/drive (google.colab is only available on Colab).
# NOTE(review): none of the cells visible above reference /content/drive —
# confirm whether this mount is still needed.
from google.colab import drive
drive.mount('/content/drive')