<a href="https://colab.research.google.com/github/yuuu125/Lunette/blob/main/Untitled0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
# -*- coding: utf-8 -*-
!pip install openai==0.28.1 python-docx notion-client langdetect

import os
import re
import json
import openai
from docx import Document
from google.colab import files, userdata
from notion_client import Client
from langdetect import detect, LangDetectException
import datetime

def _read_secret(name):
    """Returns the Colab secret called *name*, or None if it cannot be read.

    Each secret is looked up on its own so that a failure while reading one
    (for example an optional Notion secret) neither hides the others nor
    leaves the corresponding module-level name undefined.
    """
    try:
        return userdata.get(name)
    except Exception as e:
        print(f"⚠️ Could not read secret {name}: {str(e)}")
        return None


# These names are always bound (possibly to None) so later checks such as
# `if NOTION_TOKEN and NOTION_DB_ID` cannot raise NameError.
OPENAI_API_KEY = _read_secret('OPENAI_API_KEY')
NOTION_TOKEN = _read_secret('NOTION_TOKEN')
NOTION_DB_ID = _read_secret('NOTION_DB_ID')

# Notion is optional: warn, but keep going.
if not NOTION_TOKEN:
    print("⚠️ Notion token missing - feature disabled")
if not NOTION_DB_ID:
    print("⚠️ Notion DB ID missing - feature disabled")

# The OpenAI key is required; without it main() refuses to run.
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY
    print("✅ OpenAI API key set")
else:
    print("❌ Key retrieval failed: OPENAI_API_KEY not set")

def handle_transcript_input():
    """Collects a meeting transcript from the user and pre-processes it.

    The user chooses between uploading a file (.txt or .docx) and pasting the
    text. If no file is uploaded, or the menu choice is not recognised, the
    function falls back to the paste prompt instead of continuing with an
    empty transcript.

    Returns:
        A tuple ``(cleaned_text, segments)``: the output of
        ``clean_transcript`` and the list produced by ``segment_text``.

    Raises:
        ValueError: if the uploaded file is neither .txt nor .docx.
    """
    print("\n=== Handling Transcript Input ===")

    # Strip so that "1 " or " 2" still select the intended option.
    input_method = input("Choose input method (1-upload, 2-paste): ").strip()
    transcript_text = ""

    # File upload handling
    if input_method == "1":
        uploaded = files.upload()
        if not uploaded:
            print("⚠️ No files uploaded, switching to paste")
            transcript_text = input("Paste meeting transcript: ")
        else:
            # Only the first uploaded file is used.
            filename = next(iter(uploaded))
            print(f"✅ Uploaded: {filename}")

            # Compare extensions case-insensitively (e.g. "NOTES.TXT").
            lowered_name = filename.lower()

            # Text file processing
            if lowered_name.endswith('.txt'):
                # 'utf-8-sig' also accepts plain UTF-8 but drops a leading
                # byte-order mark, which would otherwise stay in the text.
                transcript_text = uploaded[filename].decode('utf-8-sig')

            # DOCX processing
            elif lowered_name.endswith('.docx'):
                doc = Document(filename)
                transcript_text = "\n".join(para.text for para in doc.paragraphs)
            else:
                raise ValueError("Unsupported file format")

    # Text paste handling
    elif input_method == "2":
        transcript_text = input("Paste meeting transcript: ")

    # Anything else: same fallback as the "no file uploaded" case.
    else:
        print("⚠️ Unrecognized choice, switching to paste")
        transcript_text = input("Paste meeting transcript: ")

    cleaned_text = clean_transcript(transcript_text)
    segments = segment_text(cleaned_text)

    print(f"📝 Processed text: {len(segments)} segments")
    return cleaned_text, segments

def test_notion_connection():
    """Checks that the configured Notion database can be retrieved.

    Prints the database name and its property names on success.

    Returns:
        True if the database was retrieved, False if the request failed.
    """
    # Only the API call itself decides success; formatting the result below
    # must not turn a working connection into a reported failure.
    try:
        notion = Client(auth=NOTION_TOKEN)
        db_info = notion.databases.retrieve(database_id=NOTION_DB_ID)
    except Exception as e:
        print(f"❌ Notion connection failed: {str(e)}")
        return False

    print("✅ Notion connection successful!")

    # The title is a list of rich-text fragments and may be empty, so join
    # whatever is there rather than indexing the first element.
    title_parts = db_info.get('title') or []
    db_name = "".join(
        part.get('plain_text') or part.get('text', {}).get('content', '')
        for part in title_parts
    )
    print(f"DB Name: {db_name or '(untitled)'}")
    print("DB Properties:", list((db_info.get('properties') or {}).keys()))
    return True

def clean_transcript(text):
    """Strips timestamps and speaker labels and normalises blank lines.

    Applies three regex substitutions in order: remove ``H:MM:SS`` /
    ``HH:MM:SS`` timestamps, remove ``Speaker <n>`` labels (with an optional
    trailing colon), and collapse any whitespace-only gap between two
    newlines into exactly one blank line. The result is stripped of leading
    and trailing whitespace.
    """
    substitutions = (
        (r'\d{1,2}:\d{2}:\d{2}', ''),
        (r'Speaker\s*\d+:?', ''),
        (r'\n\s*\n', '\n\n'),
    )
    for pattern, replacement in substitutions:
        text = re.sub(pattern, replacement, text)
    return text.strip()

def segment_text(text):
    """Splits text on blank lines and returns the non-empty, trimmed parts."""
    segments = []
    for chunk in text.split('\n\n'):
        trimmed = chunk.strip()
        if trimmed:
            segments.append(trimmed)
    return segments

def _parse_json_reply(content):
    """Parses a model reply as JSON, tolerating text around the JSON object.

    Strict parsing is tried first. If it fails (for instance because the
    reply is wrapped in Markdown code fences or has a sentence before or
    after the JSON), the span from the first '{' to the last '}' is parsed
    instead.

    Raises:
        json.JSONDecodeError: if no parseable JSON object is found.
    """
    try:
        return json.loads(content)
    except json.JSONDecodeError:
        start, end = content.find('{'), content.rfind('}')
        if start == -1 or end < start:
            raise
        return json.loads(content[start:end + 1])


def analyze_with_gpt(text, language='en'):
    """Asks the chat model to extract structured meeting data from a transcript.

    Args:
        text: Transcript text. Only the first 10,000 characters are sent.
        language: Language code (e.g. 'en', 'zh-cn'). Its first two letters
            select the response language; unknown or missing codes fall back
            to English.

    Returns:
        A tuple ``(result, tokens_used)``. On success ``result`` is the dict
        parsed from the model's JSON reply, with ``fallback_used`` set to True
        when no action items were returned. On any failure ``result`` is
        ``{"error": <message>, "fallback_used": True}`` and ``tokens_used``
        is 0.
    """
    print("\n=== Analyzing with GPT ===")

    if not openai.api_key:
        print("❌ OpenAI API key missing")
        return {"error": "OpenAI API key not set", "fallback_used": True}, 0

    # Language mapping
    lang_map = {'zh': 'Chinese', 'es': 'Spanish', 'fr': 'French', 'en': 'English'}
    # `language or 'en'` keeps a None/empty code from raising before the try.
    lang_name = lang_map.get((language or 'en')[:2].lower(), 'English')

    # System prompt setup
    system_prompt = f"""
    You are a professional meeting analyst. Extract key information:
    - Respond in {lang_name}
    - Use this JSON format:
    {{
        "meeting_title": "Meeting Title",
        "participants": ["Attendee1", "Attendee2"],
        "summary": "Meeting summary",
        "action_items": [{{"task": "Task", "assignee": "Owner"}}],
        "key_points": {{
            "concerns": [],
            "decisions": [],
            "deadlines": [],
            "updates": []
        }},
        "meeting_type": "Meeting type",
        "platform": "Platform",
        "fallback_used": false
    }}

    Extraction rules:
    1. meeting_title: Extract from start/end or generate
    2. participants: Extract all attendees
    3. Focus on meeting start/end sections
    """

    user_prompt = f"Meeting transcript:\n{text[:10000]}"

    try:
        # GPT API call
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )

        content = response.choices[0].message['content']
        result = _parse_json_reply(content)
        # Everything below uses dict access; reject arrays/strings/numbers
        # with a clear message instead of an AttributeError.
        if not isinstance(result, dict):
            raise ValueError("GPT response was not a JSON object")
        tokens_used = response.usage['total_tokens']

        print(f"✅ GPT analysis complete! Tokens: {tokens_used}")
        print(f"Meeting title: {result.get('meeting_title', 'N/A')}")
        print(f"Participants: {len(result.get('participants', []))}")
        print(f"Meeting type: {result.get('meeting_type', 'N/A')}")
        print(f"Action items: {len(result.get('action_items', []))}")

        # Fallback for action items
        if not result.get('action_items'):
            result['fallback_used'] = True
            print("⚠️ No action items detected")

        return result, tokens_used

    except Exception as e:
        # API errors, malformed replies and unexpected shapes all end up
        # here; callers detect failure through the "error" key.
        print(f"❌ GPT analysis failed: {str(e)}")
        return {
            "error": str(e),
            "fallback_used": True
        }, 0

def _notion_rich_text(value, limit=2000):
    """Builds a Notion rich-text array for *value*.

    The value is converted to a string (None becomes "") and split into
    pieces of at most *limit* characters, because the Notion API limits the
    length of a single text object's content (2000 characters per its
    request-limits documentation). An empty value still yields one empty
    text object, matching what was sent before.
    """
    text = "" if value is None else str(value)
    pieces = [text[i:i + limit] for i in range(0, len(text), limit)] or [""]
    return [{"text": {"content": piece}} for piece in pieces]


def create_notion_entry(meeting_data):
    """Creates a page in the configured Notion database from meeting data.

    Args:
        meeting_data: Dict with the analysis result. Reads the keys
            ``meeting_title``, ``participants``, ``date``, ``meeting_type``,
            ``platform``, ``summary``, ``key_points`` and ``action_items``;
            missing or null values are replaced by defaults.

    Returns:
        True if the page was created, False if Notion is not configured or
        the request failed.
    """
    if not NOTION_TOKEN or not NOTION_DB_ID:
        print("⚠️ Notion config incomplete - skipping")
        return False

    print("\n=== Syncing to Notion ===")

    try:
        notion = Client(auth=NOTION_TOKEN)

        # `or` (rather than a .get default) also covers keys that are
        # present but null/empty in the model's reply.
        participants = meeting_data.get("participants") or ["Unknown"]
        if isinstance(participants, str):
            # A bare string would otherwise be joined character by character.
            participants = [participants]
        participant_text = ", ".join(str(person) for person in participants)

        # Prepare properties
        properties = {
            "Meeting Title": {"title": _notion_rich_text(meeting_data.get("meeting_title") or "Untitled")},
            "Participant": {"rich_text": _notion_rich_text(participant_text)},
            "Date & Duration": {"date": {"start": meeting_data.get("date") or datetime.datetime.now().isoformat()}},
            "Meeting Type": {"rich_text": _notion_rich_text(meeting_data.get("meeting_type") or "Other")},
            "Platform": {"select": {"name": str(meeting_data.get("platform") or "Unknown")}},
            "Summary": {"rich_text": _notion_rich_text(meeting_data.get("summary"))},
            "Key Points": {"rich_text": _notion_rich_text(format_key_points(meeting_data))},
            "Action Items": {"rich_text": _notion_rich_text(format_action_items(meeting_data))},
        }

        # Create entry
        new_page = notion.pages.create(
            parent={"database_id": NOTION_DB_ID},
            properties=properties
        )

        print(f"✅ Notion entry created! ID: {new_page['id']}")
        return True
    except Exception as e:
        print(f"❌ Notion sync failed: {str(e)}")
        return False

def format_key_points(data):
    """Formats the ``key_points`` mapping as plain text for Notion.

    Each category whose value is a non-empty list becomes an upper-cased
    heading line followed by one ``- item`` line per entry. Categories with
    empty or non-list values are skipped.

    Args:
        data: Dict that may contain a ``key_points`` dict of
            ``category -> list of items``.

    Returns:
        The formatted text, or "" when ``key_points`` is missing, null or
        not a dict.
    """
    key_points = data.get("key_points")
    # The model may return null or a list here; treat that as "no key
    # points" instead of failing on .items().
    if not isinstance(key_points, dict):
        return ""

    lines = []
    for category, items in key_points.items():
        if items and isinstance(items, list):
            lines.append(f"{str(category).upper()}:")
            lines.extend(f"- {item}" for item in items)
    return "\n".join(lines)

def format_action_items(data):
    """Renders the ``action_items`` list as one bullet line per item.

    Dict items are shown as ``- <task> (Owner: <assignee>)`` with
    placeholders for missing keys; any other item is shown via ``str``.
    Returns "No action items" when the list is missing, empty or not a list.
    """
    entries = data.get("action_items", [])
    if not (entries and isinstance(entries, list)):
        return "No action items"

    def render(entry):
        # Non-dict entries are rendered as-is.
        if not isinstance(entry, dict):
            return f"- {str(entry)}"
        task = entry.get('task', 'Unknown task')
        owner = entry.get('assignee', 'Unassigned')
        return f"- {task} (Owner: {owner})"

    return "\n".join(render(entry) for entry in entries)

def setup_whisper():
    """Installs Whisper dependencies and asks for an audio sample.

    Installs the ``whisper`` package from its GitHub repository with pip and
    the ``ffmpeg`` package with apt, then opens the Colab upload dialog. The
    function does not load or run Whisper itself.

    Returns:
        The name of the first uploaded file, or None if nothing was uploaded.
    """
    print("\n=== Setting up Whisper ===")
    # The `!` lines are notebook shell escapes, so this function is only
    # valid when the code runs as an IPython/Colab cell.
    !pip install git+https://github.com/openai/whisper.git
    !sudo apt update && sudo apt install ffmpeg

    uploaded_audio = files.upload()
    if uploaded_audio:
        # Only the first uploaded file is reported and returned.
        audio_file = list(uploaded_audio.keys())[0]
        print(f"🔊 Audio sample: {audio_file}")
        return audio_file
    return None

def _write_logs(logs, path):
    """Writes the run log dict to *path* as indented UTF-8 JSON."""
    # ensure_ascii=False keeps non-ASCII titles (e.g. Chinese) readable in
    # the file; the explicit encoding makes that safe on any platform.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(logs, f, indent=2, ensure_ascii=False)


def main():
    """Runs the full workflow: input -> language detection -> GPT -> Notion.

    Progress is collected in a ``logs`` dict with ``steps`` and ``errors``
    lists. A successful run writes it to ``meeting_logs.json``; every failure
    path (empty transcript, GPT failure, unexpected exception) writes it to
    ``error_log.json`` so the record of what happened is never lost.
    """
    if not openai.api_key:
        print("❌ OpenAI API key missing")
        return

    logs = {"steps": [], "errors": []}

    # Test Notion connection (a failure is reported but does not stop the run)
    if NOTION_TOKEN and NOTION_DB_ID:
        if not test_notion_connection():
            print("⚠️ Notion connection failed")

    try:
        # Process input
        cleaned_text, segments = handle_transcript_input()

        # An empty transcript would make the model invent a meeting and that
        # invention would then be synced to Notion, so stop here.
        if not cleaned_text:
            logs["steps"].append({
                "step": "Text input",
                "segment_count": len(segments),
                "status": "failed"
            })
            logs["errors"].append("Transcript is empty")
            print("❌ Transcript is empty - nothing to analyze")
            _write_logs(logs, "error_log.json")
            return

        logs["steps"].append({
            "step": "Text input",
            "segment_count": len(segments),
            "status": "success"
        })

        # Detect language from the first 500 characters; default to English
        try:
            language = detect(cleaned_text[:500])
        except LangDetectException:
            language = 'en'

        # GPT analysis
        gpt_results, tokens_used = analyze_with_gpt(cleaned_text, language)

        if "error" in gpt_results:
            logs["steps"].append({
                "step": "GPT analysis",
                "status": "failed",
                "error": gpt_results["error"]
            })
            logs["errors"].append(str(gpt_results["error"]))
            print(f"❌ GPT failed: {gpt_results['error']}")
            # Persist the log before leaving; previously this path returned
            # without writing anything.
            _write_logs(logs, "error_log.json")
            return

        logs["steps"].append({
            "step": "GPT analysis",
            "tokens_used": tokens_used,
            "meeting_title": gpt_results.get("meeting_title"),
            "participants_count": len(gpt_results.get("participants") or []),
            "meeting_type": gpt_results.get("meeting_type"),
            "action_items_count": len(gpt_results.get("action_items") or []),
            "status": "success"
        })

        # Add date and sync to Notion
        gpt_results["date"] = datetime.datetime.now().isoformat()
        notion_success = create_notion_entry(gpt_results)
        logs["steps"].append({
            "step": "Notion sync",
            "status": "success" if notion_success else "failed"
        })

        # Save logs
        _write_logs(logs, "meeting_logs.json")

        print("\n✅ Process complete! Logs saved")

    except Exception as e:
        # Top-level boundary: record the error alongside the steps that did
        # complete, then save the log.
        logs["errors"].append(str(e))
        print(f"\n❌ Process error: {str(e)}")
        _write_logs(logs, "error_log.json")

# Start the workflow only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

✅ OpenAI API key set
✅ Notion connection successful!
DB Name: 📄 Meeting Logs
DB Properties: ['Date & Duration', 'Key Points', 'Platform', 'Participant', 'Summary', 'Action Items', 'Meeting Type', 'Meeting Title']

=== Handling Transcript Input ===
Choose input method (1-upload, 2-paste): 1


Saving 会议记录.docx to 会议记录 (7).docx
✅ Uploaded: 会议记录 (7).docx
📝 Processed text: 1 segments

=== Analyzing with GPT ===
✅ GPT analysis complete! Tokens: 1432
Meeting title: 项目进度汇报与讨论
Participants: 6
Meeting type: 项目进度汇报与讨论
Action items: 3

=== Syncing to Notion ===
✅ Notion entry created! ID: 2305fee1-8e37-81a0-a874-f76428a7e501

✅ Process complete! Logs saved
