<a href="https://colab.research.google.com/github/yuuu125/Lunette/blob/main/AI_Assistent1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install openai==0.28.1 python-docx notion-client langdetect pydub
!pip install git+https://github.com/openai/whisper.git
!sudo apt update && sudo apt install ffmpeg  # 确保安装必要的依赖

import os
import re
import json
import openai
import whisper
from docx import Document
from google.colab import files, userdata
from notion_client import Client
from langdetect import detect, LangDetectException
import datetime
import tempfile
import torch
from pydub import AudioSegment
import subprocess

def _read_colab_secret(name):
    """Return the Colab secret *name*, or None when it cannot be read.

    Each secret is fetched independently so that a missing optional secret
    (the Notion ones) cannot prevent the OpenAI key from being loaded.
    """
    try:
        return userdata.get(name)
    except Exception as e:
        # userdata.get raises for secrets that are absent or not shared with
        # this notebook; treat that the same as "not configured".
        print(f"⚠️ Could not read secret {name}: {e}")
        return None


# Bind every credential name up front so later code can test them for
# truthiness without risking a NameError.
OPENAI_API_KEY = _read_colab_secret('OPENAI_API_KEY')
NOTION_TOKEN = _read_colab_secret('NOTION_TOKEN')
NOTION_DB_ID = _read_colab_secret('NOTION_DB_ID')

try:
    # The OpenAI key is mandatory; the Notion values only gate the sync step.
    if not OPENAI_API_KEY:
        raise ValueError("OPENAI_API_KEY not set")
    if not NOTION_TOKEN:
        print("⚠️ Notion token missing - feature disabled")
    if not NOTION_DB_ID:
        print("⚠️ Notion DB ID missing - feature disabled")

    openai.api_key = OPENAI_API_KEY
    print("✅ OpenAI API key set")

except Exception as e:
    print(f"❌ Key retrieval failed: {str(e)}")

def get_audio_duration(audio_path):
    """Return the length of an audio file in seconds.

    ffprobe is asked for the container duration. If that fails for any reason
    (ffprobe unavailable, output that is not a number, ...) a message is
    printed and the duration is estimated from the file size instead, never
    going below 30.
    """
    ffprobe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    try:
        probe = subprocess.run(ffprobe_cmd, capture_output=True, text=True)
        seconds = float(probe.stdout)
    except Exception as e:
        print(f"⚠️ 无法获取精确时长，使用估算值: {e}")
        # Rough estimate that assumes about 8000 bytes of audio per second.
        size_in_bytes = os.path.getsize(audio_path)
        seconds = max(30, size_in_bytes // 8000)
    return seconds

def transcribe_audio(audio_path, model_size="base"):
    """Transcribe an audio file with Whisper and return the recognised text.

    The model runs on CUDA when torch reports a GPU, otherwise on the CPU.
    Any failure is reported on the console and then re-raised to the caller.
    """
    print(f"🔊 Starting transcription with Whisper ({model_size} model)...")

    try:
        # Prefer the GPU when one is available.
        on_gpu = torch.cuda.is_available()
        device = "cuda" if on_gpu else "cpu"
        print(f"💻 Using device: {device.upper()}")

        whisper_model = whisper.load_model(model_size, device=device)
        print(f"✅ Loaded Whisper {model_size} model")

        # Half precision is only requested when running on CUDA.
        options = {"fp16": on_gpu, "verbose": True, "task": "transcribe"}
        output = whisper_model.transcribe(audio_path, **options)

        text = output["text"]
        print(f"✅ Transcription complete! Characters: {len(text)}")
        return text

    except Exception as e:
        print(f"❌ Transcription failed: {str(e)}")
        raise

def test_notion_connection():
    """Check that the configured Notion token can read the target database.

    Returns True when the database lookup succeeds and False on any error.
    """
    try:
        Client(auth=NOTION_TOKEN).databases.retrieve(database_id=NOTION_DB_ID)
    except Exception as e:
        print(f"❌ Notion connection failed: {str(e)}")
        return False
    else:
        print("✅ Notion connection verified")
        return True

def clean_transcript(text):
    """Strip timestamps and speaker tags from a raw transcript.

    Removes ``H:MM:SS`` / ``HH:MM:SS`` timestamps and ``Speaker N`` labels
    (with an optional trailing colon), collapses runs of blank lines into a
    single blank line, and trims surrounding whitespace.
    """
    substitutions = (
        (r'\d{1,2}:\d{2}:\d{2}', ''),
        (r'Speaker\s*\d+:?', ''),
        (r'\n\s*\n', '\n\n'),
    )
    cleaned = text
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def segment_text(text):
    """Split text on blank lines and return the non-empty, trimmed paragraphs."""
    paragraphs = []
    for chunk in text.split('\n\n'):
        trimmed = chunk.strip()
        if trimmed:
            paragraphs.append(trimmed)
    return paragraphs

def _read_text_upload(filename, data):
    """Decode an uploaded .txt or .docx file (given as raw bytes) into text."""
    lowered = filename.lower()  # accept ".TXT" / ".DOCX" as well

    if lowered.endswith('.txt'):
        return data.decode('utf-8')

    if lowered.endswith('.docx'):
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp:
            tmp.write(data)
            tmp_path = tmp.name
        try:
            doc = Document(tmp_path)
            return "\n".join(para.text for para in doc.paragraphs)
        finally:
            # Remove the temp file even when the document cannot be parsed.
            os.unlink(tmp_path)

    raise ValueError("Unsupported file format")


def _transcribe_audio_upload(filename, data):
    """Store uploaded audio bytes in a temp file, transcribe it, and clean up."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as tmp:
        tmp.write(data)
        audio_path = tmp.name

    try:
        print("\n⚡ Select transcription speed:")
        print("1 - Fast (tiny model, fastest, lower accuracy)")
        print("2 - Balanced (base model, recommended)")
        print("3 - High Quality (small model, slower)")

        speed_choice = input("Your choice (1/2/3): ").strip() or "2"
        model_map = {"1": "tiny", "2": "base", "3": "small"}
        model_size = model_map.get(speed_choice, "base")

        # Duration is informational only; never let it block the transcription.
        try:
            duration = get_audio_duration(audio_path)
            minutes, seconds = divmod(int(round(duration)), 60)
            print(f"⏱ Audio duration: {minutes}m {seconds}s")

            # Rough processing-time multipliers relative to the audio length.
            time_estimates = {"tiny": 0.3, "base": 0.8, "small": 2.0}
            est_min, est_sec = divmod(int(round(duration * time_estimates[model_size])), 60)
            print(f"⏳ Estimated processing time: ~{est_min}m {est_sec}s")
        except Exception as e:
            print(f"⚠️ Duration estimation failed: {e}")

        return transcribe_audio(audio_path, model_size)
    finally:
        # Remove the temp file even when transcription raises.
        os.unlink(audio_path)


def handle_transcript_input():
    """Interactively obtain a meeting transcript and pre-process it.

    The user picks one of three sources: an uploaded .txt/.docx file, pasted
    text, or an uploaded audio file that is transcribed with Whisper. An empty
    upload or an unrecognised choice falls back to pasted text.

    Returns:
        A ``(cleaned_text, segments)`` tuple: the transcript after
        ``clean_transcript`` and its paragraphs from ``segment_text``.

    Raises:
        ValueError: if an uploaded text file is neither .txt nor .docx.
    """
    print("\n=== Handling Transcript Input ===")
    print("Choose input method:")
    print("1 - Upload text file (.txt or .docx)")
    print("2 - Paste text directly")
    print("3 - Upload audio file (transcribe with Whisper)")

    input_method = input("Your choice (1/2/3): ").strip()
    transcript_text = ""

    # Text file upload
    if input_method == "1":
        uploaded = files.upload()
        if not uploaded:
            print("⚠️ No files uploaded, switching to paste")
            transcript_text = input("Paste meeting transcript: ")
        else:
            filename = next(iter(uploaded))
            print(f"✅ Uploaded: {filename}")
            transcript_text = _read_text_upload(filename, uploaded[filename])

    # Pasted text
    elif input_method == "2":
        transcript_text = input("Paste meeting transcript: ")

    # Audio upload
    elif input_method == "3":
        uploaded_audio = files.upload()
        if not uploaded_audio:
            print("⚠️ No audio files uploaded, switching to text paste")
            transcript_text = input("Paste meeting transcript: ")
        else:
            filename = next(iter(uploaded_audio))
            print(f"✅ Uploaded audio: {filename}")
            transcript_text = _transcribe_audio_upload(filename, uploaded_audio[filename])

    else:
        print("⚠️ Invalid option, defaulting to text paste")
        transcript_text = input("Paste meeting transcript: ")

    cleaned_text = clean_transcript(transcript_text)
    segments = segment_text(cleaned_text)

    print(f"📝 Processed text: {len(segments)} segments, {len(cleaned_text)} characters")
    return cleaned_text, segments

def _parse_gpt_json(content):
    """Parse a model reply into a dict, tolerating Markdown fences or stray prose.

    Raises:
        ValueError: if no JSON object can be recovered from the reply
            (json.JSONDecodeError is a ValueError subclass).
    """
    candidate = content.strip()

    # Chat models frequently wrap JSON in ```json ... ``` fences.
    fenced = re.match(r"^```(?:json)?\s*(.*?)\s*```$", candidate, re.DOTALL | re.IGNORECASE)
    if fenced:
        candidate = fenced.group(1)

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError:
        # Fall back to the outermost {...} span when prose surrounds the JSON.
        start, end = candidate.find("{"), candidate.rfind("}")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(candidate[start:end + 1])

    if not isinstance(parsed, dict):
        raise ValueError("GPT reply is not a JSON object")
    return parsed


def analyze_with_gpt(text, language='en'):
    """Extract structured meeting information from a transcript via the GPT API.

    Args:
        text: Transcript text; only the first 10000 characters are sent.
        language: Language code; its first two characters select the reply
            language (zh/es/fr/en, anything else falls back to English).

    Returns:
        A ``(result, tokens_used)`` tuple. ``result`` is the parsed analysis
        dict; on any failure it is ``{"error": ..., "fallback_used": True}``
        and ``tokens_used`` is 0. ``fallback_used`` is also set to True when
        the reply contains no action items.
    """
    print("\n=== Analyzing with GPT ===")

    if not openai.api_key:
        print("❌ OpenAI API key missing")
        return {"error": "OpenAI API key not set", "fallback_used": True}, 0

    # Language mapping
    lang_map = {'zh': 'Chinese', 'es': 'Spanish', 'fr': 'French', 'en': 'English'}
    lang_name = lang_map.get((language or 'en')[:2], 'English')

    # System prompt setup
    system_prompt = f"""
    You are a professional meeting analyst. Extract key information:
    - Respond in {lang_name}
    - Use this JSON format:
    {{
        "meeting_title": "Meeting Title",
        "participants": ["Attendee1", "Attendee2"],
        "summary": "Meeting summary",
        "action_items": [{{"task": "Task", "assignee": "Owner"}}],
        "key_points": {{
            "concerns": [],
            "decisions": [],
            "deadlines": [],
            "updates": []
        }},
        "meeting_type": "Meeting type",
        "platform": "Platform",
        "fallback_used": false
    }}

    Extraction rules:
    1. meeting_title: Extract from start/end or generate
    2. participants: Extract all attendees
    3. Focus on meeting start/end sections
    """

    user_prompt = f"Meeting transcript:\n{text[:10000]}"

    try:
        # GPT API call
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            temperature=0.3
        )

        content = response.choices[0].message['content']
        # The reply is free text: strip fences / prose before decoding.
        result = _parse_gpt_json(content)
        tokens_used = response.usage['total_tokens']

        print(f"✅ GPT analysis complete! Tokens: {tokens_used}")
        print(f"Meeting title: {result.get('meeting_title', 'N/A')}")
        print(f"Participants: {len(result.get('participants') or [])}")
        print(f"Meeting type: {result.get('meeting_type', 'N/A')}")
        print(f"Action items: {len(result.get('action_items') or [])}")

        # Fallback for action items
        if not result.get('action_items'):
            result['fallback_used'] = True
            print("⚠️ No action items detected")

        return result, tokens_used

    except Exception as e:
        print(f"❌ GPT analysis failed: {str(e)}")
        return {
            "error": str(e),
            "fallback_used": True
        }, 0

def create_notion_entry(meeting_data):
    """Create one row in the configured Notion database from the analysis dict.

    Args:
        meeting_data: Analysis result (meeting_title, participants, summary,
            key_points, action_items, meeting_type, platform, optional date).

    Returns:
        True when the page was created; False when Notion is not configured
        or the API call failed (the error is printed, not raised).
    """
    if not NOTION_TOKEN or not NOTION_DB_ID:
        print("⚠️ Notion config incomplete - skipping")
        return False

    print("\n=== Syncing to Notion ===")

    def rich_text(content):
        """Build a Notion rich-text array, splitting text into <=2000-char pieces.

        Notion rejects a single text object whose content exceeds 2000
        characters, which long summaries or key-point lists easily do.
        """
        text = "" if content is None else str(content)
        pieces = [text[i:i + 2000] for i in range(0, len(text), 2000)] or [""]
        return [{"text": {"content": piece}} for piece in pieces]

    try:
        notion = Client(auth=NOTION_TOKEN)

        # Model output is untrusted: participants may be a string, null, or
        # contain non-string entries.
        participants = meeting_data.get("participants", ["Unknown"])
        if isinstance(participants, str):
            participants = [participants]
        elif not isinstance(participants, (list, tuple)):
            participants = ["Unknown"]
        participant_text = ", ".join(str(p) for p in participants)

        # Select option names may not contain commas.
        platform = str(meeting_data.get("platform") or "Unknown").replace(",", " /")

        # Prepare properties
        properties = {
            "Meeting Title": {"title": rich_text(meeting_data.get("meeting_title") or "Untitled")},
            "Participant": {"rich_text": rich_text(participant_text)},
            "Date & Duration": {"date": {"start": meeting_data.get("date") or datetime.datetime.now().isoformat()}},
            "Meeting Type": {"rich_text": rich_text(meeting_data.get("meeting_type") or "Other")},
            "Platform": {"select": {"name": platform}},
            "Summary": {"rich_text": rich_text(meeting_data.get("summary"))},
            "Key Points": {"rich_text": rich_text(format_key_points(meeting_data))},
            "Action Items": {"rich_text": rich_text(format_action_items(meeting_data))},
        }

        # Create entry
        new_page = notion.pages.create(
            parent={"database_id": NOTION_DB_ID},
            properties=properties
        )

        print(f"✅ Notion entry created! ID: {new_page['id']}")
        return True
    except Exception as e:
        print(f"❌ Notion sync failed: {str(e)}")
        return False

def format_key_points(data):
    """Render the ``key_points`` mapping of *data* as plain text for Notion.

    Each category that holds a non-empty list becomes an upper-cased heading
    line followed by one ``- item`` line per entry. Categories whose value is
    empty or not a list are skipped.

    Returns:
        The joined lines, or an empty string when ``key_points`` is missing,
        null, or not a mapping.
    """
    key_points = data.get("key_points")
    # Model output may carry null or a list here; treat that as "no key points"
    # instead of failing on .items().
    if not isinstance(key_points, dict):
        return ""

    lines = []
    for category, items in key_points.items():
        if items and isinstance(items, list):
            lines.append(f"{str(category).upper()}:")
            lines.extend(f"- {item}" for item in items)
    return "\n".join(lines)

def format_action_items(data):
    """Render the ``action_items`` list of *data* as one text line per item.

    Dict entries become ``- task (Owner: assignee)`` with placeholder values
    for missing keys; any other entry is rendered via ``str``. Returns
    "No action items" when the list is missing, empty, or not a list.
    """
    entries = data.get("action_items", [])
    if not (entries and isinstance(entries, list)):
        return "No action items"

    def render(entry):
        # Non-dict entries are shown verbatim.
        if not isinstance(entry, dict):
            return f"- {str(entry)}"
        what = entry.get('task', 'Unknown task')
        owner = entry.get('assignee', 'Unassigned')
        return f"- {what} (Owner: {owner})"

    return "\n".join(render(entry) for entry in entries)

def main():
    """Run the full workflow: read a transcript, analyse it, sync to Notion.

    Progress is collected in a ``logs`` dict. It is written to
    ``meeting_logs.json`` when the run finishes, including when GPT analysis
    reports an error, and to ``error_log.json`` when an unexpected exception
    aborts the run.
    """
    if not openai.api_key:
        print("❌ OpenAI API key missing")
        return

    logs = {"steps": [], "errors": []}

    def write_logs(path):
        """Dump the collected logs to *path* as UTF-8 JSON."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(logs, f, indent=2, ensure_ascii=False)

    # Test Notion connection
    if NOTION_TOKEN and NOTION_DB_ID:
        if not test_notion_connection():
            print("⚠️ Notion connection failed")

    try:
        # Process input
        cleaned_text, segments = handle_transcript_input()
        logs["steps"].append({
            "step": "Text input",
            "segment_count": len(segments),
            "status": "success"
        })

        # Detect language
        try:
            language = detect(cleaned_text[:500]) if cleaned_text else 'en'
        except LangDetectException:
            language = 'en'
        print(f"🌐 Detected language: {language}")

        # GPT analysis
        gpt_results, tokens_used = analyze_with_gpt(cleaned_text, language)

        if "error" in gpt_results:
            logs["steps"].append({
                "step": "GPT analysis",
                "status": "failed",
                "error": gpt_results["error"]
            })
            print(f"❌ GPT failed: {gpt_results['error']}")
            # Persist the failed step before bailing out; otherwise the
            # record of what went wrong is discarded.
            write_logs("meeting_logs.json")
            return

        logs["steps"].append({
            "step": "GPT analysis",
            "tokens_used": tokens_used,
            "meeting_title": gpt_results.get("meeting_title"),
            "participants_count": len(gpt_results.get("participants") or []),
            "meeting_type": gpt_results.get("meeting_type"),
            "action_items_count": len(gpt_results.get("action_items") or []),
            "status": "success"
        })

        # Add date and sync to Notion
        gpt_results["date"] = datetime.datetime.now().isoformat()
        notion_success = create_notion_entry(gpt_results)
        logs["steps"].append({
            "step": "Notion sync",
            "status": "success" if notion_success else "failed"
        })

        # Save logs
        write_logs("meeting_logs.json")

        print("\n✅ Process complete! Logs saved")

    except Exception as e:
        logs["errors"].append(str(e))
        print(f"\n❌ Process error: {str(e)}")
        write_logs("error_log.json")

# Start the workflow only when this code runs as the main program, not on import.
if __name__ == "__main__":
    main()

Collecting openai==0.28.1
  Downloading openai-0.28.1-py3-none-any.whl.metadata (11 kB)
Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Collecting notion-client
  Downloading notion_client-2.4.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting langdetect
  Downloading langdetect-1.0.9.tar.gz (981 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m981.5/981.5 kB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading openai-0.28.1-py3-none-any.whl (76 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m16.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading notion_client-2.4.0-py2.py3-none-any.whl (13 kB)
Building wheels for collected packages: langdetect
  Buil

Saving test.mp3 to test.mp3
✅ Uploaded audio: test.mp3

⚡ Select transcription speed:
1 - Fast (tiny model, fastest, lower accuracy)
2 - Balanced (base model, recommended)
3 - High Quality (small model, slower)
Your choice (1/2/3): 1
⏱ Audio duration: 52m 36s
⏳ Estimated processing time: ~15m 47s
🔊 Starting transcription with Whisper (tiny model)...
💻 Using device: CPU


100%|██████████████████████████████████████| 72.1M/72.1M [00:00<00:00, 104MiB/s]


✅ Loaded Whisper tiny model
Detecting language using up to the first 30 seconds. Use `--language` to specify the language
Detected language: English
[00:00.000 --> 00:04.640]  Support for this podcast and the following message come from Sierra Nevada Brewing Company,
[00:04.640 --> 00:08.560]  where pure ingredients and sustainable brewing meet a legacy of craft.
[00:08.560 --> 00:11.560]  Share one with a friend today and taste for yourself.
[00:11.560 --> 00:15.280]  Sierra Nevada, taste what matters, please drink responsibly.
[00:20.080 --> 00:27.280]  From NPR and WBEZ Chicago, this is Weight Weight Don't Tell Me The NPR News Quiz.
[00:27.520 --> 00:32.560]  I'm the guy whose voice is bigger than John Hencock's signature.
[00:36.880 --> 00:42.000]  Bill Pernive, saying here is your host at the Stutabaker Theatre in downtown Chicago.
[00:42.000 --> 00:43.440]  Peter, say go.
[00:43.440 --> 00:44.560]  Thank you, Bill.
[00:44.560 --> 00:45.680]  Thank you, everybody.
[00:46.640 --> 0

In [1]:
# Updated installation commands
!pip uninstall -y langchain langchain-core langchain-community langchain-openai openai

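# Install pinned, mutually compatible versions of the core packages
# NOTE(review): langchain-openai 0.1.x appears to depend on openai>=1.x, while openai is
# pinned to 0.28.1 below; confirm that ChatOpenAI still works with this combination.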
!pip install langchain==0.2.0
!pip install langchain-core==0.2.1
!pip install langchain-community==0.2.0
!pip install langchain-openai==0.1.7  # 兼容版本
!pip install openai==0.28.1  # 保持此版本

# Remaining dependencies are unchanged
!pip install tqdm python-docx notion-client langdetect pydub
!pip install git+https://github.com/openai/whisper.git
!sudo apt update && sudo apt install ffmpeg -y

import os
import re
import json
import openai
import whisper
from docx import Document
from google.colab import files, userdata
from notion_client import Client
from langdetect import detect, LangDetectException
import datetime
import tempfile
import torch
from pydub import AudioSegment
import subprocess
from tqdm import tqdm
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain, SimpleSequentialChain
from langchain_community.llms import OpenAI
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# ======================
# Initialization (with stricter error handling)
# ======================
try:
    # Pull every credential from the Colab secrets store.
    OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')
    NOTION_TOKEN = userdata.get('NOTION_TOKEN')
    NOTION_DB_ID = userdata.get('NOTION_DB_ID')
    NOTION_PAGE_ID = userdata.get('NOTION_PAGE_ID')

    # Stricter check: gather the names of all empty credentials so they are
    # reported together in a single error.
    missing_creds = [
        cred_name
        for cred_name, cred_value in (
            ("OPENAI_API_KEY", OPENAI_API_KEY),
            ("NOTION_TOKEN", NOTION_TOKEN),
            ("NOTION_DB_ID", NOTION_DB_ID),
            ("NOTION_PAGE_ID", NOTION_PAGE_ID),
        )
        if not cred_value
    ]

    if missing_creds:
        raise ValueError(f"Missing credentials: {', '.join(missing_creds)}")

    openai.api_key = OPENAI_API_KEY
    print("✅ All credentials set")

except Exception as e:
    print(f"❌ Key retrieval failed: {str(e)}")
    # Show the setup help, then re-raise so the cell stops here.
    print("\n".join([
        "\n🔧 Setup Instructions:",
        "1. Click the key icon on the left sidebar (Colab secrets)",
        "2. Add the following secrets:",
        "   - OPENAI_API_KEY: Your OpenAI API key",
        "   - NOTION_TOKEN: Your Notion integration token",
        "   - NOTION_DB_ID: ID of your Notion database",
        "   - NOTION_PAGE_ID: ID of the parent page for reports",
        "3. Rerun this cell after adding secrets",
    ]))
    raise

# ======================
# Logging system (enhanced)
# ======================
class MeetingLogger:
    """Collects timestamped step records, errors and metrics for one run."""

    def __init__(self):
        started_at = datetime.datetime.now().isoformat()
        self.logs = {"start_time": started_at, "steps": [], "errors": [], "metrics": {}}

    def log_step(self, step_name, status, details=None, error=None):
        """Append a processing-step record with the current timestamp.

        ``details`` and ``error`` are stored only when truthy; ``error`` is
        converted to its string form.
        """
        record = dict(
            step=step_name,
            timestamp=datetime.datetime.now().isoformat(),
            status=status,
        )
        if details:
            record["details"] = details
        if error:
            record["error"] = str(error)
        self.logs["steps"].append(record)

    def log_metric(self, name, value):
        """Store (or overwrite) a named performance metric."""
        metrics = self.logs["metrics"]
        metrics[name] = value

    def save_logs(self, filename="meeting_logs.json"):
        """Write the collected logs to *filename* as JSON and return the filename."""
        with open(filename, "w") as out:
            json.dump(self.logs, out, indent=2)
        return filename

    def get_console_log(self):
        """Build a console-friendly, newline-terminated summary of the logs."""
        lines = [
            "=== Meeting Processing Log ===",
            f"Start Time: {self.logs['start_time']}",
        ]

        for record in self.logs["steps"]:
            # Anything other than "success" is shown with the failure icon.
            icon = "✅" if record["status"] == "success" else "❌"
            line = f"{icon} [{record['timestamp']}] {record['step']}"
            if "details" in record:
                line += f" - {record['details']}"
            if record["status"] == "failed":
                line += f" - ERROR: {record.get('error', 'Unknown')}"
            lines.append(line)

        metrics = self.logs["metrics"]
        if metrics:
            lines.append("")
            lines.append("=== Metrics ===")
            lines.extend(f"- {metric}: {value}" for metric, value in metrics.items())

        return "\n".join(lines) + "\n"

# Module-level logger instance used by the functions defined below
logger = MeetingLogger()

# ======================
# Audio processing (enhanced)
# ======================
def get_audio_duration(audio_path):
    """Measure audio length in seconds with ffprobe and record it as a metric.

    On any failure a "warning" step is logged and the duration is estimated
    from the file size instead (size // 8000, but at least 30).
    """
    probe_cmd = [
        "ffprobe",
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "default=noprint_wrappers=1:nokey=1",
        audio_path,
    ]
    try:
        completed = subprocess.run(probe_cmd, capture_output=True, text=True)
        seconds = float(completed.stdout)
        logger.log_metric("audio_duration_sec", seconds)
    except Exception as e:
        logger.log_step("Audio Duration", "warning", error=e)
        estimated = os.path.getsize(audio_path) // 8000
        return max(30, estimated)
    return seconds

def transcribe_audio(audio_path, model_size="base"):
    """Transcribe audio with Whisper and return ``(text, detected_language)``.

    Every stage (hardware check, model loading, transcription) is recorded
    through the module-level logger. A failure is logged and re-raised.
    """
    logger.log_step(
        "Whisper Transcription",
        "started",
        {"model_size": model_size, "audio_path": audio_path},
    )

    try:
        on_gpu = torch.cuda.is_available()
        device = "cuda" if on_gpu else "cpu"
        logger.log_step("Hardware Check", "success", {"device": device})

        asr_model = whisper.load_model(model_size, device=device)
        logger.log_step("Model Loading", "success")

        # Half precision is only requested when running on CUDA.
        output = asr_model.transcribe(
            audio_path, fp16=on_gpu, verbose=False, task="transcribe"
        )

        text, lang_code = output["text"], output["language"]
        outcome = {"characters": len(text), "detected_language": lang_code}
        logger.log_step("Whisper Transcription", "success", outcome)

        return text, lang_code

    except Exception as e:
        logger.log_step("Whisper Transcription", "failed", error=e)
        raise

# ======================
# LangChain components (now using a Pydantic model)
# ======================
# Schema of the structured analysis requested from the LLM. It is handed to
# JsonOutputParser in setup_langchain_chains, which derives the prompt's format
# instructions from these fields and their descriptions.
# NOTE(review): documented with comments rather than a class docstring, because
# pydantic may copy a docstring into the generated schema (and therefore into
# the prompt); confirm before adding one.
class MeetingAnalysis(BaseModel):
    # Title of the meeting
    meeting_title: str = Field(description="Meeting title extracted from context")
    # Names of the attendees
    participants: list[str] = Field(description="List of participant names")
    # Free-text summary
    summary: str = Field(description="Comprehensive meeting summary in 3-5 paragraphs")
    # Mapping of category name to entries; the dict's inner structure is not enforced here
    key_points: dict = Field(description="Key points organized by category: concerns, decisions, updates, risks")
    # One dict per action item; the keys are described in the text only, not validated
    action_items: list[dict] = Field(description="List of action items with task, assignee and due date")
    # Kind of meeting
    meeting_type: str = Field(description="Type of meeting e.g., project update, client call, team sync")
    # Where the meeting took place
    platform: str = Field(description="Platform where meeting occurred e.g., Zoom, Teams, WhatsApp")

def setup_langchain_chains(language='en'):
    """Build the LangChain analysis chain together with its JSON output parser.

    The first two characters of *language* choose the instruction line of the
    prompt (English, Chinese, Spanish or French; anything else falls back to
    English).

    Returns:
        An ``(analysis_chain, parser)`` tuple.
    """
    # One instruction line per supported language
    instructions_by_lang = {
        'en': "Analyze this meeting transcript in English",
        'zh': "用中文分析此会议记录",
        'es': "Analiza esta transcripción de reunión en español",
        'fr': "Analysez cette transcription de réunion en français"
    }
    lang_code = language[:2]
    if lang_code not in instructions_by_lang:
        lang_code = 'en'

    # Output parser built from the MeetingAnalysis model
    parser = JsonOutputParser(pydantic_object=MeetingAnalysis)

    # Values that are fixed at chain-construction time
    prefilled = {
        "lang_instruction": instructions_by_lang[lang_code],
        "format_instructions": parser.get_format_instructions()
    }

    # Prompt template; only the transcript is supplied per call
    prompt_template = PromptTemplate(
        template="""
        {lang_instruction}:

        {format_instructions}

        ### Meeting Transcript:
        {transcript}

        ### Analysis Guidelines:
        1. Identify all participants mentioned
        2. Extract meeting title from context or create descriptive one
        3. Group key points by category:
           - Concerns: Any risks or worries expressed
           - Decisions: Formal agreements made
           - Updates: Project or task progress reports
           - Risks: Potential future problems identified
        4. Action items must include: task description, assignee, and due date (if mentioned)
        5. Determine meeting type and platform from context clues
        """,
        input_variables=["transcript"],
        partial_variables=prefilled
    )

    # Chat model: the API key is passed explicitly
    chat_model = ChatOpenAI(
        openai_api_key=OPENAI_API_KEY,
        temperature=0.3,
        model="gpt-3.5-turbo"
    )

    return LLMChain(llm=chat_model, prompt=prompt_template, output_key="analysis"), parser

# ======================
# Notion report (enhanced)
# ======================
def create_notion_report_page(meeting_data, transcript, logs):
    """Create a Notion child page containing the detailed meeting report.

    The page is created under ``NOTION_PAGE_ID`` and filled with a details
    paragraph, the summary, key points grouped by category, an action-item
    table and the processing log.

    Args:
        meeting_data: Analysis dict (meeting_title, participants, summary,
            key_points, action_items, date, language, platform, meeting_type).
        transcript: Raw transcript text. Not used by this function; kept so
            the signature stays compatible with existing callers.
        logs: Log data supplied by the caller. Not used by this function; the
            report embeds ``logger.get_console_log()`` instead.

    Returns:
        The URL of the created page ("" if the API response carried none), or
        None when the report could not be created. Errors are logged and
        printed, not raised.
    """
    logger.log_step("Notion Report", "started")

    def rich_text(content):
        """Build a rich-text array, splitting text into <=2000-char pieces.

        Notion rejects a single text object whose content exceeds 2000
        characters; summaries and the processing log can easily be longer.
        """
        text = "" if content is None else str(content)
        pieces = [text[i:i + 2000] for i in range(0, len(text), 2000)] or [""]
        return [{"text": {"content": piece}} for piece in pieces]

    def text_block(block_type, content, **extra):
        """Build a block of *block_type* whose payload is rich text plus *extra*."""
        return {
            "object": "block",
            "type": block_type,
            block_type: {"rich_text": rich_text(content), **extra}
        }

    try:
        notion = Client(auth=NOTION_TOKEN)

        # Verify that the parent page exists and is accessible.
        try:
            parent_page = notion.pages.retrieve(NOTION_PAGE_ID)
        except Exception as e:
            logger.log_step("Parent Page Check", "failed", error=e)
            raise ValueError(f"Parent page {NOTION_PAGE_ID} not found or inaccessible") from e

        # An untitled parent page is still a valid parent, so read the title
        # defensively instead of treating a missing title as "inaccessible".
        title_parts = parent_page.get("properties", {}).get("title", {}).get("title", [])
        parent_title = title_parts[0].get("plain_text", "") if title_parts else ""
        logger.log_step("Parent Page Check", "success", {"title": parent_title})

        # Create the child page.
        page_title = str(meeting_data.get("meeting_title") or "Meeting Report")[:200]
        new_page = notion.pages.create(
            parent={"page_id": NOTION_PAGE_ID},
            properties={"title": {"title": [{"text": {"content": page_title}}]}}
        )
        page_id = new_page["id"]
        logger.log_step("Page Created", "success", {"page_id": page_id})

        children_blocks = []

        # 1. Metadata section. The lines are joined explicitly so the paragraph
        # does not carry the source code's indentation.
        participants = meeting_data.get('participants') or []
        if isinstance(participants, str):
            participants = [participants]
        details_text = "\n".join([
            f"**Date**: {meeting_data.get('date', 'N/A')}",
            f"**Participants**: {', '.join(str(p) for p in participants)}",
            f"**Language**: {meeting_data.get('language', 'Unknown')}",
            f"**Platform**: {meeting_data.get('platform', 'Unknown')}",
            f"**Meeting Type**: {meeting_data.get('meeting_type', 'N/A')}",
        ])
        children_blocks.append(text_block("heading_2", "Meeting Details"))
        children_blocks.append(text_block("paragraph", details_text))

        # 2. Summary section
        children_blocks.append(text_block("heading_2", "Summary"))
        children_blocks.append(text_block("paragraph", meeting_data.get('summary', '')))

        # 3. Key points section. Model output is untrusted: tolerate a missing
        # or non-dict value and non-list category contents.
        children_blocks.append(text_block("heading_2", "Key Points"))
        key_points = meeting_data.get('key_points')
        if not isinstance(key_points, dict):
            key_points = {}
        for category, items in key_points.items():
            children_blocks.append(text_block("heading_3", str(category).capitalize()))
            if isinstance(items, (list, tuple)):
                children_blocks.extend(
                    text_block("bulleted_list_item", item) for item in items
                )
            elif items:
                children_blocks.append(text_block("bulleted_list_item", items))

        # 4. Action items table (header row plus one row per item)
        children_blocks.append(text_block("heading_2", "Action Items"))
        action_items = meeting_data.get('action_items')
        if not isinstance(action_items, list):
            action_items = []

        table_rows = [["#", "Task", "Assignee", "Due Date"]]
        for idx, item in enumerate(action_items, start=1):
            if isinstance(item, dict):
                table_rows.append([
                    idx,
                    item.get('task') or '',
                    item.get('assignee') or 'Unassigned',
                    item.get('due_date') or 'N/A',
                ])
            else:
                table_rows.append([idx, item, 'Unassigned', 'N/A'])

        children_blocks.append({
            "object": "block",
            "type": "table",
            "table": {
                "table_width": 4,
                "has_column_header": True,
                "has_row_header": False,
                "children": [
                    {
                        "object": "block",
                        "type": "table_row",
                        "table_row": {"cells": [rich_text(cell) for cell in row]}
                    }
                    for row in table_rows
                ]
            }
        })

        # 5. Processing log. Code blocks carry a language; "plain text" is the
        # neutral choice for log output.
        children_blocks.append(text_block("heading_2", "Processing Logs"))
        children_blocks.append(
            text_block("code", logger.get_console_log(), language="plain text")
        )

        # Append the blocks in batches: a single request accepts at most 100
        # child blocks.
        for start in range(0, len(children_blocks), 100):
            notion.blocks.children.append(
                block_id=page_id,
                children=children_blocks[start:start + 100]
            )

        report_url = new_page.get("url", "")
        logger.log_step("Notion Report", "success", {"url": report_url})

        # Best-effort link back to the database. The report already exists at
        # this point, so a failure here must not turn a successful report into
        # a None result.
        # NOTE(review): this needs a "Database Relation" property on the page
        # and relation entries normally reference page ids; confirm the intent.
        if NOTION_DB_ID:
            try:
                notion.pages.update(
                    page_id=page_id,
                    properties={
                        "Database Relation": {
                            "relation": [{"id": NOTION_DB_ID}]
                        }
                    }
                )
            except Exception as e:
                logger.log_step("Database Relation", "warning", error=e)

        return report_url

    except Exception as e:
        # Include the API response body when the exception carries one.
        error_details = f"Notion API error: {str(e)}"
        body = getattr(getattr(e, 'response', None), 'content', None)
        if isinstance(body, bytes):
            body = body.decode('utf-8', errors='replace')
        if body:
            error_details += f"\nResponse: {body}"

        logger.log_step("Notion Report", "failed", error=error_details)
        print(f"❌ Notion failed: {error_details}")
        return None


# ======================
# Main workflow functions (revised)
# ======================
def _detect_transcript_language(transcript):
    """Detect the transcript language, falling back to English.

    Only the first 500 characters are sampled. When langdetect cannot decide
    (``LangDetectException``), a warning step is logged and ``'en'`` is
    returned.
    """
    try:
        lang = detect(transcript[:500])
    except LangDetectException:
        logger.log_step("Language Detection", "warning", "Using default English")
        return 'en'

    logger.log_step("Language Detection", "success", {"language": lang})
    return lang


def handle_transcript_input():
    """Prompt for a transcript source and return ``(transcript, language)``.

    Three input methods are offered:

    1. Upload an audio file, which is transcribed with ``transcribe_audio``.
    2. Upload a ``.txt`` or ``.docx`` file.
    3. Paste the text directly.

    An empty upload or an invalid menu choice re-runs the prompt.

    Returns:
        tuple: ``(transcript, language)``. For audio, both values come from
        ``transcribe_audio``; for text, the language is detected with
        langdetect (default ``'en'``).

    Raises:
        ValueError: If the uploaded file has an unsupported extension.
    """
    logger.log_step("Input Handling", "started")

    print("\n=== Input Method ===")
    print("1: Upload audio file (.mp3/.wav/.m4a/.opus)")
    print("2: Upload text file (.txt/.docx)")
    print("3: Paste text directly")

    choice = input("Select input method (1/2/3): ").strip()

    if choice == "1":
        # Audio workflow
        uploaded = files.upload()
        if not uploaded:
            logger.log_step("Audio Upload", "failed", "No files uploaded")
            # The prompt is shown again; the message now says so.
            print("No files uploaded, please choose an input method again")
            return handle_transcript_input()

        filename = next(iter(uploaded))
        logger.log_step("Audio Upload", "success", {"filename": filename, "size": len(uploaded[filename])})

        ext = os.path.splitext(filename)[1].lower()
        supported_audio = ('.mp3', '.wav', '.m4a', '.opus')
        if ext not in supported_audio:
            logger.log_step("Audio Processing", "failed", f"Unsupported format: {ext}")
            raise ValueError(f"Unsupported audio format: {ext}")

        # Persist the upload so ffprobe/Whisper can read it by path.
        # Leaving the ``with`` block closes (and flushes) the file.
        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as tmp:
            tmp.write(uploaded[filename])
            audio_path = tmp.name

        try:
            # Model selection
            print("\n⚡ Transcription Model:")
            print("1: Fast (tiny, low accuracy)")
            print("2: Balanced (base, recommended)")
            print("3: High Quality (small, slow)")
            model_choice = input("Select model (1/2/3): ").strip() or "2"
            model_map = {"1": "tiny", "2": "base", "3": "small"}
            model_size = model_map.get(model_choice, "base")

            # Transcribe
            duration = get_audio_duration(audio_path)
            logger.log_metric("audio_duration_seconds", duration)
            logger.log_metric("transcription_model", model_size)
            transcript, detected_lang = transcribe_audio(audio_path, model_size)
        finally:
            # Remove the temp file even when transcription fails.
            os.unlink(audio_path)

        return transcript, detected_lang

    elif choice == "2":
        # Text file workflow
        uploaded = files.upload()
        if not uploaded:
            logger.log_step("Text Upload", "failed", "No files uploaded")
            print("No files uploaded, please choose an input method again")
            return handle_transcript_input()

        filename = next(iter(uploaded))
        logger.log_step("Text Upload", "success", {"filename": filename})

        # Match extensions case-insensitively, as the audio branch does.
        lower_name = filename.lower()
        if lower_name.endswith('.txt'):
            transcript = uploaded[filename].decode('utf-8')
        elif lower_name.endswith('.docx'):
            # Close the temp file BEFORE python-docx opens it by name;
            # otherwise buffered bytes may not have reached the disk yet.
            with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as tmp:
                tmp.write(uploaded[filename])
                docx_path = tmp.name
            try:
                doc = Document(docx_path)
                transcript = "\n".join(p.text for p in doc.paragraphs)
            finally:
                os.unlink(docx_path)
        else:
            logger.log_step("Text Processing", "failed", f"Unsupported format: {filename}")
            raise ValueError("Unsupported file format")

        return transcript, _detect_transcript_language(transcript)

    elif choice == "3":
        # Pasted text
        transcript = input("Paste meeting transcript: ")
        logger.log_step("Text Input", "success", {"length": len(transcript)})

        return transcript, _detect_transcript_language(transcript)

    else:
        logger.log_step("Input Handling", "failed", "Invalid choice")
        # The prompt is shown again; nothing is defaulted.
        print("Invalid choice, please enter 1, 2 or 3")
        return handle_transcript_input()

def analyze_meeting(transcript, language='en', max_chars=15000):
    """Analyze a meeting transcript with the LangChain pipeline.

    Args:
        transcript: Full transcript text.
        language: Language code passed to ``setup_langchain_chains`` and
            stored in the result under ``"language"``.
        max_chars: Number of leading transcript characters sent to the
            chain. Defaults to 15000; ``None`` sends the whole transcript.

    Returns:
        dict: The parser output with ``"language"``, ``"date"`` (ISO
        timestamp of the analysis) and ``"fallback_used"`` added. On any
        failure, ``{"error": <message>, "fallback_used": True}`` instead.
    """
    logger.log_step("Meeting Analysis", "started", {"language": language})

    try:
        # Build the chain and its output parser
        analysis_chain, parser = setup_langchain_chains(language)
        raw_output = analysis_chain.run(transcript[:max_chars])  # bounded input

        # Parse the structured output
        parsed = parser.parse(raw_output)
        parsed["language"] = language
        parsed["date"] = datetime.datetime.now().isoformat()

        # Validate action items. A missing or None value is treated as
        # "no action items", not as a failure of the whole analysis.
        action_items = parsed.get("action_items") or []
        if not action_items:
            logger.log_step("Action Items", "warning", "No action items detected")
        parsed["fallback_used"] = not action_items

        # Use tolerant lookups here so that logging cannot raise and turn a
        # usable result into an error.
        logger.log_step("Meeting Analysis", "success", {
            "title": parsed.get("meeting_title"),
            "participants": len(parsed.get("participants") or []),
            "action_items": len(action_items),
        })

        return parsed

    except Exception as e:
        logger.log_step("Meeting Analysis", "failed", error=e)
        return {
            "error": str(e),
            "fallback_used": True
        }

def _notion_select_name(value, default="Unknown"):
    """Return *value* as a Notion select option name, or *default* if empty.

    Commas are replaced with spaces because Notion rejects them in select
    option names.
    """
    if not value:
        return default
    cleaned = " ".join(str(value).replace(",", " ").split())
    return cleaned or default


def create_notion_database_entry(meeting_data, report_url=None):
    """Create a row for the meeting in the configured Notion database.

    Args:
        meeting_data: Dict of analysis results. The keys read are
            ``meeting_title``, ``participants``, ``date``, ``meeting_type``,
            ``platform`` and ``language``; missing or empty values fall back
            to defaults.
        report_url: Optional URL stored in the ``Report`` property.

    Returns:
        bool: ``True`` when the page was created. ``False`` when credentials
        are missing or the Notion call fails (the failure is logged, not
        raised).
    """
    if not NOTION_TOKEN or not NOTION_DB_ID:
        logger.log_step("Notion DB Entry", "skipped", "Missing credentials")
        return False

    try:
        notion = Client(auth=NOTION_TOKEN)

        # ``or`` (not a .get default) so that None/empty values also fall
        # back; the analysis output may contain explicit nulls.
        participants = meeting_data.get("participants") or []
        if isinstance(participants, str):
            participants_text = participants
        else:
            participants_text = ", ".join(str(p) for p in participants)

        properties = {
            "Meeting Title": {"title": [{"text": {"content": meeting_data.get("meeting_title") or "Untitled"}}]},
            "Participants": {"rich_text": [{"text": {"content": participants_text}}]},
            "Date": {"date": {"start": meeting_data.get("date") or datetime.datetime.now().isoformat()}},
            "Meeting Type": {"select": {"name": _notion_select_name(meeting_data.get("meeting_type"))}},
            "Platform": {"select": {"name": _notion_select_name(meeting_data.get("platform"))}},
            "Language": {"select": {"name": _notion_select_name(meeting_data.get("language"))}},
            "Status": {"status": {"name": "Processed"}},
        }

        if report_url:
            properties["Report"] = {"url": report_url}

        new_entry = notion.pages.create(
            parent={"database_id": NOTION_DB_ID},
            properties=properties
        )

        entry_id = new_entry["id"]
        logger.log_step("Notion DB Entry", "success", {"entry_id": entry_id})
        return True

    except Exception as e:
        logger.log_step("Notion DB Entry", "failed", error=e)
        return False

# ======================
# 主函数重构
# ======================
def main():
    """Run the full workflow: input, analysis, Notion report, DB entry.

    Steps, in order:
      1. Verify the Notion connection (failure aborts the run).
      2. Collect the transcript via ``handle_transcript_input``.
      3. Analyze it with ``analyze_meeting``.
      4. Create the Notion report page (failure aborts the run).
      5. Create the Notion database entry (failure is only a warning).
      6. Save logs and print/display the report link.

    Exceptions are not propagated: any failure is logged, written to
    ``error_logs.json`` and printed.
    """
    logger.log_step("Workflow", "started")

    try:
        # Check Notion connectivity up front
        try:
            notion = Client(auth=NOTION_TOKEN)
            user_info = notion.users.me()
            # Tolerant lookups: a missing field must not be reported as a
            # connection failure.
            user_name = user_info.get('name')
            user_email = (user_info.get('person') or {}).get('email')
            logger.log_step("Notion Connection", "success",
                          {"user": user_name, "email": user_email})
            print(f"🔗 Connected to Notion as: {user_name}")
        except Exception as e:
            logger.log_step("Notion Connection", "failed", error=e)
            print(f"❌ Notion connection failed: {str(e)}")
            raise

        # Collect input
        transcript, language = handle_transcript_input()
        logger.log_metric("transcript_length", len(transcript))

        # Analyze the meeting
        meeting_data = analyze_meeting(transcript, language)
        if "error" in meeting_data:
            raise RuntimeError(f"Analysis failed: {meeting_data['error']}")

        # Create the Notion report page
        report_url = create_notion_report_page(
            meeting_data,
            transcript,
            logger.logs
        )

        if not report_url:
            raise RuntimeError("Failed to create Notion report")

        # Create the database entry
        db_success = create_notion_database_entry(meeting_data, report_url)

        if not db_success:
            print("⚠️ Failed to create Notion database entry, but report page was created")

        # Record the terminal status so saved logs show how the run ended
        logger.log_step("Workflow", "success", {"report_url": report_url})

        # Save logs
        log_file = logger.save_logs()
        print(f"\n✅ PROCESS COMPLETE! Logs saved to {log_file}")
        print(f"📄 Report Page: {report_url}")

        # Show a clickable link when running in a notebook. ``display`` is
        # imported explicitly because it is only an implicit global inside
        # IPython; without IPython the printed URL above is sufficient.
        try:
            from IPython.display import HTML, display
        except ImportError:
            pass
        else:
            display(HTML(f'<a href="{report_url}" target="_blank">Open Notion Report</a>'))

    except Exception as e:
        logger.log_step("Workflow", "failed", error=e)
        log_file = logger.save_logs("error_logs.json")
        print(f"\n❌ PROCESS FAILED! Error logs saved to {log_file}")
        print(f"Error details: {str(e)}")

# Entry point: run the workflow only when this module is executed directly
# (its __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
    main()

Found existing installation: langchain 0.2.0
Uninstalling langchain-0.2.0:
  Successfully uninstalled langchain-0.2.0
Found existing installation: langchain-core 0.2.43
Uninstalling langchain-core-0.2.43:
  Successfully uninstalled langchain-core-0.2.43
[0mCollecting langchain==0.2.0
  Using cached langchain-0.2.0-py3-none-any.whl.metadata (13 kB)
Collecting langchain-core<0.3.0,>=0.2.0 (from langchain==0.2.0)
  Using cached langchain_core-0.2.43-py3-none-any.whl.metadata (6.2 kB)
Using cached langchain-0.2.0-py3-none-any.whl (973 kB)
Using cached langchain_core-0.2.43-py3-none-any.whl (397 kB)
Installing collected packages: langchain-core, langchain
Successfully installed langchain-0.2.0 langchain-core-0.2.43
Collecting langchain-core==0.2.1
  Using cached langchain_core-0.2.1-py3-none-any.whl.metadata (5.9 kB)
Using cached langchain_core-0.2.1-py3-none-any.whl (308 kB)
Installing collected packages: langchain-core
  Attempting uninstall: langchain-core
    Found existing installatio

Saving test.mp3 to test.mp3

⚡ Transcription Model:
1: Fast (tiny, low accuracy)
2: Balanced (base, recommended)
3: High Quality (small, slow)
Select model (1/2/3): 1
Detected language: English


100%|██████████| 315616/315616 [04:22<00:00, 1200.10frames/s]


❌ PROCESS FAILED! Error logs saved to error_logs.json
Error details: Analysis failed: 1 validation error for ChatOpenAI
__root__
  Client.__init__() got an unexpected keyword argument 'proxies' (type=type_error)



