# In [None]:
from dotenv import load_dotenv
import os

# Load environment variables from .env. This must run before the client is
# constructed below, because the client reads OPENAI_BASE_URL and
# OPENAI_API_KEY via os.getenv at construction time.
load_dotenv()

from openai import OpenAI

# Shared API client used by the transcription and summarization helpers.
# Both the endpoint and the credential are looked up in the process
# environment; an unset variable is passed through as None.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL"),
)

import math
import argparse
import tempfile
from pathlib import Path
from pydub import AudioSegment


def split_audio(in_path: Path, chunk_minutes: int, tmp_dir: Path):
    """Split an audio file into consecutive fixed-length MP3 chunks.

    Args:
        in_path: Audio file to load with ``AudioSegment.from_file``.
        chunk_minutes: Length of every chunk in minutes; must be positive.
            The last chunk is shorter when the audio length is not an exact
            multiple of the chunk length.
        tmp_dir: Existing directory that receives the ``chunk_NNN.mp3`` files.

    Returns:
        List of chunk paths in playback order (empty for zero-length audio).

    Raises:
        ValueError: If ``chunk_minutes`` is zero or negative.
    """
    # Validate before decoding: zero would divide by zero below, and a
    # negative value would silently produce no chunks at all.
    if chunk_minutes <= 0:
        raise ValueError(
            f"chunk_minutes must be a positive number, got {chunk_minutes!r}"
        )

    audio = AudioSegment.from_file(in_path)
    total_ms = len(audio)  # len() of the segment is used as a millisecond count
    chunk_ms = chunk_minutes * 60_000
    total_chunks = math.ceil(total_ms / chunk_ms)

    paths = []
    for i in range(total_chunks):
        start = i * chunk_ms
        end = min(start + chunk_ms, total_ms)
        out_path = tmp_dir / f"chunk_{i:03d}.mp3"
        # export() hands back the output file object still open (per the
        # pydub API docs); close it so the handle is not leaked and the file
        # can later be moved or deleted on platforms that lock open files.
        audio[start:end].export(out_path, format="mp3").close()
        paths.append(out_path)
    return paths


def transcribe_file(audio_path: Path, model: str):
    """Transcribe a single audio file and return the stripped transcript text.

    The file is opened in binary mode and passed to
    ``client.audio.transcriptions.create`` on the module-level ``client``.

    Args:
        audio_path: Path of the audio file to upload.
        model: Name of the transcription model to request.

    Returns:
        The ``text`` attribute of the response with surrounding whitespace
        removed.
    """
    with open(audio_path, mode="rb") as audio_stream:
        transcription = client.audio.transcriptions.create(
            file=audio_stream,
            model=model,
        )
    raw_text = transcription.text
    return raw_text.strip()


def chat_summarize(text: str, model: str, system_prompt: str, instruction: str):
    """Ask the chat model to process ``text`` according to ``instruction``.

    The user message is the instruction, a blank line, then the text. A
    system message is sent first only when ``system_prompt`` is non-empty.
    The request goes through ``client.chat.completions.create`` on the
    module-level ``client`` with ``temperature=0.2``.

    Args:
        text: Material to summarize.
        model: Name of the chat model to request.
        system_prompt: Optional system message; skipped when empty.
        instruction: Task description placed ahead of the text.

    Returns:
        Content of the first returned choice, stripped of surrounding
        whitespace.
    """
    user_turn = {"role": "user", "content": f"{instruction}\n\n{text}"}
    if system_prompt:
        conversation = [{"role": "system", "content": system_prompt}, user_turn]
    else:
        conversation = [user_turn]

    completion = client.chat.completions.create(
        model=model,
        messages=conversation,
        temperature=0.2,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()


def main():
    """Command-line entry point: chunk, transcribe, and summarize an audio file.

    Pipeline:
      1. Split the input audio into fixed-length chunks in a temporary directory.
      2. Transcribe every chunk and write ``combined_transcript.txt``.
      3. Summarize every chunk and write ``chunk_summaries.txt``.
      4. Summarize the chunk summaries and write ``final_summary.txt``.
      5. With ``--keep-chunks``, move the chunk audio into
         ``<out-dir>/chunks_audio``; otherwise it is deleted with the
         temporary directory.

    Invalid arguments (non-positive ``--chunk-minutes`` or a missing input
    file) are reported through ``argparse`` and exit with status 2 before any
    output is created.
    """
    # Local import: only needed here, for moving kept chunks across filesystems.
    import shutil

    parser = argparse.ArgumentParser(description="Chunk, transcribe, and summarize an audio file.")
    parser.add_argument("audio", type=Path, help="Path to input audio file (mp3, wav, m4a, etc.)")
    parser.add_argument("--chunk-minutes", type=int, default=10, help="Chunk length in minutes")
    parser.add_argument("--out-dir", type=Path, default=Path("out"), help="Directory for outputs")
    parser.add_argument("--whisper-model", default="whisper-1", help="OpenAI transcription model")
    parser.add_argument("--summary-model", default="gpt-4o-mini", help="OpenAI summarization model")
    parser.add_argument("--system-prompt-file", type=Path, default=None, help="Optional system prompt file")
    parser.add_argument("--keep-chunks", action="store_true", help="Keep temporary audio chunks")
    args = parser.parse_args()

    # Fail fast on bad input instead of crashing midway through the pipeline.
    if args.chunk_minutes <= 0:
        parser.error("--chunk-minutes must be a positive integer")
    if not args.audio.is_file():
        parser.error(f"audio file not found: {args.audio}")

    # read system prompt if provided; a missing file is tolerated but reported
    system_prompt = ""
    if args.system_prompt_file is not None:
        if args.system_prompt_file.is_file():
            system_prompt = args.system_prompt_file.read_text(encoding="utf-8")
        else:
            print(f"Warning: system prompt file not found, continuing without it: {args.system_prompt_file}")

    # ensure output dir
    args.out_dir.mkdir(parents=True, exist_ok=True)
    transcript_path = args.out_dir / "combined_transcript.txt"
    chunk_summaries_path = args.out_dir / "chunk_summaries.txt"
    final_summary_path = args.out_dir / "final_summary.txt"

    # Loop-invariant prompt for the per-chunk summaries.
    chunk_instruction = (
        "Summarize this transcript section into concise bullet points. "
        "Capture key topics, decisions, action items with owners and due dates if stated, "
        "and any open questions. Keep it faithful and factual."
    )
    final_instruction = (
        "Using the chunk summaries below, produce a single cohesive meeting-style summary with "
        "sections: Overview, Key Topics, Decisions, Action Items, Notable Quotes, Open Questions. "
        "Do not invent details not present in the summaries."
    )

    # The context manager guarantees the temp dir is removed on every exit
    # path, including exceptions raised while transcribing or summarizing.
    with tempfile.TemporaryDirectory() as tmp_name:
        tmp_dir = Path(tmp_name)

        print("Splitting audio...")
        chunk_paths = split_audio(args.audio, args.chunk_minutes, tmp_dir)
        print(f"Created {len(chunk_paths)} chunks")

        # transcribe each chunk
        chunk_transcripts = []
        for i, p in enumerate(chunk_paths):
            print(f"Transcribing chunk {i+1}/{len(chunk_paths)}: {p.name}")
            chunk_transcripts.append(transcribe_file(p, args.whisper_model))

        # save combined transcript
        combined_transcript = "\n\n".join(
            f"[Chunk {i+1}]\n{text}" for i, text in enumerate(chunk_transcripts)
        )
        transcript_path.write_text(combined_transcript, encoding="utf-8")
        print(f"Wrote {transcript_path}")

        # summarize each chunk
        print("Summarizing chunks...")
        per_chunk_summaries = []
        for i, text in enumerate(chunk_transcripts):
            s = chat_summarize(text, args.summary_model, system_prompt, chunk_instruction)
            per_chunk_summaries.append(f"[Chunk {i+1} summary]\n{s}")

        chunk_summaries_text = "\n\n".join(per_chunk_summaries)
        chunk_summaries_path.write_text(chunk_summaries_text, encoding="utf-8")
        print(f"Wrote {chunk_summaries_path}")

        # final overall summary from per-chunk summaries
        print("Creating final overall summary...")
        final_summary = chat_summarize(
            chunk_summaries_text, args.summary_model, system_prompt, final_instruction
        )
        final_summary_path.write_text(final_summary, encoding="utf-8")
        print(f"Wrote {final_summary_path}")

        # keep the chunks if requested; everything left in the temp dir is
        # deleted when the with-block exits
        if args.keep_chunks:
            keep_dir = args.out_dir / "chunks_audio"
            keep_dir.mkdir(exist_ok=True)
            for p in chunk_paths:
                # shutil.move falls back to copy+delete when the temp dir and
                # the output dir live on different filesystems, where a plain
                # rename/replace raises OSError.
                shutil.move(str(p), str(keep_dir / p.name))
            print(f"Kept chunks in {keep_dir}")


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()