In [14]:
import requests
import time

# First request to create the diarization job
url = "https://api.pyannote.ai/v1/diarize"
payload = {
    "url": "https://drive.google.com/uc?export=download&id=1KSmKA3iIJBtaEECaJPb9eNMOHpvTt4U-",
    # "numSpeakers": 6,
    "confidence": True
}
headers = {
    "Authorization": "Bearer sk_fa265ace19b4464cb995da7fc1b74eab",
    "Content-Type": "application/json"
}

response = requests.request("POST", url, json=payload, headers=headers)
job_id = response.json()["jobId"]

# Poll for results using the correct endpoint format: /v1/jobs/{jobId}
status_url = f"https://api.pyannote.ai/v1/jobs/{job_id}"
while True:
    status_response = requests.get(status_url, headers={
        "Authorization": "Bearer sk_fa265ace19b4464cb995da7fc1b74eab"
    })
    status_data = status_response.json()
    
    if status_data["status"] == "succeeded":  # Changed from "completed" to "succeeded"
        print("Diarization completed!")
        print("Results:", status_data.get("output", {}))  # Changed to get "output" field
        break
    elif status_data["status"] == "failed":
        print("Diarization failed:", status_data.get("error"))
        break
    else:
        print(f"Status: {status_data['status']} ... waiting 5 seconds")
        time.sleep(5)

Status: created ... waiting 5 seconds
Diarization completed!
Results: {'diarization': [{'speaker': 'SPEAKER_04', 'start': 0.405, 'end': 0.845}, {'speaker': 'SPEAKER_04', 'start': 0.925, 'end': 2.865}, {'speaker': 'SPEAKER_02', 'start': 3.085, 'end': 3.445}, {'speaker': 'SPEAKER_02', 'start': 3.885, 'end': 4.685}, {'speaker': 'SPEAKER_02', 'start': 4.865, 'end': 6.385}, {'speaker': 'SPEAKER_03', 'start': 6.385, 'end': 7.845}, {'speaker': 'SPEAKER_04', 'start': 6.385, 'end': 9.325}, {'speaker': 'SPEAKER_00', 'start': 9.645, 'end': 12.365}, {'speaker': 'SPEAKER_00', 'start': 13.065, 'end': 17.725}, {'speaker': 'SPEAKER_01', 'start': 17.165, 'end': 18.245}, {'speaker': 'SPEAKER_00', 'start': 17.945, 'end': 18.705}, {'speaker': 'SPEAKER_02', 'start': 18.245, 'end': 18.265}, {'speaker': 'SPEAKER_01', 'start': 18.265, 'end': 18.285}, {'speaker': 'SPEAKER_02', 'start': 18.285, 'end': 18.405}, {'speaker': 'SPEAKER_03', 'start': 18.405, 'end': 18.565}, {'speaker': 'SPEAKER_03', 'start': 18.945, 

In [15]:
import subprocess
from collections import defaultdict

# Group segments by speaker
speaker_segments = defaultdict(list)
for segment in status_data['output']['diarization']:
    speaker_segments[segment['speaker']].append({
        'start': segment['start'],  # Already in seconds
        'end': segment['end']      # Already in seconds
    })

input_file = "output.mp4"  # Make sure this matches your input file name

for speaker, segments in speaker_segments.items():
    # Create the volume filter expression
    volume_expr = []
    for segment in segments:
        volume_expr.append(f"between(t,{segment['start']},{segment['end']})")

    # Combine all segments with OR operator (+)
    filter_expression = f"volume=enable='{'+'.join(volume_expr)}':volume=1,volume=enable='not({'+'.join(volume_expr)})':volume=0"

    output_file = f"speaker_{speaker}_output.mp4"

    cmd = [
        "ffmpeg", "-i", input_file,
        "-af", filter_expression,
        "-c:a", "aac",
        "-vn",  # Remove video stream
        output_file
    ]

    print(f"\nProcessing Speaker {speaker}...")
    print(f"Using filter: {filter_expression}")
    try:
        subprocess.run(cmd, check=True)
        print(f"Created file: {output_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error processing Speaker {speaker}: {e}")

print("\nAll speaker files have been created!")


Processing Speaker SPEAKER_04...
Using filter: volume=enable='between(t,0.405,0.845)+between(t,0.925,2.865)+between(t,6.385,9.325)':volume=1,volume=enable='not(between(t,0.405,0.845)+between(t,0.925,2.865)+between(t,6.385,9.325))':volume=0


ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

Created file: speaker_SPEAKER_04_output.mp4

Processing Speaker SPEAKER_02...
Using filter: volume=enable='between(t,3.085,3.445)+between(t,3.885,4.685)+between(t,4.865,6.385)+between(t,18.245,18.265)+between(t,18.285,18.405)':volume=1,volume=enable='not(between(t,3.085,3.445)+between(t,3.885,4.685)+between(t,4.865,6.385)+between(t,18.245,18.265)+between(t,18.285,18.405))':volume=0


size=      72kB time=00:00:27.98 bitrate=  21.2kbits/s speed= 140x    
video:0kB audio:67kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 8.264185%
[aac @ 0x558d43ed4880] Qavg: 58856.445
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberb

Created file: speaker_SPEAKER_02_output.mp4

Processing Speaker SPEAKER_03...
Using filter: volume=enable='between(t,6.385,7.845)+between(t,18.405,18.565)+between(t,18.945,19.025)+between(t,22.085,27.985)':volume=1,volume=enable='not(between(t,6.385,7.845)+between(t,18.405,18.565)+between(t,18.945,19.025)+between(t,22.085,27.985))':volume=0


size=     140kB time=00:00:27.98 bitrate=  41.1kbits/s speed= 126x    
video:0kB audio:135kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 4.089421%
[aac @ 0x5dde7311c880] Qavg: 47653.781
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubber

Created file: speaker_SPEAKER_03_output.mp4

Processing Speaker SPEAKER_00...
Using filter: volume=enable='between(t,9.645,12.365)+between(t,13.065,17.725)+between(t,17.945,18.705)':volume=1,volume=enable='not(between(t,9.645,12.365)+between(t,13.065,17.725)+between(t,17.945,18.705))':volume=0


size=     148kB time=00:00:27.98 bitrate=  43.4kbits/s speed= 128x    
video:0kB audio:143kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 3.861778%
[aac @ 0x649cb2e2a880] Qavg: 46432.012
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubber

Created file: speaker_SPEAKER_00_output.mp4

Processing Speaker SPEAKER_01...
Using filter: volume=enable='between(t,17.165,18.245)+between(t,18.265,18.285)+between(t,19.125,21.985)':volume=1,volume=enable='not(between(t,17.165,18.245)+between(t,18.265,18.285)+between(t,19.125,21.985))':volume=0
Created file: speaker_SPEAKER_01_output.mp4

All speaker files have been created!


size=      81kB time=00:00:27.98 bitrate=  23.6kbits/s speed= 145x    
video:0kB audio:75kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 7.353439%
[aac @ 0x5744f0bca880] Qavg: 56208.754
