In [7]:
from pydub import AudioSegment
import os

def split_audio(mp3_file, timestamps, output_dir=None):
    """
    Split an audio file into one MP3 clip per (start, end) timestamp pair.

    Each clip is written as ``<input name>-<start>-<end>.mp3``. Problems are
    reported with ``print`` instead of being raised: an input file that cannot
    be loaded aborts the whole call, while a bad timestamp entry only skips
    that one segment.

    Parameters:
    mp3_file (str): Path to input MP3 file
    timestamps (list): List of tuples containing (start_time, end_time) in seconds.
        Each pair must satisfy 0 <= start_time < end_time, and start_time must
        fall inside the audio.
    output_dir (str): Optional directory to save split files. If None (or empty),
        uses same directory as input file

    Returns:
    None
    """
    # Resolve the target directory and the stem used in the clip filenames
    base_dir = output_dir if output_dir else os.path.dirname(os.path.abspath(mp3_file))
    base_name = os.path.splitext(os.path.basename(mp3_file))[0]

    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(base_dir, exist_ok=True)

    # Load the audio file
    try:
        audio = AudioSegment.from_file(mp3_file)
    except Exception as e:
        print(f"Error loading audio file: {e}")
        return

    # len() of a pydub AudioSegment is its duration in milliseconds
    duration_ms = len(audio)

    # Process each timestamp and create segments
    for i, timestamp in enumerate(timestamps):
        try:
            # Unpack inside the try so one malformed entry skips only itself
            # instead of aborting the remaining segments
            start, end = timestamp

            # Convert to milliseconds with rounding: 1.001 * 1000 evaluates to
            # 1000.9999999999999, so plain int() would cut one millisecond early
            start_ms = int(round(start * 1000))
            end_ms = int(round(end * 1000))

            # Slicing does not fail on these ranges; it would silently export
            # an empty or unintended clip, so reject them explicitly
            if start_ms < 0 or end_ms <= start_ms:
                raise ValueError(
                    f"invalid time range {start}-{end} (need 0 <= start < end)"
                )
            if start_ms >= duration_ms:
                raise ValueError(
                    f"start {start}s is past the end of the audio ({duration_ms / 1000}s)"
                )

            # Extract the segment
            segment = audio[start_ms:end_ms]

            # Generate output filename with format: originalname-start-end.mp3
            output_filename = f"{base_name}-{start}-{end}.mp3"
            output_path = os.path.join(base_dir, output_filename)

            # Export the segment
            segment.export(output_path, format="mp3")
            print(f"Created segment: {output_path}")

        except Exception as e:
            print(f"Error processing segment {i+1}: {e}")


In [25]:

# Earlier segmentations tried for this clip, kept for reference as cut points in seconds:
#   215.12, 226.84, 236.48, 244.20, 251.80, 257.00
#   215.12, 220.12, 226.84, 233.36, 236.48, 244.20, 247.76, 251.80, 257.00
#   215.12, 217.28, 225.24, 231.64, 236.48, 244.20, 251.80, 257.00

# Active segmentation: every pair of neighbouring cut points is one (start, end) clip.
cut_points = [215.12, 225.24, 236.48, 244.20, 251.80, 257.00]
timestamps = list(zip(cut_points, cut_points[1:]))

split_audio("./downloads/_vecpj_CRLw.mp3", timestamps)

Created segment: /Users/xiangyukong/repos/audio-temp/downloads/_vecpj_CRLw-215.12-225.24.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/_vecpj_CRLw-225.24-236.48.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/_vecpj_CRLw-236.48-244.2.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/_vecpj_CRLw-244.2-251.8.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/_vecpj_CRLw-251.8-257.0.mp3


In [12]:
# Cut points in seconds; every pair of neighbouring values is one (start, end) clip.
cut_points = [539, 545, 554, 561, 568, 578, 585, 589]
timestamps = list(zip(cut_points, cut_points[1:]))
split_audio("./downloads/IgiO_GmNbKU.mp3", timestamps)

Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-539-545.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-545-554.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-554-561.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-561-568.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-568-578.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-578-585.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/IgiO_GmNbKU-585-589.mp3


In [15]:
# Cut points in seconds; every pair of neighbouring values is one (start, end) clip.
cut_points = [64, 71, 77, 87, 94, 105, 113, 117, 128, 136, 142]
timestamps = list(zip(cut_points, cut_points[1:]))
split_audio("./downloads/NtJ1wPCUxCI.mp3", timestamps)

Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-64-71.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-71-77.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-77-87.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-87-94.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-94-105.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-105-113.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-113-117.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-117-128.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-128-136.mp3
Created segment: /Users/xiangyukong/repos/audio-temp/downloads/NtJ1wPCUxCI-136-142.mp3


In [13]:
from youtube_transcript_api import YouTubeTranscriptApi

# Fetch the transcript for this YouTube video ID
video_id = "NtJ1wPCUxCI"
transcript = YouTubeTranscriptApi.get_transcript(video_id)

In [18]:
from youtube_transcript_api import YouTubeTranscriptApi

# Fetch the transcript for this YouTube video ID (replaces any previously loaded `transcript`)
video_id = "_vecpj_CRLw"
transcript = YouTubeTranscriptApi.get_transcript(video_id)

In [19]:
# Dump the whole transcript to inspect its structure: a list of dicts with
# 'text', 'start' and 'duration' keys (see the recorded output below).
print(transcript)

[{'text': 'all right so when it comes to money', 'start': 0.04, 'duration': 3.72}, {'text': 'there are a lot of things that feel like', 'start': 1.439, 'duration': 4.281}, {'text': "they're working against us we have an", 'start': 3.76, 'duration': 3.519}, {'text': 'exponential rise in the cost of living', 'start': 5.72, 'duration': 3.999}, {'text': "we're in a current job crisis gas is", 'start': 7.279, 'duration': 3.881}, {'text': 'more expensive groceries are more', 'start': 9.719, 'duration': 3.361}, {'text': "expensive and it feels like we're doing", 'start': 11.16, 'duration': 4.08}, {'text': 'more work for Less no doubt that there', 'start': 13.08, 'duration': 3.959}, {'text': 'are a lot of external pressures that', 'start': 15.24, 'duration': 3.64}, {'text': 'just make things feel Tighter and like', 'start': 17.039, 'duration': 3.521}, {'text': 'we have less and less money in our', 'start': 18.88, 'duration': 3.639}, {'text': "pocket that's definitely not lost on me", 'start': 

In [None]:
from openai import OpenAI
from typing import List, Dict
import json

def process_transcript(segments: List[Dict]) -> str:
    """Turn timed transcript segments into bilingual subtitle JSON via the OpenAI API.

    Args:
        segments: Transcript entries, each a dict with a 'text' string and a
            numeric 'start' value (rendered as seconds in the prompt). Entries
            whose text contains '(Laughter)' are dropped.

    Returns:
        The raw message content returned by the model (requested as a JSON
        object), or None when there is no text to send or the request fails.
        Failures are reported with print() rather than raised.
    """
    # Flatten the transcript into "[12.3s] text " chunks so the model sees the
    # timing; join() replaces repeated string concatenation in a loop
    processed_text = "".join(
        f"[{segment['start']:.1f}s] " + segment['text'].replace('\n', ' ') + " "
        for segment in segments
        if '(Laughter)' not in segment['text']
    )

    # Nothing left after filtering: skip the API call instead of sending an empty prompt
    if not processed_text:
        print("Error processing transcript: no transcript text to process")
        return None

    system_prompt = """
    You are a professional translator and subtitle editor. Convert the provided text (with timestamps) into a JSON array of bilingual subtitles.
    Requirements:
    1. Use the provided timestamps [Xs] to create segments of 5-20 seconds
    2. Keep the context and meaning complete in each segment
    3. Provide natural and accurate Chinese translations
    4. Maintain conversational flow and speaker's style
    
    Output format:
    {
      "subtitles": [
        {
          "timestamp": "MM:SS",
          "en": "English text here",
          "cn": "Chinese translation here"
        }
      ]
    }
    
    Guidelines:
    - Break at natural pauses and complete thoughts
    - Keep subtitle length comfortable for reading
    - Translate for clarity and cultural context
    - Keep translations concise but accurate
    - Use the provided timestamps to guide segmentation
    """

    try:
        # Build the client inside the try so a construction failure (for
        # example missing credentials) is reported like any request failure
        client = OpenAI()

        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": processed_text}
            ],
            response_format={"type": "json_object"},
            temperature=0.3
        )

        return completion.choices[0].message.content

    except Exception as e:
        print(f"Error processing transcript: {e}")
        return None

In [None]:
# Translate the transcript loaded in the cell above. No visible cell defines
# `segments`, so passing it raised NameError on a fresh kernel; the
# transcript data lives in `transcript`.
result = process_transcript(transcript)
if result:
    # The model reply is a JSON string; re-serialise it indented and keep the
    # Chinese text readable (ensure_ascii=False) instead of \u escapes
    print(json.dumps(json.loads(result), indent=2, ensure_ascii=False))