In [6]:
import os
from pydub import AudioSegment

# Length of every exported clip: 30 seconds, expressed in milliseconds
SEGMENT_LENGTH_MS = 30 * 1000

# Running number used to name the exported clips (1.mp3, 2.mp3, ...)
output_index = 1

# Destination directory for the clips; created if missing, reused if present
output_folder = "chopped_clips"
os.makedirs(output_folder, exist_ok=True)

# Function to sort files numerically based on the number in the filename
def numeric_sort(filename):
    """Sort key that orders filenames by the integer in front of the extension.

    Args:
        filename: A filename such as ``"12.mp3"``.

    Returns:
        The integer value of the name without its extension, or
        ``float('inf')`` when that part is not a valid integer, so such
        files sort after all numbered ones.
    """
    stem, _extension = os.path.splitext(filename)
    try:
        key = int(stem)
    except ValueError:
        # Non-numeric names are pushed to the end of the ordering
        key = float('inf')
    return key

# Folder holding the source tracks. Named once so that the directory listing and
# the file loading below always refer to the same location.
input_folder = '/Users/lixinyue/Desktop/my-music-web-app/public/audio'

# Collect the .mp3 files in numeric filename order (e.g. "3.mp3" before "10.mp3")
audio_files = sorted(
    (f for f in os.listdir(input_folder) if f.endswith('.mp3')),
    key=numeric_sort,
)

for file in audio_files:
    print(f"Processing {file}...")
    # Keep the bare filename in `file` for log messages and build the full path
    # separately, so every message for a track shows the same name.
    file_path = os.path.join(input_folder, file)
    audio = AudioSegment.from_mp3(file_path)
    duration_ms = len(audio)

    # Skip files that are shorter than one full segment
    if duration_ms < SEGMENT_LENGTH_MS:
        print(f"  {file} is shorter than 30 seconds. Skipping.")
        continue
    print(duration_ms)
    # Number of complete segments that fit; the trailing remainder is dropped
    num_segments = duration_ms // SEGMENT_LENGTH_MS
    print(num_segments)
    # Export each full segment
    for i in range(num_segments):
        start_ms = i * SEGMENT_LENGTH_MS
        end_ms = start_ms + SEGMENT_LENGTH_MS
        segment = audio[start_ms:end_ms]
        # Defensive check: export only if the slice has exactly the segment length
        if len(segment) == SEGMENT_LENGTH_MS:
            output_filename = os.path.join(output_folder, f"{output_index}.mp3")
            segment.export(output_filename, format="mp3")
            print(f"  Exported {output_filename}")
            # Clips are numbered continuously across all input files
            output_index += 1

print(f"Finished processing. Generated {output_index - 1} segments in folder '{output_folder}'.")


Processing 1.mp3...
275963
9
  Exported chopped_clips/1.mp3
  Exported chopped_clips/2.mp3
  Exported chopped_clips/3.mp3
  Exported chopped_clips/4.mp3
  Exported chopped_clips/5.mp3
  Exported chopped_clips/6.mp3
  Exported chopped_clips/7.mp3
  Exported chopped_clips/8.mp3
  Exported chopped_clips/9.mp3
Processing 2.mp3...
139744
4
  Exported chopped_clips/10.mp3
  Exported chopped_clips/11.mp3
  Exported chopped_clips/12.mp3
  Exported chopped_clips/13.mp3
Processing 3.mp3...
260042
8
  Exported chopped_clips/14.mp3
  Exported chopped_clips/15.mp3
  Exported chopped_clips/16.mp3
  Exported chopped_clips/17.mp3
  Exported chopped_clips/18.mp3
  Exported chopped_clips/19.mp3
  Exported chopped_clips/20.mp3
  Exported chopped_clips/21.mp3
Processing 4.mp3...
169500
5
  Exported chopped_clips/22.mp3
  Exported chopped_clips/23.mp3
  Exported chopped_clips/24.mp3
  Exported chopped_clips/25.mp3
  Exported chopped_clips/26.mp3
Processing 5.mp3...
198831
6
  Exported chopped_clips/27.mp3


In [1]:
!pip install yt-dlp

[33mDEPRECATION: Loading egg at /Users/lixinyue/.local/lib/python3.11/site-packages/tokenizers-0.13.4rc3-py3.11-macosx-11.1-arm64.egg is deprecated. pip 23.3 will enforce this behaviour change. A possible replacement is to use pip for package installation..[0m[33m
[0m[33mDEPRECATION: Loading egg at /Users/lixinyue/anaconda3/lib/python3.11/site-packages/huggingface_hub-0.19.4-py3.8.egg is deprecated. pip 23.3 will enforce this behaviour change. A possible replacement is to use pip for package installation..[0m[33m


In [2]:
import yt_dlp

# Source video on YouTube
video_url = "https://www.youtube.com/watch?v=FKHL7ldZAzU"

# Downloader settings: request MP4 video plus M4A audio (or a single MP4 stream
# as fallback), merge into MP4, and name the file after the video title
ydl_opts = dict(
    format='bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]',
    merge_output_format='mp4',
    outtmpl='%(title)s.mp4',
)

# Run the download for the single URL above
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    ydl.download([video_url])

print("Download complete! The video is saved as an MP4 file.")


[youtube] Extracting URL: https://www.youtube.com/watch?v=FKHL7ldZAzU
[youtube] FKHL7ldZAzU: Downloading webpage
[youtube] FKHL7ldZAzU: Downloading tv client config
[youtube] FKHL7ldZAzU: Downloading player 9c6dfc4a
[youtube] FKHL7ldZAzU: Downloading tv player API JSON
[youtube] FKHL7ldZAzU: Downloading ios player API JSON
[youtube] FKHL7ldZAzU: Downloading m3u8 information
[info] FKHL7ldZAzU: Downloading 1 format(s): 135+140
[download] Destination: Leonard Bernstein conducts Haydn Symphony No. 88 with his face (excerpt).f135.mp4
[download] 100% of    1.66MiB in 00:00:00 at 5.78MiB/s   
[download] Destination: Leonard Bernstein conducts Haydn Symphony No. 88 with his face (excerpt).f140.m4a
[download] 100% of  696.60KiB in 00:00:01 at 392.43KiB/s 
[Merger] Merging formats into "Leonard Bernstein conducts Haydn Symphony No. 88 with his face (excerpt).mp4"
Deleting original file Leonard Bernstein conducts Haydn Symphony No. 88 with his face (excerpt).f135.mp4 (pass -k to keep)
Deleting o

In [6]:
import os
import json

def rename_mp3_files(folder_path):
    """Rename every ``.mp3`` file in ``folder_path`` to ``1.mp3``, ``2.mp3``, ...

    Files are numbered in the lexicographic order of their original names.
    The original-to-new filename mapping is written to ``file_mapping.json``
    inside the same folder, replacing any mapping file already there.

    The rename runs in two passes (original name -> unique temporary name ->
    final name). Renaming directly to the final name can hit a file that has
    not been processed yet: in a folder holding ``1.mp3``, ``10.mp3`` and
    ``2.mp3``, moving ``10.mp3`` to ``2.mp3`` would replace the real
    ``2.mp3``, because ``os.rename`` silently overwrites on POSIX systems.

    Args:
        folder_path: Directory containing the ``.mp3`` files.

    Returns:
        None. Prints an error and returns early when ``folder_path`` is not
        an existing directory.
    """
    import uuid  # local import: only needed for the temporary names below

    # Ensure the folder path exists
    if not os.path.isdir(folder_path):
        print(f"Error: The folder '{folder_path}' does not exist.")
        return

    # Get all .mp3 files in the folder and sort them
    mp3_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.mp3'))

    # Pass 1: move every file out of the way under a name no other file can have,
    # so that no final name is occupied when pass 2 starts.
    token = uuid.uuid4().hex
    staged = []
    for index, original_filename in enumerate(mp3_files, start=1):
        temp_path = os.path.join(folder_path, f".renaming-{token}-{index}.tmp")
        os.rename(os.path.join(folder_path, original_filename), temp_path)
        staged.append((original_filename, temp_path, f"{index}.mp3"))

    # Pass 2: give each staged file its sequential name and record the mapping
    file_mapping = {}  # Dictionary to store original-to-new filename mapping
    for original_filename, temp_path, new_filename in staged:
        os.rename(temp_path, os.path.join(folder_path, new_filename))
        file_mapping[original_filename] = new_filename

    # Save mapping to a JSON file
    json_path = os.path.join(folder_path, "file_mapping.json")
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(file_mapping, json_file, indent=4, ensure_ascii=False)

    print(f"Renamed {len(mp3_files)} files and saved mapping to '{json_path}'.")

# Example usage
# NOTE: this call renames files on disk in place and (re)writes file_mapping.json,
# so re-running the cell renumbers files that were already renamed.
folder_path = "/Users/lixinyue/Desktop/my-music-web-app/epidemicsound_dataset"  # Change this to your folder path
rename_mp3_files(folder_path)


Renamed 100 files and saved mapping to '/Users/lixinyue/Desktop/my-music-web-app/epidemicsound_dataset/file_mapping.json'.
