In [1]:
import os
import librosa
import numpy as np
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")


In [2]:
# Configuration
input_dir = "Bird_Song/all_data"  # directory scanned for input audio files
output_dir = "Bird_Song/Mel"  # directory that receives the generated .npy files
target_sr = 22050  # target sample rate (Hz)
duration = 2.0  # clip length in seconds
samples_per_file = int(duration * target_sr)  # number of samples every clip must have

# Mel spectrogram parameters (forwarded to librosa.feature.melspectrogram)
n_fft = 2048
hop_length = 512
n_mels = 128

In [3]:
# Create the output directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(output_dir, exist_ok=True)
def process_audio(file_path):
    """Load one audio file and return its normalised log-Mel spectrogram.

    The clip is decoded as mono at ``target_sr``, limited to ``duration``
    seconds, and then forced to exactly ``samples_per_file`` samples
    (zero-padded at the end if too short, truncated if too long) so that every
    spectrogram is computed from the same number of samples.

    Args:
        file_path: Path of the audio file to process.

    Returns:
        A ``float32`` array scaled to ``[-1, 1]``, or ``None`` if anything
        went wrong (the error is printed, not raised). With librosa's default
        framing the shape is expected to be
        ``(n_mels, 1 + samples_per_file // hop_length)``.
        A spectrogram with no dynamic range (e.g. a silent or empty
        recording) is returned as a constant ``-1`` array instead of NaNs.
    """
    try:
        # Load the audio (librosa resamples to target_sr automatically).
        y, _ = librosa.load(file_path,
                            sr=target_sr,
                            mono=True,
                            duration=duration)

        # Enforce a fixed length: pad short clips with silence, and trim any
        # clip that came back longer than requested.
        if len(y) < samples_per_file:
            y = np.pad(y, (0, samples_per_file - len(y)), mode='constant')
        else:
            y = y[:samples_per_file]

        # Compute the Mel spectrogram.
        mel = librosa.feature.melspectrogram(y=y,
                                             sr=target_sr,
                                             n_fft=n_fft,
                                             hop_length=hop_length,
                                             n_mels=n_mels)

        # Convert to a logarithmic (dB) scale, relative to the clip's own peak.
        log_mel = librosa.power_to_db(mel, ref=np.max)

        # Min-max normalise to [-1, 1].
        low = log_mel.min()
        span = log_mel.max() - low
        if span == 0:
            # Constant spectrogram: the usual formula would be 0/0 and yield
            # NaNs. Map it to the bottom of the range, which is what the
            # common "+ epsilon" variant of this formula produces.
            log_mel = np.full_like(log_mel, -1.0)
        else:
            log_mel = (log_mel - low) / span * 2 - 1

        return log_mel.astype(np.float32)

    except Exception as e:
        # Best-effort batch processing: report the failure and let the caller skip the file.
        print(f"处理 {os.path.basename(file_path)} 时出错: {str(e)}")
        return None


In [4]:
# Walk the input directory and convert every MP3 file it contains.
for filename in tqdm(os.listdir(input_dir)):
    # Skip entries that are not MP3 files (the extension check is case-insensitive).
    if not filename.lower().endswith(".mp3"):
        continue

    # Build the spectrogram; process_audio returns None when the file could not be processed.
    file_path = os.path.join(input_dir, filename)
    mel_spec = process_audio(file_path)

    if mel_spec is not None:
        # Save as a NumPy array, keeping the base name and swapping the extension for .npy.
        stem = os.path.splitext(filename)[0]
        output_path = os.path.join(output_dir, stem + ".npy")
        np.save(output_path, mel_spec)

print(f"处理完成! 已保存到 {output_dir}")

100%|██████████| 3147/3147 [00:18<00:00, 172.18it/s]

处理完成! 已保存到 Bird_Song/Mel



