# MFCC LFCC 추출기


In [1]:
import os
import torch
import torchaudio
import numpy as np
from tqdm import tqdm

# 기본 경로 설정
base_dir = r"C:\Users\tjdwn\Downloads\archive"
save_dir = "audio_features"

# 저장 디렉토리 생성
os.makedirs(save_dir, exist_ok=True)

# 멜스펙트로그램 변환기 설정
mel_transform = torchaudio.transforms.MelSpectrogram(
    sample_rate=32000,  # 기본 샘플레이트
    n_mels=64,         # 멜 필터뱅크 수
    n_fft=2048,
    hop_length=1024
)

# LFCC 변환기 설정 - n_lfcc를 64로 수정
lfcc_transform = torchaudio.transforms.LFCC(
    sample_rate=32000,
    n_lfcc=64,         # LFCC 계수 수를 64로 변경
    n_filter=128,      # 필터 수 증가
    speckwargs={"n_fft": 2048, "hop_length": 1024}
)

# Delta와 Delta-Delta 계산기 설정
compute_deltas = torchaudio.transforms.ComputeDeltas(win_length=9)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
mel_transform = mel_transform.to(device)
lfcc_transform = lfcc_transform.to(device)
compute_deltas = compute_deltas.to(device)

def extract_features(audio_path):
    # 오디오 로드
    waveform, sr = torchaudio.load(audio_path)
    waveform = waveform.to(device)
    
    # 멜스펙트로그램 추출
    mel_spec = mel_transform(waveform)
    mel_spec_db = torchaudio.transforms.AmplitudeToDB()(mel_spec)
    
    # Delta와 Delta-Delta 특징 추출 (멜스펙트로그램)
    mel_deltas = compute_deltas(mel_spec_db)
    mel_delta_deltas = compute_deltas(mel_deltas)
    
    # LFCC 특징 추출
    lfcc = lfcc_transform(waveform)
    # LFCC에 대한 Delta와 Delta-Delta
    lfcc_deltas = compute_deltas(lfcc)
    lfcc_delta_deltas = compute_deltas(lfcc_deltas)
    
    # 모든 특징을 하나의 텐서로 결합
    features = {
        'melspec': mel_spec_db.cpu().numpy(),
        'melspec_delta': mel_deltas.cpu().numpy(),
        'melspec_delta_delta': mel_delta_deltas.cpu().numpy(),
        'lfcc': lfcc.cpu().numpy(),
        'lfcc_delta': lfcc_deltas.cpu().numpy(),
        'lfcc_delta_delta': lfcc_delta_deltas.cpu().numpy()
    }
    
    return features

# 데이터 처리
for folder_name in ['for-2seconds', 'for-rerecorded']:
    dataset_save_dir = os.path.join(save_dir, folder_name)
    os.makedirs(dataset_save_dir, exist_ok=True)
    
    for split in ['training', 'testing', 'validation']:
        split_save_dir = os.path.join(dataset_save_dir, split)
        os.makedirs(split_save_dir, exist_ok=True)
        
        for label in ['fake', 'real']:
            label_save_dir = os.path.join(split_save_dir, label)
            os.makedirs(label_save_dir, exist_ok=True)
            
            # 각 특징 타입에 대한 디렉토리 생성
            feature_types = ['melspec', 'melspec_delta', 'melspec_delta_delta', 
                           'lfcc', 'lfcc_delta', 'lfcc_delta_delta']
            for feature_type in feature_types:
                os.makedirs(os.path.join(label_save_dir, feature_type), exist_ok=True)
            
            current_dir = os.path.join(base_dir, folder_name, split, label)
            if not os.path.exists(current_dir):
                print(f"Directory not found: {current_dir}")
                continue
            
            # 해당 디렉토리의 모든 wav 파일 처리
            for filename in tqdm(os.listdir(current_dir), desc=f'Processing {folder_name}/{split}/{label}'):
                if filename.endswith('.wav'):
                    file_path = os.path.join(current_dir, filename)
                    
                    try:
                        # 특징 추출
                        features = extract_features(file_path)
                        
                        # 각 특징을 개별적으로 저장
                        for feature_type, feature_data in features.items():
                            save_path = os.path.join(label_save_dir, feature_type, 
                                                   filename.replace('.wav', '.npy'))
                            np.save(save_path, feature_data)
                            
                    except Exception as e:
                        print(f"Error processing {file_path}: {str(e)}")

print("Processing completed!")

# 샘플 데이터 shape 확인
def check_saved_features(save_dir):
    print("\nChecking saved features shapes...")
    for folder_name in ['for-2seconds', 'for-rerecorded']:
        split_dir = os.path.join(save_dir, folder_name, 'training')
        if not os.path.exists(split_dir):
            continue
            
        label_dir = os.path.join(split_dir, 'fake')  # 임의의 레이블 선택
        if not os.path.exists(label_dir):
            continue
            
        for feature_type in ['melspec', 'melspec_delta', 'melspec_delta_delta', 
                           'lfcc', 'lfcc_delta', 'lfcc_delta_delta']:
            feature_dir = os.path.join(label_dir, feature_type)
            if not os.path.exists(feature_dir):
                continue
                
            # 첫 번째 파일 확인
            first_file = next((f for f in os.listdir(feature_dir) if f.endswith('.npy')), None)
            if first_file:
                feature_path = os.path.join(feature_dir, first_file)
                feature_data = np.load(feature_path)
                print(f"{feature_type}: {feature_data.shape}")

check_saved_features(save_dir)

Processing for-2seconds/training/fake: 100%|██████████| 6978/6978 [00:53<00:00, 131.19it/s]
Processing for-2seconds/training/real: 100%|██████████| 6978/6978 [00:52<00:00, 132.51it/s]
Processing for-2seconds/testing/fake: 100%|██████████| 544/544 [00:04<00:00, 130.33it/s]
Processing for-2seconds/testing/real: 100%|██████████| 544/544 [00:04<00:00, 126.74it/s]
Processing for-2seconds/validation/fake: 100%|██████████| 1413/1413 [00:10<00:00, 130.41it/s]
Processing for-2seconds/validation/real: 100%|██████████| 1413/1413 [00:10<00:00, 130.29it/s]
Processing for-rerecorded/training/fake: 100%|██████████| 5104/5104 [00:41<00:00, 124.39it/s]
Processing for-rerecorded/training/real: 100%|██████████| 5104/5104 [00:40<00:00, 126.73it/s]
Processing for-rerecorded/testing/fake: 100%|██████████| 408/408 [00:03<00:00, 125.08it/s]
Processing for-rerecorded/testing/real: 100%|██████████| 408/408 [00:03<00:00, 128.74it/s]
Processing for-rerecorded/validation/fake: 100%|██████████| 1143/1143 [00:09<00:

Processing completed!

Checking saved features shapes...
melspec: (1, 64, 32)
melspec_delta: (1, 64, 32)
melspec_delta_delta: (1, 64, 32)
lfcc: (1, 64, 32)
lfcc_delta: (1, 64, 32)
lfcc_delta_delta: (1, 64, 32)
melspec: (1, 64, 32)
melspec_delta: (1, 64, 32)
melspec_delta_delta: (1, 64, 32)
lfcc: (1, 64, 32)
lfcc_delta: (1, 64, 32)
lfcc_delta_delta: (1, 64, 32)


# Melspectrogram 등 주파수 추출기

In [2]:
import os
import torch
import torchaudio
import numpy as np
from tqdm import tqdm
import torch.nn.functional as F

# 기본 설정
TARGET_SR = 16000  # 논문에서 사용한 샘플링 레이트
TARGET_LENGTH = 64600  # 4초 * 16000Hz = 64000 (약간의 여유 추가)
TARGET_SIZE = (64, 64)  # 목표 특징 크기

class RawboostAugment:
    def __init__(self, max_db=10, device='cuda'):
        self.max_db = max_db
        self.device = device
    
    def __call__(self, waveform):
        # 1. Random gain
        gain_db = torch.empty(1, device=self.device).uniform_(-self.max_db, self.max_db)
        gain_factor = 10 ** (gain_db / 20)
        waveform = waveform * gain_factor
        
        # 2. Add noise
        noise = torch.randn_like(waveform, device=self.device) * 0.001
        waveform = waveform + noise
        
        # 3. Clip to [-1, 1]
        waveform = torch.clamp(waveform, -1, 1)
        
        return waveform

def fix_length(waveform, target_length):
    """Fix audio length to target length"""
    curr_length = waveform.size(-1)
    
    if curr_length > target_length:
        # Truncate
        waveform = waveform[..., :target_length]
    elif curr_length < target_length:
        # Repeat and truncate
        num_repeats = (target_length + curr_length - 1) // curr_length
        waveform = waveform.repeat(1, num_repeats)
        waveform = waveform[..., :target_length]
    
    return waveform

# 특징 추출기 설정
class FeatureExtractor:
    def __init__(self, device='cuda'):
        self.device = device
        self.mel_transform = torchaudio.transforms.MelSpectrogram(
            sample_rate=TARGET_SR,
            n_mels=64,
            n_fft=2048,
            hop_length=512,
            power=2
        ).to(device)
        
        self.lfcc_transform = torchaudio.transforms.LFCC(
            sample_rate=TARGET_SR,
            n_lfcc=64,
            n_filter=128,
            speckwargs={"n_fft": 2048, "hop_length": 512}
        ).to(device)
        
        self.compute_deltas = torchaudio.transforms.ComputeDeltas().to(device)
        self.to_db = torchaudio.transforms.AmplitudeToDB().to(device)
        self.rawboost = RawboostAugment()
    
    def extract_features(self, waveform, is_training=False):
        # 입력 웨이브폼을 지정된 디바이스로 이동
        waveform = waveform.to(self.device)
        
        # 학습 중일 경우 Rawboost 적용
        if is_training:
            waveform = self.rawboost(waveform.to(self.device))
        
        # Mel-spectrogram 특징 추출
        mel_spec = self.mel_transform(waveform)
        mel_spec_db = self.to_db(mel_spec)
        mel_deltas = self.compute_deltas(mel_spec_db)
        mel_delta_deltas = self.compute_deltas(mel_deltas)
        
        # LFCC 특징 추출
        lfcc = self.lfcc_transform(waveform)
        lfcc_deltas = self.compute_deltas(lfcc)
        lfcc_delta_deltas = self.compute_deltas(lfcc_deltas)
        
        # 특징 크기 조정
        features = {
            'melspec': self._resize_feature(mel_spec_db),
            'melspec_delta': self._resize_feature(mel_deltas),
            'melspec_delta_delta': self._resize_feature(mel_delta_deltas),
            'lfcc': self._resize_feature(lfcc),
            'lfcc_delta': self._resize_feature(lfcc_deltas),
            'lfcc_delta_delta': self._resize_feature(lfcc_delta_deltas)
        }
        
        # CPU로 이동하고 numpy로 변환하기 전에 모든 특징을 동일한 디바이스에 있도록 함
        return {k: v.detach().cpu().numpy() for k, v in features.items()}

    
    def _resize_feature(self, feature):
        # Interpolate to target size
        feature = feature.unsqueeze(0)  # Add batch dimension
        feature = F.interpolate(feature, size=TARGET_SIZE, mode='bilinear', align_corners=False)
        feature = feature.squeeze(0)  # Remove batch dimension
        return feature

def process_dataset(base_dir, save_dir, device='cuda'):
    feature_extractor = FeatureExtractor(device)
    
    for folder_name in ['for-2seconds', 'for-rerecorded']:
        dataset_save_dir = os.path.join(save_dir, folder_name)
        os.makedirs(dataset_save_dir, exist_ok=True)
        
        for split in ['training', 'testing', 'validation']:
            split_save_dir = os.path.join(dataset_save_dir, split)
            os.makedirs(split_save_dir, exist_ok=True)
            
            for label in ['fake', 'real']:
                label_save_dir = os.path.join(split_save_dir, label)
                os.makedirs(label_save_dir, exist_ok=True)
                
                # 각 특징 타입에 대한 디렉토리 생성
                for feature_type in ['melspec', 'melspec_delta', 'melspec_delta_delta', 
                                   'lfcc', 'lfcc_delta', 'lfcc_delta_delta']:
                    os.makedirs(os.path.join(label_save_dir, feature_type), exist_ok=True)
                
                current_dir = os.path.join(base_dir, folder_name, split, label)
                if not os.path.exists(current_dir):
                    print(f"Directory not found: {current_dir}")
                    continue
                
                # 파일 처리
                for filename in tqdm(os.listdir(current_dir), 
                                   desc=f'Processing {folder_name}/{split}/{label}'):
                    if filename.endswith('.wav'):
                        file_path = os.path.join(current_dir, filename)
                        
                        try:
                            # 오디오 로드 및 전처리
                            waveform, sr = torchaudio.load(file_path)
                            
                            # 리샘플링
                            if sr != TARGET_SR:
                                waveform = torchaudio.transforms.Resample(sr, TARGET_SR)(waveform)
                            
                            # 길이 조정
                            waveform = fix_length(waveform, TARGET_LENGTH)
                            
                            # 특징 추출
                            features = feature_extractor.extract_features(
                                waveform, 
                                is_training=(split == 'training')
                            )
                            
                            # 특징 저장
                            for feature_type, feature_data in features.items():
                                save_path = os.path.join(label_save_dir, feature_type, 
                                                       filename.replace('.wav', '.npy'))
                                np.save(save_path, feature_data)
                                
                        except Exception as e:
                            print(f"Error processing {file_path}: {str(e)}")

if __name__ == "__main__":
    base_dir = r"C:\Users\tjdwn\Downloads\archive"
    save_dir = "audio_features"
    
    # GPU 사용 가능 여부 확인
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")
    
    # 데이터셋 처리
    process_dataset(base_dir, save_dir, device)
    print("Processing completed!")
    
    # 결과 확인
    check_saved_features(save_dir)

Using device: cuda


Processing for-2seconds/training/fake: 100%|██████████| 6978/6978 [00:32<00:00, 217.43it/s]
Processing for-2seconds/training/real: 100%|██████████| 6978/6978 [00:57<00:00, 121.35it/s]
Processing for-2seconds/testing/fake: 100%|██████████| 544/544 [00:04<00:00, 123.01it/s]
Processing for-2seconds/testing/real: 100%|██████████| 544/544 [00:04<00:00, 109.53it/s]
Processing for-2seconds/validation/fake: 100%|██████████| 1413/1413 [00:12<00:00, 115.95it/s]
Processing for-2seconds/validation/real: 100%|██████████| 1413/1413 [00:11<00:00, 120.29it/s]
Processing for-rerecorded/training/fake: 100%|██████████| 5104/5104 [00:41<00:00, 121.55it/s]
Processing for-rerecorded/training/real: 100%|██████████| 5104/5104 [00:43<00:00, 118.05it/s]
Processing for-rerecorded/testing/fake: 100%|██████████| 408/408 [00:03<00:00, 113.60it/s]
Processing for-rerecorded/testing/real: 100%|██████████| 408/408 [00:03<00:00, 116.95it/s]
Processing for-rerecorded/validation/fake: 100%|██████████| 1143/1143 [00:09<00:

Processing completed!





NameError: name 'check_saved_features' is not defined