In [None]:
import os
import numpy as np
from tqdm import tqdm
from src.audio_processor import extract_audio_features
from src.mri_preprocessor import preprocess_mri

# Build the multimodal dataset (audio features + MRI scans + labels) and save
# it as three .npy files under data/.
#
# Initialisation: accumulators shared by all classes. After the loop the three
# lists have the same length and entry i of each refers to the same sample slot.
audio_features = []
mri_scans = []
labels = []

# Process the data of each class.
for class_name in ['dementia', 'control']:
    # Per-class buffers: counts taken from them describe this class only, so
    # samples of a previously processed class are never counted twice.
    class_audio = []
    class_mri = []

    # Audio
    audio_dir = f"data/audio/{class_name}/"
    # os.listdir() returns entries in arbitrary order; sort so that the saved
    # arrays (and which samples get truncated below) are reproducible.
    for file in tqdm(sorted(os.listdir(audio_dir)), desc=f"Аудио {class_name}"):
        features = extract_audio_features(os.path.join(audio_dir, file))
        # Skip files for which the extractor returned nothing usable.
        if features:
            # One flat feature vector: all MFCC values, then pitch, then RMS.
            class_audio.append([
                *features['mfcc'],
                features['pitch'],
                features['rms'],
            ])

    # MRI
    mri_dir = f"data/mri/{class_name}/"
    for file in tqdm(sorted(os.listdir(mri_dir)), desc=f"МРТ {class_name}"):
        scan = preprocess_mri(os.path.join(mri_dir, file))
        # Skip files the preprocessor could not handle (it returned None).
        if scan is not None:
            class_mri.append(scan)

    # Keep only as many samples as BOTH modalities provide for this class, so
    # audio, MRI and labels stay the same length and aligned by index.
    # NOTE(review): samples are paired by position in the sorted listings;
    # confirm that audio and MRI files of a class actually correspond that way.
    n_samples = min(len(class_audio), len(class_mri))
    audio_features.extend(class_audio[:n_samples])
    mri_scans.extend(class_mri[:n_samples])

    # Labels: 1 for dementia, 0 for control.
    labels.extend([1 if class_name == 'dementia' else 0] * n_samples)

# Save
np.save("data/audio_features.npy", np.array(audio_features))
np.save("data/mri_scans.npy", np.array(mri_scans))
np.save("data/labels.npy", np.array(labels))