In [11]:
# Cell 1: Imports
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from sklearn import metrics
import os
import pandas as pd

In [12]:
# Cell 2: Load model architecture
class Residual(nn.Module):
    """Skip-connection wrapper: adds the input back onto the wrapped callable's output."""

    def __init__(self, fn):
        """Store ``fn``, the callable applied to the input in ``forward``."""
        super().__init__()
        self.fn = fn

    def forward(self, x):
        """Return ``fn(x) + x``; ``fn`` must therefore preserve the shape of ``x``
        (or produce something broadcastable against it)."""
        transformed = self.fn(x)
        return transformed + x

def ConvMixer(dim, depth, kernel_size=9, patch_size=7, n_classes=12, in_channels=1):
    """Build a ConvMixer classifier as a single ``nn.Sequential``.

    Layout: a strided patch-embedding convolution, ``depth`` mixer layers
    (residual depthwise convolution followed by a 1x1 pointwise convolution,
    each followed by GELU and BatchNorm), global average pooling and a
    linear classification head.

    Args:
        dim: Number of channels used throughout the network.
        depth: Number of mixer layers.
        kernel_size: Kernel size of the depthwise convolution in each layer.
        patch_size: Kernel size and stride of the patch-embedding convolution.
        n_classes: Number of output logits.
        in_channels: Number of channels of the input tensor. Defaults to 1,
            which reproduces the previously hard-coded single-channel input.

    Returns:
        nn.Sequential: Maps a ``(N, in_channels, H, W)`` tensor to
        ``(N, n_classes)`` logits.

    Note:
        The nesting and ordering of submodules is kept exactly as before so
        that ``state_dict`` keys of existing checkpoints still match.
    """
    def mixer_layer():
        # One ConvMixer layer: spatial mixing (depthwise, residual) then channel mixing (1x1).
        return nn.Sequential(
            Residual(nn.Sequential(
                nn.Conv2d(dim, dim, kernel_size=kernel_size, groups=dim, padding="same"),
                nn.GELU(),
                nn.BatchNorm2d(dim),
            )),
            nn.Conv2d(dim, dim, kernel_size=1),
            nn.GELU(),
            nn.BatchNorm2d(dim),
        )

    return nn.Sequential(
        # Patch embedding: non-overlapping patch_size x patch_size patches.
        nn.Conv2d(in_channels, dim, kernel_size=patch_size, stride=patch_size),
        nn.GELU(),
        nn.BatchNorm2d(dim),
        *[mixer_layer() for _ in range(depth)],
        nn.AdaptiveAvgPool2d((1, 1)),
        nn.Flatten(),
        nn.Linear(dim, n_classes),
    )

In [None]:
# Cell 3: Load and prepare model
# Initialize the model
model = ConvMixer(dim=256, depth=8, n_classes=12)

# Load weights.
# map_location='cpu' lets a checkpoint that was saved from a CUDA model load on a
# CPU-only machine; the model is moved to the selected device just below.
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load('../data/models/audio_classifier_best.pth', map_location=torch.device('cpu'))
)

# Move the model to the device and switch to inference mode
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
model.eval()

print("Model loaded successfully!")
print(f"Using device: {device}")

In [14]:
# Cell 4: Define prediction function
def predict_single_file(model, file_path, transform=None):
    """Predict the class of a single pre-computed audio feature file.

    Args:
        model: A ``torch.nn.Module`` classifier. The input is moved to the
            device of the model's first parameter (CPU if it has none), so
            the function does not depend on any notebook-level ``device``.
        file_path: Path to a file readable by ``np.load``.
        transform: Optional callable applied to the loaded features (after a
            channel axis has been added to 2-D input) and before conversion
            to a tensor. May return an array or a tensor.

    Returns:
        tuple: ``(predicted_class, confidence, probabilities)`` where
        ``predicted_class`` is the argmax index (int), ``confidence`` is the
        softmax probability of that class (float), and ``probabilities`` is
        the tensor of softmax probabilities of the first batch element.

    Note:
        The model is put in eval mode for the forward pass and every
        submodule's previous train/eval flag is restored afterwards.
    """
    # Load and preprocess the file
    features = np.load(file_path)
    if features.ndim == 2:
        # 2-D input gets a leading channel axis
        features = np.expand_dims(features, axis=0)

    if transform is not None:
        features = transform(features)

    # Convert to a float32 tensor; as_tensor accepts arrays and tensors of any
    # numeric dtype, unlike the legacy torch.FloatTensor constructor.
    features = torch.as_tensor(features, dtype=torch.float32)
    if features.dim() == 3:
        features = features.unsqueeze(0)  # add the batch dimension if needed

    # Run on whatever device the model itself lives on.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device('cpu')
    features = features.to(target_device)

    # Predict. Force eval mode so BatchNorm uses (and does not update) its
    # running statistics, then restore each submodule's original flag.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(features)
            probabilities = torch.softmax(outputs, dim=1)
            predicted_class = torch.argmax(outputs, dim=1).item()
            confidence = probabilities[0][predicted_class].item()
    finally:
        for module, was_training in previous_modes:
            module.training = was_training

    return predicted_class, confidence, probabilities[0]

In [None]:
# Cell 5: Test with a single file
# Class labels; the list position is the class index used to look up predictions
classes = [
    'bat_den', 'bat_dieu_hoa', 'bat_quat', 'bat_tv',
    'do_am', 'dong_rem', 'mo_rem', 'nhiet_do',
    'tat_den', 'tat_dieu_hoa', 'tat_quat', 'tat_tv',
]

# Run the classifier on one specific feature file
test_file_path = '../data/features/mel/bat_tv/bat_tv_speaker01_019.npy'
predicted_class, confidence, all_probabilities = predict_single_file(
    model,
    test_file_path,
)

print(f"Predicted class: {classes[predicted_class]}")
print(f"Confidence: {confidence:.2%}")

# Show the three most probable classes
top3_prob, top3_indices = all_probabilities.topk(3)
print("\nTop 3 predictions:")
for idx, prob in zip(top3_indices, top3_prob):
    print(f"{classes[idx]}: {prob:.2%}")