In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
# from sklearn.svm import SVC  # 选择SVM作为分类器，你也可以选择其他算法
# import librosa
import os
from tqdm import tqdm

# Function to load data from a folder
def load_data(folder):
    data = []
    files = os.listdir(folder)
    for file in files:
        if file.endswith('.npy'):
            filepath = os.path.join(folder, file)
            mel_spec = np.load(filepath)
            data.append(mel_spec)
    return data

In [2]:
X_train0 = load_data('train_data/language_0')
X_train1 = load_data('train_data/language_1')
X_test = load_data('test_data')

In [5]:
# find the max length of the data
max_length = 0
for i in range(len(X_train0)):
    if X_train0[i].shape[0] > max_length:
        max_length = X_train0[i].shape[0]
for i in range(len(X_train1)):
    if X_train1[i].shape[0] > max_length:
        max_length = X_train1[i].shape[0]
for i in range(len(X_test)):
    if X_test[i].shape[0] > max_length:
        max_length = X_test[i].shape[0]

In [7]:
# fill the data with zeros
for i in range(len(X_train0)):
    if X_train0[i].shape[0] < max_length:
        X_train0[i] = np.pad(X_train0[i], ((0, max_length - X_train0[i].shape[0]), (0, 0)), 'constant')
for i in range(len(X_train1)):
    if X_train1[i].shape[0] < max_length:
        X_train1[i] = np.pad(X_train1[i], ((0, max_length - X_train1[i].shape[0]), (0, 0)), 'constant')
for i in range(len(X_test)):
    if X_test[i].shape[0] < max_length:
        X_test[i] = np.pad(X_test[i], ((0, max_length - X_test[i].shape[0]), (0, 0)), 'constant')

In [8]:
X_train0 = np.array(X_train0)
X_train1 = np.array(X_train1)
X_test = np.array(X_test)

In [89]:
# concatenate the data
X_train = np.concatenate((X_train0, X_train1), axis=0)

In [90]:
# generate the labels
y_train0 = np.zeros(len(X_train0))
y_train1 = np.ones(len(X_train1))
y_train = np.concatenate((y_train0, y_train1), axis=0)

In [95]:
import torch
from torch import nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split

# Assuming X_train and y_train are your data
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Convert to tensors
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Convert to tensors
X_train = torch.tensor(X_train).to(device)
y_train = torch.tensor(y_train).long().to(device)
X_val = torch.tensor(X_val).to(device)
y_val = torch.tensor(y_val).long().to(device)

# Add a dimension for the channel
X_train = X_train.unsqueeze(1)
X_val = X_val.unsqueeze(1)

In [96]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        # Max pooling layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Fully connected layers
        self.fc1 = nn.Linear(32 * 191 * 20, 256)
        self.fc2 = nn.Linear(256, 2)  # Assuming 10 classes for classification
    
    def forward(self, x):
        # Convolutional layers with ReLU activation and max pooling
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # Flatten the tensor for fully connected layers
        x = x.view(-1, 32 * 191 * 20)
        # Fully connected layers with ReLU activation
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [98]:
from torch.utils.data import TensorDataset, DataLoader

# Check if CUDA (GPU) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model and move it to the device
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Create a DataLoader
batch_size = 64
train_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print loss for every epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item()}')

# Evaluate the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    outputs = model(X_val)
    _, predicted = torch.max(outputs.data, 1)
    total += y_val.size(0)
    correct += (predicted == y_val).sum().item()

print(f'Accuracy of the model on the validation data: {100 * correct / total}%')

Epoch 1/10, Loss: 0.6649547219276428
Epoch 2/10, Loss: 0.5728296041488647
Epoch 3/10, Loss: 0.28349724411964417
Epoch 4/10, Loss: 0.15444275736808777
Epoch 5/10, Loss: 0.22692586481571198
Epoch 6/10, Loss: 0.15337534248828888
Epoch 7/10, Loss: 0.041806600987911224
Epoch 8/10, Loss: 0.030931225046515465
Epoch 9/10, Loss: 0.018156446516513824
Epoch 10/10, Loss: 0.0368022695183754
Accuracy of the model on the validation data: 92.65625%


In [105]:
# free the memory of the GPU
torch.cuda.empty_cache()

In [107]:
# test the model
X_test = torch.tensor(X_test).to(device)

# Add a dimension for the channel
X_test = X_test.unsqueeze(1)

model.eval()
with torch.no_grad():
    outputs = model(X_test)
    _, predicted = torch.max(outputs.data, 1)    

In [109]:
predicted.shape

torch.Size([2000])

In [110]:
# write the result to the second column of test.csv, starting from the second row
import csv
with open('test.csv', 'r') as f:
    reader = csv.reader(f)
    rows = [row for row in reader]
    for i in range(len(predicted)):
        rows[i+1][1] = predicted[i].item()
with open('test.csv', 'w') as f:
    writer = csv.writer(f)
    writer.writerows(rows)