In [18]:
!python run.py 


^C


In [23]:
%%writefile model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
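# The input x is a 4-D tensor, e.g. [100, 1, 161, 101]:
#   100 - batch_size (100 audio clips per batch)
#   1   - unclear (presumably the channel dimension - TODO confirm)
#   161 - unclear (presumably the number of frequency bins - TODO confirm)
#   101 - number of frames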

class ConvNet(nn.Module):
    """Frame-wise fully connected classifier with 6 output classes.

    Despite the name, the network contains no convolution layers: four
    ``nn.Linear`` layers are applied along the last dimension of the input,
    which therefore must have size 101.
    """

    def __init__(self):
        super(ConvNet, self).__init__()
        self.l1 = nn.Linear(101, 1024)
        self.l2 = nn.Linear(1024, 512)
        self.l3 = nn.Linear(512, 64)
        self.l4 = nn.Linear(64, 6)

    def forward(self, x):
        """Run the forward propagation.

        Args:
            x: model input whose last dimension has size 101, e.g.
                (batch_size, frame_num, 101) or (batch_size, 1, 161, 101).

        Returns:
            Log-probabilities of shape (batch_size, 6), which is the form
            expected by ``F.nll_loss``.
        """
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = self.l4(x)
        # The linear layers only act on the last dimension, so every extra
        # dimension between batch and class is still present here. Average the
        # class scores over those positions to get one prediction per sample.
        if x.dim() > 2:
            x = x.flatten(1, -2).mean(dim=1)
        return F.log_softmax(x, dim=-1)

class FcNet(nn.Module):
    """Fully connected (DNN) classifier operating on the flattened input.

    Args:
        in_features: number of values per sample after flattening. The default
            matches one (1, 161, 101) input per sample; pass ``784`` to get the
            previous 28x28 layout.
        num_classes: number of output classes.
    """

    def __init__(self, in_features=161 * 101, num_classes=6):
        super(FcNet, self).__init__()
        self.in_features = in_features
        self.l1 = torch.nn.Linear(in_features, 512)
        self.l2 = torch.nn.Linear(512, 256)
        self.l3 = torch.nn.Linear(256, 128)
        self.l4 = torch.nn.Linear(128, 64)
        self.l5 = torch.nn.Linear(64, num_classes)

    def forward(self, x):
        """Run the forward propagation.

        Args:
            x: model input of shape (batch_size, ...) whose non-batch
                dimensions multiply to ``in_features``.

        Returns:
            Log-probabilities of shape (batch_size, num_classes), which is the
            form expected by ``F.nll_loss``.
        """
        # Flatten per sample. Keeping the batch dimension fixed means a wrong
        # feature size raises a shape error in l1 instead of silently mixing
        # values from different samples.
        x = x.flatten(1)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        return F.log_softmax(self.l5(x), dim=1)

        
# 建立VGG卷积神经的模型层
def _make_layers(cfg):
    layers = []
    in_channels = 1
    for x in cfg:
        if x == 'M':  # maxpool 池化层
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:  # 卷积层
            layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                       nn.BatchNorm2d(x),
                       nn.ReLU(inplace=True)]
            in_channels = x
    layers += [nn.AvgPool2d(kernel_size=1, stride=1)]  # avgPool 池化层
    return nn.Sequential(*layers)


# Layer configurations of the available VGG variants, consumed by _make_layers:
# an integer is the number of output channels of a 3x3 convolution block and
# 'M' stands for a 2x2 max-pooling layer.
cfg = {
    'VGG':[32,'M',64,'M',128,'M',128,'M'],
    'VGG11': [32, 'M', 64, 'M', 128, 128, 'M', 256, 256, 'M', 256, 256, 'M'],
    'VGG13': [32, 32, 'M', 64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 256, 256, 'M'],
    'VGG16': [32, 32, 'M', 64, 64, 'M', 128, 128, 128, 'M', 256, 256, 256, 'M', 256, 256, 256, 'M'],
    'VGG19': [32, 32, 'M', 64, 64, 'M', 128, 128, 128, 128, 'M', 256, 256, 256, 256, 'M', 256, 256, 256, 256, 'M'],
}


# VGG-style convolutional neural network.
class VGG(nn.Module):
    """VGG-style classifier: convolutional features followed by two linear layers.

    Args:
        vgg_name: key into the module-level ``cfg`` table selecting the layer
            configuration (a missing key raises ``KeyError``).
        input_shape: (channels, height, width) of one input sample. The default
            (1, 161, 101) yields the 7680 flattened features that were
            previously hard-coded for the ``'VGG'`` configuration.
        num_classes: number of output classes (default 6).
    """

    def __init__(self, vgg_name, input_shape=(1, 161, 101), num_classes=6):
        super(VGG, self).__init__()
        self.features = _make_layers(cfg[vgg_name])  # convolutional layers

        # Infer the flattened feature size from a dummy pass instead of
        # hard-coding it, so every configuration in cfg gets a matching fc1.
        # The pass runs in eval mode so the BatchNorm running statistics are
        # not touched.
        was_training = self.features.training
        self.features.eval()
        with torch.no_grad():
            probe = self.features(torch.zeros(1, *input_shape))
        self.features.train(was_training)
        flat_features = probe.view(1, -1).size(1)

        self.fc1 = nn.Linear(flat_features, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return log-probabilities of shape (batch_size, num_classes).

        Args:
            x: input batch of shape (batch_size,) + input_shape.
        """
        out = self.features(x)
        out = out.view(out.size(0), -1)  # flatten per sample
        # NOTE(review): there is no non-linearity between fc1 and fc2, so the
        # two layers compose into a single linear map. Kept as in the original.
        out = self.fc1(out)
        out = self.fc2(out)
        return F.log_softmax(out, dim=1)


Overwriting model.py


In [24]:
%%writefile train.py
from __future__ import print_function

import torch
import torch.nn.functional as F
from torch.autograd import Variable


def train(loader, model, optimizer, epoch, cuda, log_interval, verbose=True):
    '''
    Train the model for one pass over ``loader``.

    Args:
        loader: torch dataloader yielding (data, target) batches
        model: model to train; must output log-probabilities, because the
            loss is computed with ``F.nll_loss``
        optimizer: torch optimizer
        epoch: index of the current epoch (only used in the log message)
        cuda: whether to move each batch to the gpu
        log_interval: how many batches to wait before logging training status
        verbose: whether to print training log (such as epoch and loss)
    Return:
        the average per-sample loss of this epoch (0.0 if the loader is empty)
    '''
    model.train()  # switch to training mode
    total_loss = 0.0
    seen = 0
    for batch_idx, (data, target) in enumerate(loader):
        if cuda:  # move the batch to the GPU when requested
            data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()
        output = model(data)  # forward pass
        loss = F.nll_loss(output, target)
        loss.backward()  # back-propagate the gradients
        optimizer.step()  # update the parameters

        # Accumulate the batch-mean loss weighted by batch size, so a smaller
        # final batch does not skew the epoch average. The running sum stays a
        # tensor to avoid a host/device sync on every batch.
        batch_size = len(data)
        total_loss = total_loss + loss.detach() * batch_size
        seen += batch_size

        if verbose and log_interval > 0 and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(loader.dataset),
                100. * batch_idx / len(loader), loss.item()))

    return float(total_loss) / seen if seen else 0.0








def test(loader, model, cuda, verbose=True):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in loader:
        if cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target, size_average=False).data.item()  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum()

    test_loss /= len(loader.dataset)
    if verbose:
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
            test_loss, correct, len(loader.dataset), 100. * correct / len(loader.dataset)))
    return test_loss


Overwriting train.py


In [25]:
%%writefile run.py
from __future__ import print_function
import argparse
import torch
import torch.optim as optim
from command_loader import CommandLoader
import numpy as np
from model import ConvNet, FcNet,VGG
from train import train, test
import os
# Training settings
def _str2bool(value):
    """Parse a command-line boolean such as 'true', 'False', '1' or 'no'."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {0!r}'.format(value))


def run():
    """Parse the command line, train with early stopping, then test.

    The model with the best validation loss so far is written to
    ``./checkpoint/ckpt.t7``. Training stops after ``--epochs`` epochs or once
    the validation loss has failed to improve for ``--patience`` consecutive
    epochs.
    """
    parser = argparse.ArgumentParser(description='ConvNets for Speech Commands Recognition')
    parser.add_argument('--train_path', default='dataset/train', help='path to the train data folder')
    parser.add_argument('--test_path', default='dataset/test', help='path to the test data folder')
    parser.add_argument('--valid_path', default='dataset/valid', help='path to the valid data folder')
    parser.add_argument('--batch_size', type=int, default=100, metavar='N', help='training and valid batch size')
    parser.add_argument('--test_batch_size', type=int, default=100, metavar='N', help='batch size for testing')
    parser.add_argument('--arc', default='VGG', help='network architecture: ConvNet,FcNet,VGG')
    parser.add_argument('--epochs', type=int, default=100, metavar='N', help='number of epochs to train')
    parser.add_argument('--lr', type=float, default=0.001, metavar='LR', help='learning rate')
    parser.add_argument('--momentum', type=float, default=0.9, metavar='M', help='SGD momentum, for SGD only')
    parser.add_argument('--optimizer', default='adam', help='optimization method: sgd | adam')
    # Without an explicit type every value given on the command line is a
    # non-empty string and therefore truthy, so "--cuda False" would still
    # enable CUDA.
    parser.add_argument('--cuda', type=_str2bool, nargs='?', const=True, default=True, help='enable CUDA')
    parser.add_argument('--seed', type=int, default=1234, metavar='S', help='random seed')
    parser.add_argument('--log-interval', type=int, default=10, metavar='N',
                        help='how many batches to wait before logging training status')
    parser.add_argument('--patience', type=int, default=5, metavar='N',
                        help='how many epochs of no loss improvement should we wait before stop training')
    # feature extraction options (typed so command-line values are not passed on as strings)
    parser.add_argument('--window_size', type=float, default=.02, help='window size for the stft')
    parser.add_argument('--window_stride', type=float, default=.01, help='window stride for the stft')
    parser.add_argument('--window_type', default='hamming', help='window type for the stft')

    args = parser.parse_args()

    args.cuda = args.cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)

    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    # loading data
    train_dataset = CommandLoader(args.train_path, window_size=args.window_size, window_stride=args.window_stride,
                                  window_type=args.window_type)
    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=True,
        num_workers=8, pin_memory=args.cuda, sampler=None)

    valid_dataset = CommandLoader(args.valid_path, window_size=args.window_size, window_stride=args.window_stride,
                                  window_type=args.window_type)
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset, batch_size=args.batch_size, shuffle=False,
        num_workers=8, pin_memory=args.cuda, sampler=None)

    test_dataset = CommandLoader(args.test_path, window_size=args.window_size, window_stride=args.window_stride,
                                 window_type=args.window_type)
    test_loader = torch.utils.data.DataLoader(
        test_dataset, batch_size=args.test_batch_size, shuffle=False,
        num_workers=8, pin_memory=args.cuda, sampler=None)

    # build model
    if args.arc == 'ConvNet':
        model = ConvNet()
    elif args.arc == 'FcNet':
        model = FcNet()
    elif args.arc == 'VGG':
        model = VGG('VGG')
    else:
        # The fallback has to be a model instance: the name as a plain string
        # would fail at model.parameters() below.
        print("Unknown architecture '{0}', falling back to ConvNet".format(args.arc))
        model = ConvNet()

    if args.cuda:
        print('Using CUDA with {0} GPUs'.format(torch.cuda.device_count()))
        model = torch.nn.DataParallel(model).cuda()

    # define optimizer (anything other than adam falls back to SGD)
    if args.optimizer.lower() == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    best_valid_loss = np.inf
    stale_epochs = 0  # consecutive epochs without validation improvement
    epoch = 1

    print(model)
    # train with early stopping
    while (epoch < args.epochs + 1) and (stale_epochs < args.patience):
        train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval)
        valid_loss = test(valid_loader, model, args.cuda)
        if valid_loss > best_valid_loss:
            stale_epochs += 1
            print('Loss was not improved, iteration {0}'.format(str(stale_epochs)))
        else:
            print('Saving model...')
            stale_epochs = 0
            best_valid_loss = valid_loss
            state = {
                # unwrap DataParallel so the checkpoint holds the bare model
                'net': model.module if args.cuda else model,
                # 'acc' holds the validation loss, not an accuracy; the key is
                # kept so existing checkpoint readers continue to work.
                'acc': valid_loss,
                'loss': valid_loss,
                'epoch': epoch,
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(state, './checkpoint/ckpt.t7')
        epoch += 1

    # test model
    # NOTE(review): this evaluates the weights of the last epoch, not the best
    # checkpoint saved above.
    test(test_loader, model, args.cuda)


# Start the training pipeline only when this file is executed as a script.
if __name__ == '__main__':
    run()

Overwriting run.py


In [26]:
!python run.py

^C


NameError: name 'Tensor' is not defined