# aifood baseline 

### 本baseline采用pytorch框架，应用ModelArts的Notebook进行开发

### 数据集获取
将您OBS桶中的数据文件加载到此notebook中，将如下代码中"obs-aifood-baseline"修改成您OBS桶名称。

In [1]:
import moxing as mox
mox.file.copy_parallel('s3://ai-awe-n4/aifood','./aifood/')
print("done")

INFO:root:Using MoXing-v1.14.0-14d5d81b
INFO:root:Using OBS-Python-SDK-3.1.2
INFO:root:Listing OBS: 1000
INFO:root:Listing OBS: 2000
INFO:root:Listing OBS: 3000
INFO:root:Listing OBS: 4000
INFO:root:Listing OBS: 5000
INFO:root:pid: None.	1000/5001
INFO:root:pid: None.	2000/5001
INFO:root:pid: None.	3000/5001
INFO:root:pid: None.	4000/5001
INFO:root:pid: None.	5000/5001


done


In [20]:
import moxing as mox
mox.file.copy('s3://ai-awe-n4/model_output1/model/vgg-resnet-50-torch.pth','./vgg-resnet-50-torch.pth')
print("done")

done


### 加载依赖

In [1]:

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
import time
import os


### 加载数据集，并将其分为训练集和测试集

In [13]:
dataTrans = transforms.Compose([
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomVerticalFlip(p=0.5),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.RandomRotation(20),
#     transforms.FiveCrop(100),
#     transforms.Lambda(lambda x: transforms.functional.rotate(x, 100)),
#     transforms.TenCrop((100, 224), vertical_flip=True),
#     transforms.RandomGrayscale(p=0.3),
#     transforms.ColorJitter(brightness=0.5, contrast=0.5, hue=0.5),
#     transforms.Pad(10, padding_mode="symmetric"),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
])
 
    # image data path
data_dir = './aifood/images'
all_image_datasets = datasets.ImageFolder(data_dir, dataTrans)
#print(all_image_datasets.class_to_idx)    
trainsize = int(0.7*len(all_image_datasets))
testsize = len(all_image_datasets) - trainsize
train_dataset, test_dataset = torch.utils.data.random_split(all_image_datasets,[trainsize,testsize])
   
image_datasets = {'train':train_dataset,'val':test_dataset}
    

    # wrap your data and label into Tensor

    
dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=64,
                                                 shuffle=True,
                                                 num_workers=4) for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    # use gpu or not
use_gpu = torch.cuda.is_available()

In [17]:
def train_model(model, lossfunc, optimizer, scheduler, num_epochs=10):
    start_time = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        if epoch % 10 == 0:
            # random dataset
            train_dataset, test_dataset = torch.utils.data.random_split(all_image_datasets,[trainsize,testsize])
            image_datasets = {'train':train_dataset,'val':test_dataset}
            # wrap your data and label into Tensor
            dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],batch_size=64,shuffle=True,
                                                         num_workers=4) for x in ['train', 'val']}
            dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0

            # Iterate over data.
            for data in dataloders[phase]:
                # get the inputs
                inputs, labels = data
                

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = lossfunc(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

    elapsed_time = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed_time // 60, elapsed_time % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
  
    return model

In [15]:
# get model and replace the original fc layer with your fc layer
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 10)

if use_gpu:
    model_ft = model_ft.cuda()

    # define loss function
lossfunc = nn.CrossEntropyLoss()

    # setting optimizer and trainable parameters
 #   params = model_ft.parameters()
 # list(model_ft.fc.parameters())+list(model_ft.layer4.parameters())
#params = list(model_ft.fc.parameters())+list( model_ft.parameters())
params = list(model_ft.fc.parameters())
optimizer_ft = optim.SGD(params, lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=5, gamma=0.1)

model_ft.load_state_dict(torch.load('params.pkl'))
model_ft = train_model(model=model_ft,
                       lossfunc=lossfunc,
                       optimizer=optimizer_ft,
                       scheduler=exp_lr_scheduler,
                       num_epochs=30)

Epoch 0/29
----------
train Loss: 0.0093 Acc: 0.8280
val Loss: 0.0095 Acc: 0.8300
Epoch 1/29
----------
train Loss: 0.0091 Acc: 0.8406
val Loss: 0.0094 Acc: 0.8320
Epoch 2/29
----------
train Loss: 0.0091 Acc: 0.8303
val Loss: 0.0089 Acc: 0.8353
Epoch 3/29
----------
train Loss: 0.0087 Acc: 0.8351
val Loss: 0.0096 Acc: 0.8313
Epoch 4/29
----------
train Loss: 0.0083 Acc: 0.8509
val Loss: 0.0091 Acc: 0.8333
Epoch 5/29
----------
train Loss: 0.0082 Acc: 0.8474
val Loss: 0.0091 Acc: 0.8327
Epoch 7/29
----------
train Loss: 0.0083 Acc: 0.8454
val Loss: 0.0091 Acc: 0.8393
Epoch 8/29
----------
train Loss: 0.0082 Acc: 0.8486
val Loss: 0.0089 Acc: 0.8347
Epoch 9/29
----------
train Loss: 0.0083 Acc: 0.8511
val Loss: 0.0091 Acc: 0.8380
Epoch 10/29
----------
train Loss: 0.0081 Acc: 0.8597
val Loss: 0.0090 Acc: 0.8320
Epoch 11/29
----------
train Loss: 0.0082 Acc: 0.8443
val Loss: 0.0090 Acc: 0.8287
Epoch 12/29
----------
train Loss: 0.0081 Acc: 0.8440
val Loss: 0.0091 Acc: 0.8447
Epoch 13/29
-

In [10]:
torch.save(model_ft.state_dict(), './model.pth')

In [16]:
import moxing as mox
mox.file.copy('./model.pth','s3://ai-awe-n4/model_output3/model/resnet-50-torch.pth')
print("done")

INFO:root:Using MoXing-v1.14.0-14d5d81b
INFO:root:Using OBS-Python-SDK-3.1.2


done


In [30]:
params = list(model_ft.fc.parameters())
optimizer_ft = optim.Adam(params, lr = 0.00001,weight_decay=0.1)

    # Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft.load_state_dict(torch.load('model.pth'))
model_ft = train_model(model=model_ft,
                       lossfunc=lossfunc,
                       optimizer=optimizer_ft,
                       scheduler=exp_lr_scheduler,
                       num_epochs=30)

Epoch 0/29
----------
train Loss: 0.0096 Acc: 0.8303
val Loss: 0.0088 Acc: 0.8507
Epoch 1/29
----------
train Loss: 0.0094 Acc: 0.8380
val Loss: 0.0090 Acc: 0.8400
Epoch 2/29
----------
train Loss: 0.0097 Acc: 0.8257
val Loss: 0.0090 Acc: 0.8380
Epoch 3/29
----------
train Loss: 0.0094 Acc: 0.8403
val Loss: 0.0089 Acc: 0.8400
Epoch 4/29
----------
train Loss: 0.0095 Acc: 0.8369
val Loss: 0.0089 Acc: 0.8527
Epoch 5/29
----------
train Loss: 0.0096 Acc: 0.8320
val Loss: 0.0089 Acc: 0.8473
Epoch 6/29
----------
train Loss: 0.0095 Acc: 0.8311
val Loss: 0.0089 Acc: 0.8487
Epoch 7/29
----------
train Loss: 0.0096 Acc: 0.8357
val Loss: 0.0090 Acc: 0.8480
Epoch 8/29
----------
train Loss: 0.0096 Acc: 0.8297
val Loss: 0.0089 Acc: 0.8460
Epoch 9/29
----------
train Loss: 0.0094 Acc: 0.8397
val Loss: 0.0091 Acc: 0.8413
Epoch 10/29
----------
train Loss: 0.0094 Acc: 0.8266
val Loss: 0.0092 Acc: 0.8400
Epoch 11/29
----------
train Loss: 0.0094 Acc: 0.8351
val Loss: 0.0094 Acc: 0.8353
Epoch 12/29
--

In [31]:
torch.save(model_ft.state_dict(), './model-resnet.pth')

In [32]:
import moxing as mox
mox.file.copy('./model-resnet.pth','s3://ai-awe-n4/model_output5/model/resnet-50-torch.pth')
print("done")

done


### VGG

In [19]:
# 构建多模型融合结构
model_1 = models.vgg16(pretrained = False)

# 设置模型的参数不需要进行梯度下降
for param in model_1.parameters():
    param.requires_grad = False
model_1.classifier = torch.nn.Sequential(torch.nn.Linear(25088,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,10))

Use_gpu = torch.cuda.is_available()
# 设置损失函数以及优化方法
if Use_gpu:
    model_1 = model_1.cuda()
    
loss_f_1 = torch.nn.CrossEntropyLoss()
optimizer_1 = torch.optim.Adam(model_1.classifier.parameters(), lr = 0.00001,weight_decay=0.1)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_1, step_size=7, gamma=0.1)

In [21]:
load_name = './vgg-resnet-50-torch.pth'
checkpoint = torch.load(load_name,map_location ='cpu')
model_1.load_state_dict(checkpoint['model_1_state_dict'])
# model_2.load_state_dict(checkpoint['model_2_state_dict'])
model_1.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [24]:
optimizer_1 = torch.optim.Adam(model_1.classifier.parameters(), lr = 0.000001)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_1, step_size=7, gamma=0.1)

In [27]:
model_1 = train_model(model=model_1,
                       lossfunc=loss_f_1,
                       optimizer=optimizer_1,
                       scheduler=exp_lr_scheduler,
                       num_epochs=20)

Epoch 0/19
----------
train Loss: 0.0058 Acc: 0.8894
val Loss: 0.0053 Acc: 0.9007
Epoch 1/19
----------
train Loss: 0.0060 Acc: 0.8791
val Loss: 0.0052 Acc: 0.9127
Epoch 2/19
----------
train Loss: 0.0059 Acc: 0.8840
val Loss: 0.0051 Acc: 0.9193
Epoch 3/19
----------
train Loss: 0.0059 Acc: 0.8926
val Loss: 0.0050 Acc: 0.9133
Epoch 4/19
----------
train Loss: 0.0060 Acc: 0.8869
val Loss: 0.0052 Acc: 0.9060
Epoch 5/19
----------
train Loss: 0.0057 Acc: 0.8874
val Loss: 0.0051 Acc: 0.9073
Epoch 6/19
----------
train Loss: 0.0060 Acc: 0.8886
val Loss: 0.0052 Acc: 0.9020
Epoch 7/19
----------
train Loss: 0.0060 Acc: 0.8823
val Loss: 0.0052 Acc: 0.9013
Epoch 8/19
----------
train Loss: 0.0060 Acc: 0.8791
val Loss: 0.0053 Acc: 0.9007
Epoch 9/19
----------
train Loss: 0.0060 Acc: 0.8849
val Loss: 0.0050 Acc: 0.9060
Epoch 10/19
----------
train Loss: 0.0057 Acc: 0.8911
val Loss: 0.0054 Acc: 0.8947
Epoch 11/19
----------
train Loss: 0.0059 Acc: 0.8891
val Loss: 0.0056 Acc: 0.8887
Epoch 12/19
--

In [28]:
torch.save(model_ft.state_dict(), './model-vgg.pth')

In [29]:
import moxing as mox
mox.file.copy('./model-vgg.pth','s3://ai-awe-n4/model_output4/model/vgg-16-torch.pth')
print("done")

done


### 模型训练
采用resnet50神经网络结构训练模型,模型训练需要一定时间，等待该段代码运行完成后再往下执行。

In [15]:
# 构建多模型融合结构
model_1 = models.vgg16(pretrained = False)
model_2 = models.resnet50(pretrained = True)

# 设置模型的参数不需要进行梯度下降
for param in model_1.parameters():
    param.requires_grad = False
model_1.classifier = torch.nn.Sequential(torch.nn.Linear(25088,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,10))
for param in model_2.parameters():
    param.requires_grad = False
num_ftrs = model_2.fc.in_features
model_2.fc = torch.nn.Linear(num_ftrs,10)


Use_gpu = torch.cuda.is_available()
# 设置损失函数以及优化方法
if Use_gpu:
    model_1 = model_1.cuda()
    model_2 = model_2.cuda()
    
loss_f_1 = torch.nn.CrossEntropyLoss()
loss_f_2 = torch.nn.CrossEntropyLoss()

optimizer_1 = torch.optim.Adam(model_1.classifier.parameters(), lr = 0.00001,weight_decay=0.1)
params_2 = list(model_2.fc.parameters())
optimizer_2 = optim.SGD(params_2, lr=0.001, momentum=0.9)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_2, step_size=7, gamma=0.1)

# 设置两个模型融合的权重参数
weight_1 = 0.4
weight_2 = 0.6

In [16]:
load_name = './vgg-resnet-50-torch.pth'
checkpoint = torch.load(load_name,map_location ='cpu')
model_1.load_state_dict(checkpoint['model_1_state_dict'])
# model_2.load_state_dict(checkpoint['model_2_state_dict'])
model_1.eval()
# model_2.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

In [20]:
def train_model_cross(model_1,model_2, loss_f_1,loss_f_2, optimizer_1,optimizer_2,weight_1,weight_2, scheduler, num_epochs=10):
    start_time = time.time()

    best_model_wts_1 = model_1.state_dict()
    best_model_wts_2 = model_2.state_dict()
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        
        if epoch % 5 == 0:
            # random dataset
            train_dataset, test_dataset = torch.utils.data.random_split(all_image_datasets,[trainsize,testsize])
            image_datasets = {'train':train_dataset,'val':test_dataset}
            # wrap your data and label into Tensor
            dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],batch_size=64,shuffle=True,
                                                         num_workers=4) for x in ['train', 'val']}
            dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model_1.train(True)
                model_2.train(True)  # Set model to training mode
            else:
                model_1.train(False)
                model_2.train(False)  # Set model to evaluate mode

            running_loss_1 = 0.0
            running_loss_2 = 0.0
            running_corrects_1 = 0.0
            running_corrects_2 = 0.0
            blending_running_corrects = 0.0
            
            # Iterate over data.
            for data in dataloders[phase]:
                # get the inputs
                inputs, labels = data
                

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)


                # forward
                y_pred_1 = model_1(inputs)
                y_pred_2 = model_2(inputs)
#                 out = torch.cat((y_pred_1,y_pred_2),1)
                blending_y_pred = y_pred_1 * weight_1 + y_pred_2 * weight_2
                _, pred_1 = torch.max(y_pred_1.data,1) # 找出每一行最大值对应的索引值
                _, pred_2 = torch.max(y_pred_2.data,1)
                _, blending_y_pred = torch.max(blending_y_pred.data,1)
                
                optimizer_1.zero_grad()
                optimizer_2.zero_grad()

                loss_1 = loss_f_1(y_pred_1,labels)
                loss_2 = loss_f_2(y_pred_2,labels)
                

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss_1.backward()
                    loss_2.backward()
                    optimizer_1.step()
                    optimizer_2.step()
                    
                # statistics
                running_loss_1 += loss_1.data
                running_loss_2 += loss_2.data
                running_corrects_1 += torch.sum(pred_1 == labels.data).to(torch.float32)
                running_corrects_2 += torch.sum(pred_2 == labels.data).to(torch.float32)
                blending_running_corrects += torch.sum(blending_y_pred == labels.data).to(torch.float32)

            
            epoch_loss_1 = running_loss_1/dataset_sizes[phase]
            epoch_acc_1 = running_corrects_1 * 100/dataset_sizes[phase]
            epoch_loss_2 = running_loss_2/dataset_sizes[phase]
            epoch_acc_2 = running_corrects_2 * 100/dataset_sizes[phase]
            epoch_blending_acc = blending_running_corrects * 100/dataset_sizes[phase]

            print('{}, Model1 Loss:{:.4f},Model1 ACC:{:.4f}%,Model2 Loss:{:.4f},Model2 ACC:{:.4f}%,Blending_Model ACC:{:.4f}'
                  .format(phase,epoch_loss_1,epoch_acc_1,epoch_loss_2,epoch_acc_2,epoch_blending_acc))

            # deep copy the model
            if phase == 'val' and epoch_blending_acc > best_acc:
                best_acc = epoch_blending_acc
                best_model_wts_1 = model_1.state_dict()
                best_model_wts_2 = model_2.state_dict()

    elapsed_time = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed_time // 60, elapsed_time % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model_1.load_state_dict(best_model_wts_1)
    model_2.load_state_dict(best_model_wts_2)
  
    return model_1,model_2

In [21]:
model_1,model_2 = train_model_cross(
    model_1,model_2, loss_f_1,loss_f_2, optimizer_1,optimizer_2,weight_1,weight_2, exp_lr_scheduler, num_epochs=40)

Epoch 0/39
----------
train, Model1 Loss:0.0097,Model1 ACC:80.8000%,Model2 Loss:0.0306,Model2 ACC:43.0000%,Blending_Model ACC:81.4286
val, Model1 Loss:0.0073,Model1 ACC:85.6667%,Model2 Loss:0.0245,Model2 ACC:68.3333%,Blending_Model ACC:86.6667
Epoch 1/39
----------
train, Model1 Loss:0.0080,Model1 ACC:83.6286%,Model2 Loss:0.0206,Model2 ACC:73.3714%,Blending_Model ACC:85.3429
val, Model1 Loss:0.0068,Model1 ACC:86.3333%,Model2 Loss:0.0184,Model2 ACC:75.0667%,Blending_Model ACC:86.8000
Epoch 2/39
----------
train, Model1 Loss:0.0073,Model1 ACC:85.6000%,Model2 Loss:0.0161,Model2 ACC:78.3143%,Blending_Model ACC:87.2857
val, Model1 Loss:0.0064,Model1 ACC:86.4000%,Model2 Loss:0.0151,Model2 ACC:79.3333%,Blending_Model ACC:86.9333
Epoch 3/39
----------
train, Model1 Loss:0.0066,Model1 ACC:86.8000%,Model2 Loss:0.0139,Model2 ACC:79.5714%,Blending_Model ACC:88.8000
val, Model1 Loss:0.0066,Model1 ACC:86.4667%,Model2 Loss:0.0137,Model2 ACC:79.0667%,Blending_Model ACC:86.4000
Epoch 4/39
----------
tr

val, Model1 Loss:0.0084,Model1 ACC:85.3333%,Model2 Loss:0.0107,Model2 ACC:82.6000%,Blending_Model ACC:86.7333
Epoch 34/39
----------
train, Model1 Loss:0.0073,Model1 ACC:88.2286%,Model2 Loss:0.0109,Model2 ACC:81.6857%,Blending_Model ACC:88.1714
val, Model1 Loss:0.0084,Model1 ACC:85.8667%,Model2 Loss:0.0105,Model2 ACC:83.9333%,Blending_Model ACC:88.2667
Epoch 35/39
----------
train, Model1 Loss:0.0079,Model1 ACC:86.4571%,Model2 Loss:0.0109,Model2 ACC:82.1429%,Blending_Model ACC:87.1143
val, Model1 Loss:0.0075,Model1 ACC:88.1333%,Model2 Loss:0.0104,Model2 ACC:83.4667%,Blending_Model ACC:88.9333
Epoch 36/39
----------
train, Model1 Loss:0.0077,Model1 ACC:86.8000%,Model2 Loss:0.0111,Model2 ACC:81.3714%,Blending_Model ACC:87.2286
val, Model1 Loss:0.0078,Model1 ACC:87.4000%,Model2 Loss:0.0106,Model2 ACC:82.7333%,Blending_Model ACC:88.1333
Epoch 37/39
----------
train, Model1 Loss:0.0077,Model1 ACC:87.1143%,Model2 Loss:0.0110,Model2 ACC:82.0000%,Blending_Model ACC:87.5143
val, Model1 Loss:0.0

将训练好的模型保存下来。

In [22]:
# torch.save(model_ft.state_dict(), './model.pth')
torch.save({
            'model_1_state_dict': model_1.state_dict(),
            'model_2_state_dict': model_2.state_dict()
            }, './vgg-resnet-50-torch.pth')

In [22]:
model_1 = models.vgg16(pretrained = True)
model_2 = models.resnet50(pretrained = True)

for param in model_1.parameters():
    param.requires_grad = False
model_1.classifier = torch.nn.Sequential(torch.nn.Linear(25088,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,4096),
                                        torch.nn.ReLU(),
                                        torch.nn.Dropout(p = 0.5),
                                        torch.nn.Linear(4096,10))
for param in model_2.parameters():
    param.requires_grad = False
num_ftrs = model_2.fc.in_features
model_2.fc = torch.nn.Linear(num_ftrs,10)

load_name = './model_cross.pth'
checkpoint = torch.load(load_name,map_location ='cpu')
model_1.load_state_dict(checkpoint['model_1_state_dict'])
model_2.load_state_dict(checkpoint['model_2_state_dict'])
model_1.eval()

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (17): Conv2d

### 将训练好的模型保存至OBS
将模型保存到OBS桶中model文件夹下，为后续推理测试、模型提交做准备。将如下代码中"obs-aifood-baseline"修改成您OBS桶的名称。


In [23]:
import moxing as mox
mox.file.copy('./vgg-resnet-50-torch.pth','s3://ai-awe-n4/model_output2/model/vgg-resnet-50-torch.pth')
print("done")

done
