# aifood baseline 

### 本baseline采用pytorch框架，应用ModelArts的Notebook进行开发

### 数据集获取
将您OBS桶中的数据文件加载到此notebook中，将如下代码中"obs-aifood-baseline"修改成您OBS桶名称。

In [1]:
import moxing as mox
mox.file.copy_parallel('s3://obs-aifood-baseline/aifood','./aifood/')
print("done")

INFO:root:Using MoXing-v1.14.0-14d5d81b
INFO:root:Using OBS-Python-SDK-3.1.2
INFO:root:Listing OBS: 1000
INFO:root:Listing OBS: 2000
INFO:root:Listing OBS: 3000
INFO:root:Listing OBS: 4000
INFO:root:Listing OBS: 5000
INFO:root:pid: None.	1000/5001
INFO:root:pid: None.	2000/5001
INFO:root:pid: None.	3000/5001
INFO:root:pid: None.	4000/5001
INFO:root:pid: None.	5000/5001


done


### 加载依赖

In [2]:

from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
import torchvision
from torchvision import datasets, models, transforms
import time
import os


### 加载数据集，并将其分为训练集和测试集

In [3]:
dataTrans = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])
        ])
 
    # image data path
data_dir = './aifood/images'
all_image_datasets = datasets.ImageFolder(data_dir, dataTrans)
#print(all_image_datasets.class_to_idx)    
trainsize = int(0.8*len(all_image_datasets))
testsize = len(all_image_datasets) - trainsize
train_dataset, test_dataset = torch.utils.data.random_split(all_image_datasets,[trainsize,testsize])
   
image_datasets = {'train':train_dataset,'val':test_dataset}
    

    # wrap your data and label into Tensor

    
dataloders = {x: torch.utils.data.DataLoader(image_datasets[x],
                                                 batch_size=64,
                                                 shuffle=True,
                                                 num_workers=4) for x in ['train', 'val']}

dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}

    # use gpu or not
use_gpu = torch.cuda.is_available()

In [4]:
def train_model(model, lossfunc, optimizer, scheduler, num_epochs=10):
    start_time = time.time()

    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                model.train(True)  # Set model to training mode
            else:
                model.train(False)  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0.0

            # Iterate over data.
            for data in dataloders[phase]:
                # get the inputs
                inputs, labels = data
                

                # wrap them in Variable
                if use_gpu:
                    inputs = Variable(inputs.cuda())
                    labels = Variable(labels.cuda())
                else:
                    inputs, labels = Variable(inputs), Variable(labels)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                loss = lossfunc(outputs, labels)

                # backward + optimize only if in training phase
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # statistics
                running_loss += loss.data
                running_corrects += torch.sum(preds == labels.data).to(torch.float32)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(
                phase, epoch_loss, epoch_acc))

            # deep copy the model
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

    elapsed_time = time.time() - start_time
    print('Training complete in {:.0f}m {:.0f}s'.format(
        elapsed_time // 60, elapsed_time % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
  
    return model

### 模型训练
采用resnet50神经网络结构训练模型,模型训练需要一定时间，等待该段代码运行完成后再往下执行。

In [5]:
# get model and replace the original fc layer with your fc layer
model_ft = models.resnet50(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 10)

if use_gpu:
    model_ft = model_ft.cuda()

    # define loss function
lossfunc = nn.CrossEntropyLoss()

    # setting optimizer and trainable parameters
 #   params = model_ft.parameters()
 # list(model_ft.fc.parameters())+list(model_ft.layer4.parameters())
#params = list(model_ft.fc.parameters())+list( model_ft.parameters())
params = list(model_ft.fc.parameters())
optimizer_ft = optim.SGD(params, lr=0.001, momentum=0.9)

    # Decay LR by a factor of 0.1 every 7 epochs
exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)

model_ft = train_model(model=model_ft,
                           lossfunc=lossfunc,
                           optimizer=optimizer_ft,
                           scheduler=exp_lr_scheduler,
                           num_epochs=10)

Downloading: "https://download.pytorch.org/models/resnet50-19c8e357.pth" to /home/ma-user/.torch/models/resnet50-19c8e357.pth
100%|██████████| 102502400/102502400 [00:50<00:00, 2011217.02it/s]


Epoch 0/9
----------
train Loss: 0.0280 Acc: 0.5200
val Loss: 0.0193 Acc: 0.7970
Epoch 1/9
----------
train Loss: 0.0160 Acc: 0.8008
val Loss: 0.0136 Acc: 0.8190
Epoch 2/9
----------
train Loss: 0.0121 Acc: 0.8353
val Loss: 0.0112 Acc: 0.8350
Epoch 3/9
----------
train Loss: 0.0102 Acc: 0.8475
val Loss: 0.0099 Acc: 0.8440
Epoch 4/9
----------
train Loss: 0.0092 Acc: 0.8670
val Loss: 0.0090 Acc: 0.8440
Epoch 5/9
----------
train Loss: 0.0084 Acc: 0.8690
val Loss: 0.0086 Acc: 0.8520
Epoch 6/9
----------
train Loss: 0.0079 Acc: 0.8683
val Loss: 0.0082 Acc: 0.8570
Epoch 7/9
----------
train Loss: 0.0076 Acc: 0.8793
val Loss: 0.0080 Acc: 0.8580
Epoch 8/9
----------
train Loss: 0.0076 Acc: 0.8743
val Loss: 0.0080 Acc: 0.8590
Epoch 9/9
----------
train Loss: 0.0075 Acc: 0.8798
val Loss: 0.0078 Acc: 0.8610
Training complete in 4m 0s
Best val Acc: 0.861000


将训练好的模型保存下来。

In [6]:
torch.save(model_ft.state_dict(), './model.pth')

### 将训练好的模型保存至OBS
将模型保存到OBS桶中model文件夹下，为后续推理测试、模型提交做准备。将如下代码中"obs-aifood-baseline"修改成您OBS桶的名称。


In [8]:
import moxing as mox
mox.file.copy('./model.pth','s3://obs-aifood-baseline/model_output/model/resnet-50.pth')
print("done")

done
