In [1]:
# License: BSD
# Author: Sasank Chilamkurthy+Zhang Liangjun
# version 1.0
# classes: 26 (the final layer of Net has 26 outputs)
%matplotlib inline
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import torch.nn.functional as F
import scipy.io as scio
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import time
import copy
import os
import torch.utils.data as data
import random
import shutil
import glob
import csv
import math
import pandas
import cv2
from matplotlib import pyplot as plt
from skimage import transform


In [2]:
def mat_read(filepath):
    """Load the depth map stored in a MATLAB ``.mat`` file and standardise it.

    The array stored under the key ``'depth'`` is multiplied by 300 and
    truncated to whole numbers. Entries equal to 0 are then replaced by the
    mean of the strictly positive entries, and the whole map is shifted and
    scaled by the mean and standard deviation of those positive entries.

    Args:
        filepath: path of the ``.mat`` file to read.

    Returns:
        numpy.ndarray of floats (not a torch tensor) with one extra leading
        axis, i.e. shape ``(1,) + depth.shape``. If the map has no positive
        entry the result is all zeros; if the positive entries are all
        identical the map is only mean-centred (no division by zero).
    """
    mat = scio.loadmat(filepath)
    # Read the depth data from the mat file, rescale it and drop the
    # fractional part. The builtin int/float are what the removed NumPy
    # aliases np.int / np.float used to point to.
    depth = (mat['depth'] * 300).astype(int).astype(float)

    valid = depth[depth > 0]
    if valid.size == 0:
        # No usable measurement: return a neutral map instead of NaNs.
        depth_scale = np.zeros_like(depth)
    else:
        avrg = np.mean(valid)
        std = np.std(valid)
        # Fill missing (zero) pixels with the mean so they normalise to 0.
        depth[depth == 0] = avrg
        if std == 0:
            # Constant map: avoid dividing by zero, keep it mean-centred.
            std = 1.0
        depth_scale = (depth - avrg) / std

    # Add the leading (channel) axis.
    return np.expand_dims(np.array(depth_scale), 0)
# Split the data files into a training set and a test set.

# Root directory of the dataset. Later in the notebook ViewpointDataset objects
# are built from it for the 'target', 'train' and 'val' phases.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
dst_dir = '/home/sjtu/gcj/data/depth_data/depth_26class_target'

        

In [3]:
# File-name suffixes that are accepted as data files.
EXTENSIONS = ['.mat','.csv']
def is_mat_file(filename):
    """Return True when ``filename`` ends with one of ``EXTENSIONS``."""
    # str.endswith accepts a tuple of candidate suffixes.
    return filename.endswith(tuple(EXTENSIONS))

def find_classes(dir):
    """Discover the class names, i.e. the sub-directories directly under ``dir``.

    Returns:
        ``(classes, class_to_idx)``: the sorted list of sub-directory names
        and a dict mapping each name to its position in that list.
    """
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )
    class_to_idx = dict((name, index) for index, name in enumerate(classes))
    return classes, class_to_idx

def make_dataset(dir,phase,class_to_idx):
    """Collect ``(path, class_index)`` pairs for every data file of one phase.

    Args:
        dir: dataset root directory.
        phase: name of the sub-directory of ``dir`` to scan (the notebook
            uses 'train', 'val' and 'target').
        class_to_idx: dict mapping a class folder name to its integer label.

    Returns:
        List of ``(file_path, class_index)`` tuples. Files are only recorded
        here, not loaded. The list is in sorted, reproducible order.

    Raises:
        KeyError: if a class folder under the phase directory is missing
            from ``class_to_idx``.
    """
    samples = []
    phase_dir = os.path.join(dir, phase)
    # os.listdir and the file lists yielded by os.walk come back in arbitrary
    # order; sorting makes the sample order independent of the filesystem.
    for target in sorted(os.listdir(phase_dir)):
        class_dir = os.path.join(phase_dir, target)
        if not os.path.isdir(class_dir):
            continue

        for root, _, fnames in sorted(os.walk(class_dir)):
            for fname in sorted(fnames):
                if is_mat_file(fname):
                    samples.append((os.path.join(root, fname), class_to_idx[target]))
    return samples



In [4]:
class ViewpointDataset(data.Dataset):
    """Depth-map dataset laid out as ``root/<phase>/<class name>/<files>``.

    Samples of the 'target' phase are parsed as CSV files; samples of every
    other phase are loaded with ``mat_read``. Each item is a
    ``(FloatTensor with a leading channel axis, class_index)`` pair.

    Args:
        root: dataset root directory.
        transform: optional callable applied to the sample after it has been
            converted to a PIL image; its result is converted back to a tensor.
        phase: name of the sub-directory of ``root`` to use.

    Raises:
        RuntimeError: if no supported file is found for the phase.
    """

    def __init__(self, root, transform = None, phase = None):
        phase_dir = os.path.join(root, phase)
        classes, class_to_idx = find_classes(phase_dir)
        datas = make_dataset(root, phase, class_to_idx)
        if len(datas) == 0:
            raise RuntimeError("Found 0 images in subfolders of: " + root + "\n"
                               "Supported extensions are: " + ",".join(EXTENSIONS))
        self.root = root
        self.phase = phase
        self.classes = classes
        #todo
        # NOTE(review): width/height are not read anywhere in this class.
        self.width = 480
        self.height = 640
        self.suffix = '.mat'
        self.transform = transform
        self.datas = datas

    @staticmethod
    def _standardise_depth(depth):
        """Fill zero pixels with the mean of the positive ones, then z-score the map."""
        valid = depth[depth > 0]
        if valid.size == 0:
            # No usable measurement: return a neutral map instead of NaNs.
            return np.zeros_like(depth)
        avrg = np.mean(valid)
        std = np.std(valid)
        depth[depth == 0] = avrg
        if std == 0:
            # Constant map: avoid dividing by zero, keep it mean-centred.
            std = 1.0
        return (depth - avrg) / std

    @staticmethod
    def _read_csv_depth(csv_path):
        """Parse a CSV depth map, dropping its first row and first column."""
        with open(csv_path) as f:
            rows = list(csv.reader(f))
        # Builtin float replaces the removed NumPy alias np.float.
        depth = np.array(rows[1:])[:, 1:].astype(float)
        return np.expand_dims(ViewpointDataset._standardise_depth(depth), 0)

    # Convert the depth matrix of one sample into a tensor.
    def __getitem__(self, idx):
        mat_path, label = self.datas[idx]
        if self.phase == 'target':
            depth = self._read_csv_depth(mat_path)
        else:
            # Preprocessing for .mat samples lives in mat_read.
            depth = mat_read(mat_path)

        # Create a float tensor from the numpy.ndarray.
        depth_tensor = torch.from_numpy(depth).type(torch.FloatTensor)
        if self.transform:
            toPIL = transforms.ToPILImage()
            toTensor = transforms.ToTensor()
            depth_tensor = toTensor(self.transform(toPIL(depth_tensor)))
        return depth_tensor, label

    def __len__(self):
        return len(self.datas)

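    # Configure how the gradient updates are applied (optimizer and learning rate).
    # Increasing the learning rate is a good approach.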
def optim_scheduler_ft(model, epoch, init_lr=0.0005, lr_decay_epoch=7):
    """Create the Adam optimizer for ``epoch`` with a step-decayed learning rate.

    The rate is ``init_lr`` multiplied by 0.1 once for every
    ``lr_decay_epoch`` completed epochs. It is printed on each epoch that
    starts a new decay step (epoch 0 included).

    Returns:
        A newly constructed ``optim.Adam`` over ``model.parameters()``; no
        optimizer state is carried over from previous calls.
    """
    decay_steps = epoch // lr_decay_epoch
    lr = init_lr * (0.1**decay_steps)

    starts_new_step = (epoch % lr_decay_epoch == 0)
    if starts_new_step:
        print('LR is set to {}'.format(lr))

    return optim.Adam(model.parameters(), lr=lr)

In [5]:
# Build one dataset and one shuffling data loader per phase.
batch_size = 64
dsets = dict(
    (phase_name, ViewpointDataset(dst_dir, phase=phase_name))
    for phase_name in ['target', 'train', 'val']
)
dset_loaders = dict(
    (phase_name,
     torch.utils.data.DataLoader(dsets[phase_name], batch_size=batch_size,
                                 shuffle=True, num_workers=8))
    for phase_name in ['target', 'train', 'val']
)
dset_sizes = dict(
    (phase_name, len(dsets[phase_name]))
    for phase_name in ['target', 'train', 'val']
)
# Class names are taken from the validation split.
dset_classes = dsets['val'].classes
print(dset_sizes)
print(len(dset_classes))
use_gpu = torch.cuda.is_available()

{'train': 76436, 'target': 520, 'val': 32733}
26


In [6]:
# Network model used for training.
class Net(nn.Module):
    """Convolutional classifier for single-channel depth maps (26 classes).

    Five convolutions, three of them followed by max pooling, feed a stack
    of fully connected layers. ``fc4`` expects 3072 flattened features
    (256 channels x 3 x 4), which is what a 480 x 640 input produces.

    ``forward`` returns ``(logits, features)``: the 26 class scores and the
    128-dimensional activations of the ``adap`` layer.

    Dropout follows the module mode, so call ``model.train()`` before
    training and ``model.eval()`` before inference.
    """

    def __init__(self):
        super(Net, self).__init__()
        # 1 input channel -> 64 feature maps, 11x11 kernel, stride 4.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(11,11),stride=(4,4),padding=(2,2))
        self.conv2 = nn.Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
        self.conv3 = nn.Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv4 = nn.Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv5 = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.fc4   = nn.Linear(3072, 512)
        self.fc1   = nn.Linear(512,128)
        self.adap   = nn.Linear(128,128)
        self.fc3   = nn.Linear(128, 26)

    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), 4)  # max pooling over a 4x4 window
        x = F.max_pool2d(F.relu(self.conv2(x)), 3)  # a single number means a square 3x3 window
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = F.max_pool2d(F.relu(self.conv5(x)), 3)
        x = x.view(-1, self.num_flat_features(x))
        # Pass the module mode explicitly: without it the functional dropout
        # ignores model.train() / model.eval().
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc4(x))
        x = F.dropout(x, p=0.5, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.relu(self.adap(x))
        out1 = x
        x = self.fc3(x)
        return x, out1

    def num_flat_features(self, x):
        """Return the number of elements per sample (all dims but the batch one)."""
        num_features = 1
        for s in x.size()[1:]:
            num_features *= s
        return num_features
# Instantiate the network and move it to the GPU (.cuda() is unconditional here).
model = Net().cuda()
print(model)
# The triple-quoted block below is a string literal, not executed code: it holds a
# disabled single-sample forward pass. As the last expression of the cell it is
# echoed back as the cell output.
'''
a=mat_read('/home/sjtu/gcj/data/depth_data/depth_26class/train/off_1_90_0/1_depth.mat')
a=torch.from_numpy(a)
a=a.type(torch.FloatTensor)
c=torch.FloatTensor(1,1,480,640)#需要写成3dtensor 1代表batch
c[0]=a
b=Variable(c.cuda())
print(type(b))
outputs,out1 = model(b)
print outputs,out1
#'''

Net (
  (conv1): Conv2d(1, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
  (conv2): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc4): Linear (3072 -> 512)
  (fc1): Linear (512 -> 128)
  (adap): Linear (128 -> 128)
  (fc3): Linear (128 -> 26)
)


"\na=mat_read('/home/sjtu/gcj/data/depth_data/depth_26class/train/off_1_90_0/1_depth.mat')\na=torch.from_numpy(a)\na=a.type(torch.FloatTensor)\nc=torch.FloatTensor(1,1,480,640)#\xe9\x9c\x80\xe8\xa6\x81\xe5\x86\x99\xe6\x88\x903dtensor 1\xe4\xbb\xa3\xe8\xa1\xa8batch\nc[0]=a\nb=Variable(c.cuda())\nprint(type(b))\noutputs,out1 = model(b)\nprint outputs,out1\n#"

In [7]:
#def real_data(filepath):
def test(csv_path):
    with open(csv_path) as f:
        l=[]
        lines=csv.reader(f)
        for line in lines:
            l.append(line)
    l.remove(l[0])
    l=np.array(l)
    depth=l[:,1:]
    depth=depth.astype(np.float)
    
    depth_scale=depth[depth>0]
    avrg=np.mean(depth_scale)
    var=np.std(depth_scale)
    index=depth==0
    depth[index]=avrg
    depth_scale=(depth-avrg)/var
    depth3=np.expand_dims(depth_scale,0)
    depth3=np.expand_dims(depth3,0)
    #create tensor from numpy.ndarray
    depth=torch.from_numpy(depth3)
    depth=depth.type(torch.FloatTensor)
    return depth,depth_scale

def _evaluate_model(modelname, dir_name, visualize):
    """Score a saved model on CSV samples grouped in label-named folders.

    Shared implementation of ``test_model`` and ``visual_best_model``. For
    every directory under ``dir_name`` that has sub-directories, each
    ``*.csv`` file in each sub-directory is classified; a prediction counts
    as correct when fields 2 and 3 of the predicted class name (split on
    '_', re-joined with '_') equal the sub-directory name.

    When ``visualize`` is true, every sample is also drawn in its own
    matplotlib figure titled with the prediction, and per-folder accuracy
    plus total/wrong counts are printed.

    Returns:
        ``(accuracy, modelname)`` for the last directory level evaluated;
        accuracy is 0.0 when nothing was evaluated.
    """
    accuracy = 0.0
    # NOTE(review): torch.load unpickles the whole model object; only load
    # checkpoints from a trusted source.
    # Load once instead of once per class folder.
    model = torch.load(modelname)
    model.cuda()
    # Inference mode, so mode-dependent layers (dropout) are disabled.
    model.eval()

    for root, dirnames, _files in os.walk(dir_name):
        if len(dirnames) == 0:
            continue
        figure_index = 0
        correct = 0
        total = 0
        for dirname in dirnames:
            dir_correct = 0
            names = glob.glob(os.path.join(root, dirname) + r'/*.csv')
            total += len(names)
            for csv_path in names:
                inputs, depth_scale = test(csv_path)
                outputs, _features = model(Variable(inputs.cuda()))
                _scores, preds = torch.max(outputs.data, 1)
                # Flatten first: the index tensor may be (1,) or (1, 1)
                # depending on the torch version.
                pred_idx = int(np.ravel(preds.cpu().numpy())[0])
                name_parts = dset_classes[pred_idx].split('_')
                title_name = name_parts[2] + '_' + name_parts[3]
                if visualize:
                    plt.figure(figure_index)
                    figure_index += 1
                    plt.title(title_name)
                    plt.imshow(depth_scale)
                if dirname == title_name:
                    correct += 1
                    dir_correct += 1
            if visualize:
                # Guard against folders without any CSV file.
                dir_accuracy = float(dir_correct) / float(len(names)) if names else 0.0
                print('{}`s Acc: {:.4f}'.format(dirname, dir_accuracy))
        accuracy = float(correct) / float(total) if total else 0.0
        print('{}`s Acc: {:.4f}'.format(modelname, accuracy))
        if visualize:
            print('total:{}, wrong:{}'.format(total, total - correct))
    return accuracy, modelname


def test_model(modelname,dir_name):
    """Evaluate the model saved at ``modelname`` on the CSV samples under ``dir_name``.

    Prints the overall accuracy and returns ``(accuracy, modelname)``.
    """
    return _evaluate_model(modelname, dir_name, visualize=False)


def visual_best_model(modelname,dir_name):
    """Like ``test_model``, but also plots every sample with its predicted label.

    Additionally prints the accuracy of each label folder and the number of
    wrong predictions. Returns ``(accuracy, modelname)``.
    """
    return _evaluate_model(modelname, dir_name, visualize=True)
def get_L(n_src,n_tar):
    """Build the block coefficient matrix for ``n_src`` source and ``n_tar`` target samples.

    The result is square with side ``n_src + n_tar``. Source/source entries
    are ``1/n_src^2``, target/target entries ``1/n_tar^2`` and both cross
    blocks ``-1/(n_src*n_tar)``.
    """
    src_src = (1./(n_src*n_src))*torch.ones(n_src,n_src)
    src_tar = (-1./(n_src*n_tar))*torch.ones(n_src,n_tar)
    tar_src = (-1./(n_tar*n_src))*torch.ones(n_tar,n_src)
    tar_tar = (1./(n_tar*n_tar))*torch.ones(n_tar,n_tar)
    # Stitch the four blocks together: columns first, then rows.
    upper = torch.cat((src_src, src_tar), 1)
    lower = torch.cat((tar_src, tar_tar), 1)
    return torch.cat((upper, lower), 0)
def get_kernel(x1, kernelparam=1, kerneltype='rbf'):
    """Compute the pairwise kernel matrix between the rows of ``x1``.

    Args:
        x1: 2-D tensor of shape ``(row, col)``, one sample per row.
        kernelparam: bandwidth factor of the RBF kernel.
        kerneltype: kernel name; only ``'rbf'`` is implemented.

    Returns:
        ``(row, row)`` tensor with
        ``K[i, j] = exp(-||x_i - x_j||^2 / (2 * col * kernelparam))``,
        or ``None`` for any other ``kerneltype``.
    """
    row, col = x1.size()[0], x1.size()[1]
    if kerneltype != 'rbf':
        return None
    # Squared norm of every row as a column vector. view() replaces the
    # deprecated non-in-place Tensor.resize().
    sq_norms = torch.sum(x1 * x1, dim=1).view(-1, 1)
    # ||x_i - x_j||^2 = ||x_j||^2 + ||x_i||^2 - 2 <x_i, x_j>
    sq_dists = (sq_norms.t().expand(row, row) + sq_norms.expand(row, row)
                - 2 * torch.mm(x1, x1.transpose(0, 1)))
    return torch.exp(-1 * sq_dists / (col * 2 * kernelparam))

In [8]:
def _mix_rbf_kernel(X, Y, sigma_list):
    """Evaluate a sum of RBF kernels on the stacked samples of ``X`` and ``Y``.

    ``X`` and ``Y`` must have the same number of rows ``m``. One Gaussian
    kernel ``exp(-d^2 / (2 * sigma^2))`` is added per entry of ``sigma_list``.

    Returns:
        ``(K_XX, K_XY, K_YY, len(sigma_list))`` where the three matrices are
        the ``m x m`` blocks of the summed kernel matrix.
    """
    assert(X.size(0) == Y.size(0))
    m = X.size(0)

    stacked = torch.cat((X, Y), 0)
    gram = torch.mm(stacked, stacked.t())
    # Row-wise squared norms, broadcast to the shape of the Gram matrix.
    sq_norms = torch.diag(gram).unsqueeze(1).expand_as(gram)
    # Pairwise squared distances: ||a||^2 - 2<a, b> + ||b||^2.
    exponent = sq_norms - 2 * gram + sq_norms.t()

    kernel_sum = 0.0
    for sigma in sigma_list:
        gamma = 1.0 / (2 * sigma**2)
        kernel_sum = kernel_sum + torch.exp(-gamma * exponent)

    return kernel_sum[:m, :m], kernel_sum[:m, m:], kernel_sum[m:, m:], len(sigma_list)


def mix_rbf_mmd2(X, Y, sigma_list, biased=True):
    """Squared MMD between the samples ``X`` and ``Y`` under a mixture of RBF kernels.

    The kernel diagonals are read from the computed matrices rather than
    passed to ``_mmd2`` as a constant.
    """
    K_XX, K_XY, K_YY, _num_kernels = _mix_rbf_kernel(X, Y, sigma_list)
    return _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=biased)


def _mmd2(K_XX, K_XY, K_YY, const_diagonal=False, biased=False):
    """Compute the squared MMD estimate from precomputed kernel blocks.

    Args:
        K_XX, K_XY, K_YY: ``m x m`` kernel matrices (X and Y are assumed to
            hold the same number of samples).
        const_diagonal: ``False`` to read the diagonals from the matrices, or
            the constant value shared by every diagonal entry.
        biased: include the diagonal terms (biased estimate) when true,
            otherwise use the unbiased ``m * (m - 1)`` normalisation.
    """
    m = K_XX.size(0)

    if const_diagonal is False:
        diag_X = torch.diag(K_XX)
        diag_Y = torch.diag(K_YY)
        sum_diag_X = torch.sum(diag_X)
        sum_diag_Y = torch.sum(diag_Y)
    else:
        diag_X = diag_Y = const_diagonal
        sum_diag_X = sum_diag_Y = m * const_diagonal

    # Sums of the kernel matrices with their diagonals removed.
    Kt_XX_sum = (K_XX.sum(dim=1) - diag_X).sum()
    Kt_YY_sum = (K_YY.sum(dim=1) - diag_Y).sum()
    # Sum of every cross-kernel entry.
    K_XY_sum = K_XY.sum(dim=0).sum()

    cross_term = 2.0 * K_XY_sum / (m * m)
    if biased:
        return ((Kt_XX_sum + sum_diag_X) / (m * m)
                + (Kt_YY_sum + sum_diag_Y) / (m * m)
                - cross_term)
    return (Kt_XX_sum / (m * (m - 1))
            + Kt_YY_sum / (m * (m - 1))
            - cross_term)

In [10]:
## Defines how the model is trained.
def train_model(model, criterion, optim_scheduler, num_epochs=25,
                test_dir='/home/sjtu/gcj/data/crop_test', model_dir='./model'):
    """Train ``model`` and checkpoint it after every epoch.

    Each epoch runs a 'train' and a 'val' pass over the module-level
    ``dset_loaders``. For every batch a randomly chosen batch of the 'target'
    loader is also pushed through the network and the MMD between the source
    and target ``adap`` features is computed with ``mix_rbf_mmd2``.

    NOTE(review): the MMD value is only accumulated and printed; the
    optimised loss is ``criterion(outputs, labels)`` alone, as in the
    original code. Confirm whether the MMD term was meant to be added.

    After each epoch the model copied at the end of the validation pass is
    saved to ``<model_dir>/model_26class_<epoch>.pth`` and scored with
    ``test_model`` on ``test_dir``.

    Args:
        model: network whose forward pass returns ``(logits, features)``.
        criterion: loss applied to ``(logits, labels)``. The reported epoch
            loss assumes it returns the batch mean, as ``nn.CrossEntropyLoss``
            does by default.
        optim_scheduler: callable ``(model, epoch) -> optimizer``, called at
            the start of every training pass.
        num_epochs: number of epochs to run.
        test_dir: folder handed to ``test_model`` after every epoch.
        model_dir: folder receiving the checkpoints (created if missing).

    Returns:
        A deep copy of the model taken after the last validation pass (the
        "keep only if better" check is disabled, so this is the most recent
        epoch, not necessarily the most accurate one).
    """
    since = time.time()

    best_model = model
    best_acc = 0.0

    sigma_list = [1, 2, 4, 8, 16]
    # The original sampled the target batch with random.randint(0, 7).
    max_target_index = 7

    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)
        # Cache this epoch's target batches; their labels are not used.
        target_data = []
        for target_inputs, _target_labels in dset_loaders['target']:
            target_data.append(target_inputs)
        # Clamp so that a target set with fewer batches cannot raise IndexError.
        last_target_index = min(max_target_index, len(target_data) - 1)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            if phase == 'train':
                optimizer = optim_scheduler(model, epoch)
                model.train()
            else:
                # Disable dropout while validating.
                model.eval()

            running_loss = 0.0
            running_mmd = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dset_loaders[phase]:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
                batch_len = inputs.size(0)

                # Zero the parameter gradients, then run the forward pass.
                optimizer.zero_grad()
                outputs, src = model(inputs)
                _, preds = torch.max(outputs.data, 1)

                tar_inputs = target_data[random.randint(0, last_target_index)]
                tar_inputs = Variable(tar_inputs.cuda(), requires_grad=False)
                tar_out, tar = model(tar_inputs)

                # The MMD estimator needs equally sized batches; report 0 otherwise.
                if src.size(0) == tar.size(0):
                    mmd_loss = F.relu(mix_rbf_mmd2(src, tar, sigma_list))
                else:
                    mmd_loss = Variable(torch.Tensor([0.0]).cuda())

                loss = criterion(outputs, labels)

                # Backward + optimize only in the training phase.
                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                # Statistics. Per-batch values are weighted by the batch size
                # so that dividing by the dataset size yields a per-sample mean.
                running_mmd += mmd_loss.data[0] * batch_len
                running_loss += loss.data[0] * batch_len
                # Flatten both sides so the comparison is element-wise
                # whatever shape torch.max returned.
                running_corrects += torch.sum(preds.view(-1) == labels.data.view(-1))

            epoch_loss = running_loss / dset_sizes[phase]
            epoch_mmd = running_mmd / dset_sizes[phase]
            epoch_acc = float(running_corrects) / float(dset_sizes[phase])
            print(epoch_mmd)
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            # Deep copy the model after every validation pass.
            if phase == 'val':
                best_acc = epoch_acc
                best_model = copy.deepcopy(model)

        model_name = os.path.join(model_dir, 'model_26class_' + str(epoch) + '.pth')
        torch.save(best_model, model_name)
        test_model(model_name, test_dir)

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))
    torch.save(best_model, os.path.join(model_dir, 'model_26class.pth'))
    print('done')
    return best_model

In [None]:
# Run several independent training rounds. After each round every per-epoch
# checkpoint in model_dir is re-scored on the test folder and the most accurate
# one is saved as model_best_<round>.pth.
# NOTE(review): the glob also matches checkpoints left over from earlier runs
# with more epochs -- clear model_dir first if that matters.
dname='/home/sjtu/gcj/data/crop_test'
model_dir = './model'
train_num = 10
count = 0
criterion = nn.CrossEntropyLoss()
num_epochs=25
for i in range(train_num):
    # Training
    print("{} round".format(count+1))
    model = Net().cuda()
    model = train_model(model,criterion, optim_scheduler_ft, num_epochs=num_epochs)
    modelname = sorted(glob.glob(model_dir+r'/model_26class_*.pth'))
    bestacc=0
    best_model=None
    # A separate loop variable keeps `model` bound to the trained network
    # instead of rebinding it to a checkpoint path.
    for checkpoint_path in modelname:
        acc,modeltemp=test_model(checkpoint_path,dname)
        if acc>=bestacc:
            best_model = modeltemp
            bestacc=acc
    print('Best_Acc: {:.4f}'.format(bestacc))
    print(best_model)
    if best_model is None:
        raise RuntimeError('No checkpoint matching model_26class_*.pth found in ' + model_dir)
    bestmodel = torch.load(best_model)
    count = count +1
    model_name='./model/model_best_'+str(count)+'.pth'
    torch.save(bestmodel,model_name)

1 round
Epoch 0/24
----------
LR is set to 0.0005
