In [None]:
import sys
sys.path.append('E:/zlab/')
from loader import Loader
# ----------------

import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

import tables as tb
import mxnet as mx
from mxnet import nd, autograd, gluon
import gluonbook as gb
from mxnet import gluon, init, nd
from mxnet.gluon import data as gdata, loss as gloss, model_zoo, nn
from sklearn.semi_supervised.label_propagation import LabelSpreading
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score
from mxboard import SummaryWriter

# 载入数据

In [None]:
import tables as tb

# Open the HDF5 archive read-only; the handle stays open because later
# cells keep reading arrays through ``data``.
# Alternative location: './X.h5'
h5 = tb.open_file('E:/xdata/X.h5', mode='r')

# Group that holds the CIFAR-10 arrays.
data = h5.root.cifar10

## 划分数据集

In [None]:
from sklearn.model_selection import train_test_split

# Slice out the complete training arrays.
X = data.trainX[:]
y = data.trainY[:]

# 98% of the samples go to the "unlabelled" side of the split, the
# remaining 2% stay as the labelled training set.
X_train, X_unlabel, y_train, y_unlabel = train_test_split(
    X,
    y,
    test_size=0.98,
    random_state=42,
)

batch_size = 32

# Labelled training batches (shuffled).
trainset = Loader(
    batch_size,
    X_train,
    y_train,
    shuffle=True,
    name='train',
)

# Held-out test batches.
testset = Loader(
    batch_size,
    data.testX,
    data.testY,
    shuffle=False,
    name='test',
)

# Only the first 10000 samples of the unlabelled side are used.
unlabelset = Loader(
    batch_size,
    X_unlabel[:10000],
    y_unlabel[:10000],
    shuffle=False,
    name='agent',
)

In [None]:
# Preview the first training batch only (nothing is shown if the loader
# yields no batch at all).
first_batch = next(iter(trainset), None)
if first_batch is not None:
    imgs, labels = first_batch
    trainset.show_imgs(data.label_names, imgs.astype(np.uint8), labels)

# 模型的配置

In [None]:
class SemiModel(nn.HybridBlock):
    '''
    A feature extractor followed by a dense classification head.

    features: block applied to the input batch first.
    classes: number of units of the dense output layer (default 10, the
        value that used to be hard-coded).
    **kwargs: forwarded unchanged to ``nn.HybridBlock``.
    '''

    def __init__(self, features, classes=10, **kwargs):
        super().__init__(**kwargs)
        self.features = features
        self.output = nn.Dense(classes)

    def hybrid_forward(self, F, x):
        '''Run ``x`` through ``self.features`` and then through ``self.output``.'''
        return self.output(self.features(x))

# Build the model around a ResNet-50 v2 feature extractor created without
# pretrained weights ...
pretrain_net = model_zoo.vision.resnet50_v2(pretrained=False)
net = SemiModel(pretrain_net.features)

# Network preset: ... then replace that extractor with the one taken from a
# pretrained ResNet-50 v2.
# NOTE(review): the first, non-pretrained network looks redundant because its
# features are overwritten right below. Left untouched here since building
# only one network may change Gluon's auto-generated parameter prefixes --
# confirm nothing depends on them before simplifying.
_net = model_zoo.vision.resnet50_v2(pretrained=True)
net.features = _net.features
#net.features.add(nn.Flatten())
#net.features[-1].initialize(init.Xavier(magnitude=2.24))
# Only the new dense head is initialised explicitly here.
net.output.initialize(init.Xavier(magnitude=2.24))

# 模型超参数设定

In [None]:
from xtrain import XModel

# Hyper-parameters
learning_rate = 0.1
epochs = 500

# The trainer decides which device is used.
model = XModel(learning_rate)
ctx = model.ctx
print('training on', ctx)

# Move every parameter of the network to that device, then hybridize.
net.collect_params().reset_ctx(ctx)
net.hybridize()

# 数据打包

In [None]:
class DataLoader(dict):
    '''
    Bundle of the labelled/unlabelled loaders plus a LabelSpreading model.

    The instance is a ``dict`` that also serves as its own ``__dict__``, so
    every attribute set on it is reachable as a key as well.
    '''

    def __init__(self, ctx, batch_size, trainset, unlabelset, *args, **kwargs):
        '''
        Set up the data iterators and the label-propagation model, then run
        a first propagation on the min-max scaled raw pixels.

        ctx: device handed to ``nd.array`` in ``get_features``.
        batch_size: batch size of the two image-only loaders built here.
        trainset: labelled loader; its ``X`` and ``Y`` attributes are read.
        unlabelset: unlabelled loader; its ``X`` is propagated over, its ``Y``
            gives the shape of the agent labels and the reference used for
            reporting.
        *args, **kwargs: accepted but unused.
        '''
        self.__dict__ = self  # attribute access and key access share storage
        self.ctx = ctx
        self.train = trainset
        self.unlabel = unlabelset
        # Image-only loaders (no labels) used for feature extraction.
        self.X_l = Loader(
            batch_size, self.train.X, shuffle=False, name='x_train')
        self.X_u = Loader(
            batch_size, self.unlabel.X, shuffle=False, name='x_test')
        # Initial features: flattened pixels, 32 * 32 * 3 values per sample.
        self.features_u = self.unlabel.X.reshape((-1, 32 * 32 * 3))
        self.features_l = self.train.X.reshape((-1, 32 * 32 * 3))
        # The scaler is fitted on the labelled part only and reused for the
        # unlabelled part.
        _min_max_scaler = MinMaxScaler()
        self.features_l = _min_max_scaler.fit_transform(self.features_l)
        self.features_u = _min_max_scaler.transform(self.features_u)
        # Every unlabelled sample starts with the label -1.
        self.y_agent = -np.ones_like(self.unlabel.Y)
        self._lbp = LabelSpreading(gamma=0.007, n_jobs=-1, max_iter=100)
        self.ssl()

    def ssl(self):
        '''
        Label propagation.

        Fits the LabelSpreading model on the labelled and unlabelled features
        together and stores the transduced labels of the unlabelled part in
        ``self.y_hats``. A classification report is printed when exactly 10
        distinct labels occur in ``self.y_hats``; otherwise a failure message
        is printed.
        '''
        X = np.concatenate([self.features_l, self.features_u])
        # Use this instance's own labelled set rather than a module-level
        # ``trainset`` so the class works for any pair of loaders.
        y_ = np.concatenate([self.train.Y, self.y_agent])
        self._lbp.fit(X, y_)  # X must be scaled to [0, 1]
        self.y_hats = self._lbp.transduction_[self.train.Y.shape[0]:]

        if np.unique(self.y_hats).size == 10:
            print('分类指标：')
            print(classification_report(self.unlabel.Y, self.y_hats))
        else:
            print('SSL 失败！')

    def get_features(self, net, data_iter):
        '''
        Yield ``net.features(x)`` for every batch ``x`` of ``data_iter``.

        Each batch is copied to ``self.ctx`` and its axes are reordered with
        ``transpose((0, 3, 1, 2))`` first (presumably NHWC -> NCHW; confirm
        against ``Loader``).
        '''
        for x in data_iter:
            x = nd.array(x, ctx=self.ctx).transpose((0, 3, 1, 2))
            features = net.features(x)
            yield features

    def agent(self, net):
        '''
        Recompute the features with ``net`` and propagate labels again.

        net: network whose ``features`` block produces the new features.

        Updates ``self.features_l``, ``self.features_u``, ``self.y_hats``
        (through ``ssl``) and ``self.agency_acc`` (accuracy of ``self.y_hats``
        against the unlabelled set's ``Y``).
        '''
        # NOTE(review): the uint8 cast truncates what are presumably float
        # activations, and the result is not rescaled to [0, 1] as the note
        # in ssl() asks for -- confirm this is intended.
        # Features of the labelled data.
        self.features_l = np.concatenate(
            [f.asnumpy() for f in self.get_features(net, self.X_l)]).astype(np.uint8)
        # Features of the unlabelled data.
        self.features_u = np.concatenate(
            [f.asnumpy() for f in self.get_features(net, self.X_u)]).astype(np.uint8)
        self.ssl()
        self.agency_acc = accuracy_score(self.unlabel.Y, self.y_hats)

# Build the data bundle; DataLoader.__init__ ends with a call to ssl(), so a
# first label propagation already runs here.
dataset = DataLoader(ctx, batch_size, trainset, unlabelset)

# 训练和调优

In [None]:
logdir = 'D:/graph/'
sw = SummaryWriter(logdir, flush_secs=5)  # scalar logging for visualisation

# Labels predicted before any fine-tuning. Stored as a copy so that it stays
# a fixed reference: it must neither follow later refits nor be overwritten
# when ``dataset.y_agent`` is masked below.
y_pre = dataset.y_hats.copy()


def _refresh_agent_labels(net, step):
    '''
    Re-run label propagation on ``net``'s features and rebuild the agent labels.

    Samples whose new prediction differs from ``y_pre`` get the label -1 in
    ``dataset.y_agent``; all others keep their ``y_pre`` label. The accuracy
    of the propagation is logged at ``step``.
    '''
    dataset.agent(net)
    # Work on a copy: assigning ``y_pre`` itself would make the masking below
    # write -1 into the reference labels as well.
    dataset.y_agent = y_pre.copy()
    dataset.y_agent[dataset.y_hats != y_pre] = -1
    sw.add_scalar('accuracy_curves',
                  {unlabelset.name + '_SSL': dataset.agency_acc}, step)


try:
    # Baseline: propagation on the features of the not yet fine-tuned network.
    dataset.agent(net)
    sw.add_scalar('accuracy_curves',
                  {unlabelset.name + '_SSL': dataset.agency_acc}, 0)
    epoch = 1
    while epoch < 100:
        # Stage 1: one epoch on the unlabelled images with propagated labels.
        print('微调带有代理标签数据')
        print('~_~' * 25)
        X_u_set = Loader(
            batch_size,
            dataset.unlabel.X,
            dataset.y_hats,
            shuffle=True,
            name='train_agent')
        net = model.train(net, X_u_set, testset, epochs=1, start=epoch)
        _refresh_agent_labels(net, epoch)
        if -1 not in dataset.y_agent:
            # Every prediction agrees with the reference labels: stop.
            break
        print(unlabelset.name + '_SSL: ', dataset.agency_acc)
        print('-_' * 50)
        # Stage 2: ten epochs on the truly labelled data.
        print('微调有标签数据')
        epoch += 1
        net = model.train(net, dataset.train, testset, epochs=10, start=epoch)
        _refresh_agent_labels(net, epoch)
        epoch += 10
finally:
    # Flush and release the event file even if training is interrupted.
    sw.close()