In [9]:
from google.colab import drive  # 接入Colab
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [11]:
import os   #设置目录位置
os.chdir('drive/My Drive')

FileNotFoundError: ignored

In [12]:
!pwd

/content/drive/My Drive/HED


In [13]:
%cd '..'

/content/drive/My Drive


In [14]:
!unzip 'Edge-Detection-Using-ML-main.zip'

Archive:  Edge-Detection-Using-ML-main.zip
c71ec0ca2f3875209d168cd09aa2164ec0202fff
   creating: Edge-Detection-Using-ML-main/
   creating: Edge-Detection-Using-ML-main/.ipynb_checkpoints/
  inflating: Edge-Detection-Using-ML-main/.ipynb_checkpoints/DexiNed-checkpoint.ipynb  
  inflating: Edge-Detection-Using-ML-main/.ipynb_checkpoints/InitialImplementation-checkpoint.ipynb  
   creating: Edge-Detection-Using-ML-main/BIPED/
   creating: Edge-Detection-Using-ML-main/BIPED/edges/
   creating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/
   creating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/test/
   creating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/test/rgbr/
  inflating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/test/rgbr/RGB_008.png  
  inflating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/test/rgbr/RGB_010.png  
  inflating: Edge-Detection-Using-ML-main/BIPED/edges/edge_maps/test/rgbr/RGB_017.png  
  inflating: Edge-Detection-Using-ML-main/B

In [None]:
## loss parameters
# sides weights
sides_weights=1.0 # side0
# class balance weights
pos_weights=53.4698638405
# use deep supervising if is_deep_supervised is true else just using fused side
is_deep_supervised=True
# use weight decay
use_weight_regularizer=True
# weight decay ratio
weight_decay_ratio=0.0002
## train parameters
log_dir='logs/'
model_weights_path='data/weights/model_weights/'
init_weights='data/weights/initial_weights/vgg16.npy'
image_path='data/dataset/train_data/'
file_name='data/dataset/train.txt'

batch_size=3
max_epochs=120
snapshot_epochs=60

height=481
width=321
channel=3

bmean=122.20417892 # blue
gmean=119.55591164 # green
rmean=123.94569574 # red

In [None]:
# -*- coding: UTF-8 -*-
import numpy as np
import tensorflow as tf
import yaml
import cv2
import argparse
import gc

def img_pre_process(img):
    '''
    图片预处理
    :param img: 图片
    :param kwargs: 配置信息，如均值
    :return: 处理后的图片
    '''
    def stretch(bands, lower_percent=2, higher_percent=98, bits=8):
        if bits not in [8, 16]:
            print('error ! dest image must be 8bit or 16bits !')
            return
        # 创建一个0矩阵shape形同输入的bands
        out = np.zeros_like(bands, dtype=np.float32)
        n = bands.shape[2]
        for i in range(n):
            a = 0
            b = 1
            # numpy.percentile常用于处理离群数据点
            c = np.percentile(bands[:, :, i], lower_percent)
            d = np.percentile(bands[:, :, i], higher_percent)
            if d-c == 0:
                out[:, :, i] = 0
                continue
            t = a + (bands[:, :, i] - c) * (b - a) / (d - c)
            # numpy.clip用于将数据元素限制在a,b之间，如t=[1,2,3,4,5],a=2,b=4,np.clip后为[2,2,3,4,4]
            out[:, :, i] = np.clip(t, a, b)
        if bits == 8:
            return out.astype(np.float32)*255
        else:
            return np.uint16(out.astype(np.float32)*65535)

    img = stretch(img)
    # 减去配置文件的均值
    img -= kwargs['mean']
    return img


def predict_big_map(img_path, out_shape=(448, 448), inner_shape=(224, 224), out_channel=1, pred_fun=None, **kwargs):
    """
    预测，生成对应的边缘检测图和视频，若图片不一致，会进行切割合成等操作输出固定大小的图片
    注：这样进行切割合成的图片对于进行全图边缘检测是可以的，但是对于特定需求边缘检测这样是不合适的。当然如果测试和训练图片大小一致，这个函数同样会输出等大合适的结果
    :param img_path: 图片路径
    :param out_shape: 输出图片大小
    :param inner_shape: 输入图片大小
    :param out_channel: 预测图片输出通道，通常为黑白图像，通道数1
    :param pred_fun: 前向计算模型，调用sess.run计算hed中间层，返回图片数据
    :return: 预测的图片结果
    """
    make_video = True   # 是否生成video文件

    image = cv2.imread(img_path, )  #读取图片
    # 如果图片只有二维，添加一维生成满足网络要求的占位符比如（?,224,224,1）
    if len(image.shape) == 2:
        image = np.expand_dims(image, axis=-1)
        gc.collect()    # gc为垃圾回收

    # 以下大量代码为如果图片大小不满足网络占位符需求，则对图片进行拆分迭代分别计算每个子图片的
    pd_up_h, pd_lf_w = np.int64((np.array(out_shape)-np.array(inner_shape)) / 2)
    # print(image.shape)
    ori_shape = image.shape
    pd_bm_h = (out_shape[0]-pd_up_h) - (image.shape[0] % inner_shape[0])
    pd_rt_w = (out_shape[1]-pd_lf_w) - (image.shape[1] % inner_shape[1])
    it_h = np.int64(np.ceil(1.0*image.shape[0] / inner_shape[0]))
    it_w = np.int64(np.ceil(1.0*image.shape[1] / inner_shape[1]))
    image_pd = np.pad(image, ((pd_up_h, pd_bm_h), (pd_lf_w, pd_rt_w), (0, 0)), mode='reflect').astype(np.float32)  # the image is default a color one
    # print(image_pd.shape)
    # print((pd_up_h, pd_bm_h), (pd_lf_w, pd_rt_w))
    gc.collect()
    tp1 = np.array(inner_shape[0] - ori_shape[0] % inner_shape[0])
    tp2 = np.array(inner_shape[1] - ori_shape[1] % inner_shape[1])
    if ori_shape[0] % inner_shape[0] == 0:
        tp1 = 0
    if ori_shape[1] % inner_shape[0] == 0:
        tp2 = 0
    out_img = np.zeros((ori_shape[0]+tp1, ori_shape[1]+tp2, out_channel), np.float32)

    # video config #################################
    if make_video:
        fps = 24  # 视频帧率
        wd = 1360
        ht = int(1360*out_img.shape[0]/out_img.shape[1])
        # haha = np.zeros((ht, wd, 3), np.uint8)
        haha = cv2.resize(np.pad(image, ((0, tp1), (0, tp2), (0, 0)), mode='reflect'), (wd, ht), interpolation=cv2.INTER_LINEAR)
        video_writer = cv2.VideoWriter('./data/s2.avi',
                                       cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
                                       (wd, ht))   # isColor=False? (1360,480)为视频大小
    image = None  # release memory
    # main loop
    for ith in range(0, it_h):
        h_start = ith * inner_shape[0]
        count = 1
        for itw in range(0, it_w):
            w_start = itw*inner_shape[1]
            tp_img = image_pd[h_start:h_start+out_shape[0], w_start:w_start+out_shape[1], :]

            # image pre-process
            tp_img = img_pre_process(tp_img.copy(), **kwargs)
            # print('tp_img', tp_img.shape)

            tp_out = pred_fun(tp_img[np.newaxis, :])
            tp_out = np.squeeze(tp_out, axis=0)

            # image post-process
            # tp_out = post-process

            out_img[h_start:h_start+inner_shape[0], w_start:w_start+inner_shape[1], :] = tp_out[pd_up_h:pd_up_h+inner_shape[0], pd_lf_w:pd_lf_w+inner_shape[1], :]

            # write video ##########################
            if make_video:
                tp = cv2.resize(out_img[:, :, 0], (wd, ht), interpolation=cv2.INTER_LINEAR)
                # print(np.unique(tp))
                # xixi = np.uint8((tp > 0.5)*255)
                xixi = tp > 1e-5
                mimi = np.uint8(tp[xixi] * 255)
                haha[xixi, 0] = mimi
                haha[xixi, 1] = mimi
                haha[xixi, 2] = mimi
                video_writer.write(haha)

            print('haha!', h_start, w_start, count)
            count += 1
    if make_video:
        video_writer.release()
    return out_img[0:ori_shape[0], 0:ori_shape[1], :]


def predict_big_map_show(img_path, pred_fun=None, **kwargs):
    '''
    展示中间层图片
    注：由于没有类似predict_big_map切分图片的操作，这里对图片的大小严苛，需要形同训练图片大小才行，否则会报错，后续可增添相应的图片缩放等操作进行通用化处理。
    :param img_path: 图片路径
    :param pred_fun:前向计算模型，调用sess.run计算hed中间层，返回图片数据，可直接处理该数据进行可视化展示
    :param kwargs: 均值信息
    :return:无返回，调用该函数opencv弹窗显示中间图片
    '''
    image = cv2.imread(img_path, )
    # 如果图片只有二维，添加一维生成满足网络要求的占位符比如（?,224,224,1）
    if len(image.shape) == 2:
        image = np.expand_dims(image, axis=-1)
        gc.collect()
    image = image.astype(np.float32)  # cv2.imread后类型为unit8，转类型处理
    # 图片前置处理
    tp_img = img_pre_process(image.copy(), **kwargs)
    # 图片预测，这里相当于pred_fun为一个函数传入，这里调用该函数，该函数已经使用session对传入的图片进行处理，输出结果直接为hed网络中6张图片的数据
    tp_out = pred_fun(tp_img[np.newaxis, :])
    print("***********")
    print("***********")
    # numpy.squeeze把维度为1的条目去除，
    img1 = np.squeeze((tp_out[0] * 255).astype(np.uint8))
    img2 = np.squeeze((tp_out[1] * 255).astype(np.uint8))
    img3 = np.squeeze((tp_out[2] * 255).astype(np.uint8))
    img4 = np.squeeze((tp_out[3] * 255).astype(np.uint8))
    img5 = np.squeeze((tp_out[4] * 255).astype(np.uint8))
    img6 = np.squeeze((tp_out[5] * 255).astype(np.uint8))
    # 图集
    output_img = np.hstack([img1, img2, img3, img4, img5, img6])
    # 展示多个
    cv2.namedWindow('ouput_image', cv2.WINDOW_AUTOSIZE)
    cv2.imshow('ouput_image', output_img)
    cv2.waitKey(0)
    print("***********")
    print("***********")


if __name__ == '__main__':
    # 定义gpu和图片路径
    args = arg_parser()
    # 定义session的config配置
    config = sess_config(args)
    # 读取配置文件
    with open('cfg.yml') as file:
        cfg = yaml.load(file)
    # path = args.img_path
    # path = "data/dataset/train_data/IMG_1580.JPG"
    path = "data/dataset/IMG_2717_224.JPG"

    # 读取高、宽、通道，均值等
    height = cfg['height']
    width = cfg['width']
    channel = cfg['channel']
    mean = cfg['mean']

    # 定义session，HED网络
    sess = tf.Session(config=config)
    hed_class = HED(height=height, width=width, channel=channel)
    hed_class.vgg_hed()
    saver = tf.train.Saver()
    # 读取权重
    saver.restore(sess, 'data/weights/model_weights/vgg16_hed-120')

    '''
    # 如果这样调用中间图片可视化展示将导致图片非常尖锐不平滑
    sides = [hed_class.side1,hed_class.side2,hed_class.side3,hed_class.side4,hed_class.side5,hed_class.fused_side]
    predict_big_map_show(img_path=path, out_shape=(224, 224), inner_shape=(224, 224), out_channel=1,pred_fun=(lambda ipt: sess.run(sides, feed_dict={hed_class.x: ipt})), mean=cfg['mean'])
    '''
    # sigmoid处理后图片更加平滑
    sides = [tf.sigmoid(hed_class.side1),
             tf.sigmoid(hed_class.side2),
             tf.sigmoid(hed_class.side3),
             tf.sigmoid(hed_class.side4),
             tf.sigmoid(hed_class.side5),
             tf.sigmoid(hed_class.fused_side)]
    # 可视化展示
    predict_big_map_show(img_path=path,pred_fun=(lambda ipt: sess.run(sides, feed_dict={hed_class.x: ipt})), mean=cfg['mean'])

    # tf.add_n表示相加，这里的sides为相加后取均值实现图片融合
    sides = 1.0*tf.add_n(sides) / len(sides)
    # 图片预测
    output_img = predict_big_map(img_path=path, out_shape=(height, width), inner_shape=(height, width), out_channel=1,
                                 pred_fun=(lambda ipt: sess.run(sides, feed_dict={hed_class.x: ipt})), mean=cfg['mean'])

    # 去除维度为1的，实现图片转化
    output_img = np.squeeze((output_img*255).astype(np.uint8))
    cv2.imwrite('./data/tb_gray_img.png', output_img)
    cv2.imwrite('./data/tb_black_img.png', 255*(output_img > 127))
    sess.close()




In [None]:
# -*- coding: UTF-8 -*-
import numpy as np
import tensorflow as tf
import yaml

'''
HED网络类
基于VGG16，所以模型形同VGG16，唯一注意点是concat处，加上concat导致梯度计算不出，不知是某个类库的问题还是其他未知问题
'''

# HED网络定义类
class HED(object):
    def __init__(self, height, width, channel):
        # 定义图片长宽
        self.height = height
        self.width = width
        # 定义占位符
        self.x = tf.placeholder(tf.float32, (None, height, width, channel))
        # 定义配置属性，来自配置文件
        with open('cfg.yml') as file:
            self.cfg = yaml.load(file)

    def vgg_hed(self):
        '''
        VGG16模型为2层卷积+reLU,池化，2层卷积+reLU，池化，3层卷积+reLU，池化，3层卷积+reLU，池化，3层卷积+reLU，池化，3层全连接+reLU,softmax输出
        :return: 中间5层图片及最后融合的图片共6个数据
        '''
        # block对应的函数是iteration层卷积+reLU
        bn1, relu1 = self.block(input_tensor=self.x, filters=64, iteration=2, dilation_rate=[(4, 4), (1, 1)], name='block1')
        mp1 = tf.layers.max_pooling2d(inputs=relu1, pool_size=(2, 2), strides=(2, 2), padding='same', name='max_pool1')

        bn2, relu2 = self.block(input_tensor=mp1, filters=128, iteration=2, name='block2')
        mp2 = tf.layers.max_pooling2d(inputs=relu2, pool_size=(2, 2), strides=(2, 2), padding='same', name='max_pool2')

        bn3, relu3 = self.block(input_tensor=mp2, filters=256, iteration=3, name='block3')
        mp3 = tf.layers.max_pooling2d(inputs=relu3, pool_size=(2, 2), strides=(2, 2), padding='same', name='max_pool3')

        bn4, relu4 = self.block(input_tensor=mp3, filters=512, iteration=3, name='block4')
        mp4 = tf.layers.max_pooling2d(inputs=relu4, pool_size=(2, 2), strides=(2, 2), padding='same', name='max_pool4')

        bn5, relu5 = self.block(input_tensor=mp4, filters=512, iteration=3, name='block5')

        # self.side()对图片进行反卷积
        self.side1 = self.side(input_tensor=bn1, stride=(1, 1), name='side1', deconv=False)
        self.side2 = self.side(input_tensor=bn2, stride=(2, 2), name='side2')
        self.side3 = self.side(input_tensor=bn3, stride=(4, 4), name='side3')
        self.side4 = self.side(input_tensor=bn4, stride=(8, 8), name='side4')
        self.side5 = self.side(input_tensor=bn5, stride=(16, 16), name='side5')
        # sides原本对应side12345的合成，但是优化器迭代concat报错所以先直接采用第五层作为sides
        sides = self.side5
        '''
        t1 = [[1, 2, 3], [4, 5, 6]]  
        t2 = [[7, 8, 9], [10, 11, 12]]  
        tf.concat([t1, t2], 0)  # [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]  
        tf.concat([t1, t2], 1)  # [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
        '''
        # sides = tf.concat(values=[self.side1, self.side2, self.side3, self.side4, self.side5], axis=3)
        # tf.layers.conv2d参数含义：filters输出通道数，kernel_size卷积核大小，strides卷积步长
        # 通过1×1卷积核实现通道数的缩放，sides通过concat后(?,224,224,5)通过卷积核变为(?,224,224,1)
        self.fused_side = tf.layers.conv2d(inputs=sides, filters=1, kernel_size=(1, 1), strides=(1, 1),
                                           use_bias=False, kernel_initializer=tf.constant_initializer(0.2), name='fused_side')
        return self.side1, self.side2, self.side3, self.side4, self.side5, self.fused_side

    def block(self, input_tensor, filters, iteration, dilation_rate=None, name=None):
        '''
        相当于将HED网络分层分块，VGG16模型为2层(卷积+reLU),池化，2层(卷积+reLU)，池化，3层(卷积+reLU)，池化，3层(卷积+reLU)，池化，3层(卷积+reLU)，池化，3层全连接+reLU,softmax输出
        这里一个block对应iteration层的卷积+reLU
        :param input_tensor:输入的tensor
        :param filters:输出通道个数，等价于output_channels
        :param iteration:迭代次数，比如第一个block对应2层的(卷积+reLU)，这里iteration为2
        :param dilation_rate:扩张卷积
        :param name:命名空间，不重要，只在tensorboard可视化时看起来会简美些
        :return:经过(卷积+reLU)处理的tensor
        '''
        # dilation_rate表示扩张卷积，针对的是卷积核的大小，
        # 扩张卷积优点：扩展卷积在保持参数个数不变的情况下增大了卷积核的感受野，同时它可以保证输出的特征映射（feature map）的大小保持不变。
        # dilation_rate默认为（1，1）
        # 扩张卷积应用于图像语义分割问题中下采样会降低图像分辨率、丢失信息的一种卷积思路，所以实际上整个代码只是在VGG16第一层卷积中加入了扩张卷积
        if dilation_rate is None:
            dilation_rate = [(1, 1)]
        if len(dilation_rate) == 1:
            dilation_rate *= iteration

        regularizer = tf.contrib.layers.l2_regularizer(self.cfg['weight_decay_ratio'])
        with tf.variable_scope(name):
            relu = input_tensor
            for it in range(iteration):
                tp_dilation_rate = dilation_rate.pop(0)
                print("hed_net:",tp_dilation_rate)
                conv = tf.layers.conv2d(inputs=relu, filters=filters,
                                        kernel_size=(3, 3), strides=(1, 1), padding='same',
                                        activation=None, use_bias=True,
                                        kernel_regularizer=regularizer,
                                        dilation_rate=tp_dilation_rate,
                                        # kernel_initializer=tf.truncated_normal_initializer(stddev=0.5),
                                        name='conv{:d}'.format(it))
                # bn = tf.layers.batch_normalization(inputs=conv, axis=-1, name='bn{:d}'.format(it))
                bn = conv
                relu = tf.nn.relu(bn, name='relu{:d}'.format(it))
        return relu, relu

    def side(self, input_tensor, stride, name, deconv=True):
        '''
        对图片进行反卷积
        :param input_tensor:输入的tensor
        :param stride:卷积步长，反卷积为扩大倍数
        :param name:命名空间名字
        :param deconv:是否反卷积
        :return:反卷积后的张量
        '''
        with tf.variable_scope(name):
            side = tf.layers.conv2d(inputs=input_tensor, filters=1, kernel_size=(1, 1), strides=(1, 1),
                                    padding='same',
                                    activation=None,
                                    bias_initializer=tf.constant_initializer(value=0),
                                    kernel_initializer=tf.constant_initializer(value=0),
                                    kernel_regularizer=tf.contrib.layers.l2_regularizer(0.0002))
            if deconv:
                # conv2d_transpose名字虽然叫转置，但实际上就是代表反卷积
                # stride步长，即扩大倍数
                side = tf.layers.conv2d_transpose(inputs=side, filters=1, kernel_size=(2*stride[0], 2*stride[1]),
                                                  strides=stride, padding='same',
                                                  kernel_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                                  bias_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                                  kernel_regularizer=tf.contrib.layers.l2_regularizer(self.cfg['weight_decay_ratio']),
                                                  activation=None)
            # 以上已经处理反卷积完成，这里在对反卷积完成后的张量做一次双线性插值图片处理
            side = tf.image.resize_images(images=side, size=(self.height, self.width),
                                          method=tf.image.ResizeMethod.BILINEAR)
        return side

    def evaluate(self):
        '''
        评价，暂无，之后可用F1等评价
        :return:
        '''
        # evaluation criteria
        # accuracy

        # precision

        # recall

        # F1 score
        pass

    def summary(self):
        '''
        记录
        :return:
        '''
        max_outputs = 1
        tf.summary.image(name='orig_image_sm', tensor=self.x, max_outputs=max_outputs)
        tf.summary.image(name='side1_im', tensor=tf.sigmoid(self.side1), max_outputs=max_outputs, )
        tf.summary.image(name='side2_im', tensor=tf.sigmoid(self.side2), max_outputs=max_outputs, )
        tf.summary.image(name='side3_im', tensor=tf.sigmoid(self.side3), max_outputs=max_outputs, )
        tf.summary.image(name='side4_im', tensor=tf.sigmoid(self.side4), max_outputs=max_outputs, )
        tf.summary.image(name='side5_im', tensor=tf.sigmoid(self.side5), max_outputs=max_outputs, )
        tf.summary.image(name='fused_side_im', tensor=tf.sigmoid(self.fused_side), max_outputs=max_outputs, )

        tf.summary.histogram(name='side1_hist', values=tf.sigmoid(self.side1))
        tf.summary.histogram(name='side2_hist', values=tf.sigmoid(self.side2))
        tf.summary.histogram(name='side3_hist', values=tf.sigmoid(self.side3))
        tf.summary.histogram(name='side4_hist', values=tf.sigmoid(self.side4))
        tf.summary.histogram(name='side5_hist', values=tf.sigmoid(self.side5))
        tf.summary.histogram(name='fused_side_hist', values=tf.sigmoid(self.fused_side))

    def assign_init_weights(self, sess=None):
        '''
        初始化权重,读取VGG16权重文件进行权重初始化
        :param sess: session
        :return:
        '''
        with open(self.cfg['init_weights'], 'rb') as file:
            weights = np.load(file, encoding='latin1').item()
        with tf.variable_scope('block1', reuse=True):
            k = tf.get_variable(name='conv0/kernel')
            sess.run(tf.assign(k, weights['conv1_1'][0]))
            k = tf.get_variable(name='conv0/bias')
            sess.run(tf.assign(k, weights['conv1_1'][1]))

            k = tf.get_variable(name='conv1/kernel')
            sess.run(tf.assign(k, weights['conv1_2'][0]))
            k = tf.get_variable(name='conv1/bias')
            sess.run(tf.assign(k, weights['conv1_2'][1]))
        print('assign first block done !')
        with tf.variable_scope('block2', reuse=True):
            k = tf.get_variable(name='conv0/kernel')
            sess.run(tf.assign(k, weights['conv2_1'][0]))
            k = tf.get_variable(name='conv0/bias')
            sess.run(tf.assign(k, weights['conv2_1'][1]))

            k = tf.get_variable(name='conv1/kernel')
            sess.run(tf.assign(k, weights['conv2_2'][0]))
            k = tf.get_variable(name='conv1/bias')
            sess.run(tf.assign(k, weights['conv2_2'][1]))
        print('assign second block done !')
        with tf.variable_scope('block3', reuse=True):
            k = tf.get_variable(name='conv0/kernel')
            sess.run(tf.assign(k, weights['conv3_1'][0]))
            k = tf.get_variable(name='conv0/bias')
            sess.run(tf.assign(k, weights['conv3_1'][1]))

            k = tf.get_variable(name='conv1/kernel')
            sess.run(tf.assign(k, weights['conv3_2'][0]))
            k = tf.get_variable(name='conv1/bias')
            sess.run(tf.assign(k, weights['conv3_2'][1]))

            k = tf.get_variable(name='conv2/kernel')
            sess.run(tf.assign(k, weights['conv3_3'][0]))
            k = tf.get_variable(name='conv2/bias')
            sess.run(tf.assign(k, weights['conv3_3'][1]))
        print('assign third block done !')
        with tf.variable_scope('block4', reuse=True):
            k = tf.get_variable(name='conv0/kernel')
            sess.run(tf.assign(k, weights['conv4_1'][0]))
            k = tf.get_variable(name='conv0/bias')
            sess.run(tf.assign(k, weights['conv4_1'][1]))

            k = tf.get_variable(name='conv1/kernel')
            sess.run(tf.assign(k, weights['conv4_2'][0]))
            k = tf.get_variable(name='conv1/bias')
            sess.run(tf.assign(k, weights['conv4_2'][1]))

            k = tf.get_variable(name='conv2/kernel')
            sess.run(tf.assign(k, weights['conv4_3'][0]))
            k = tf.get_variable(name='conv2/bias')
            sess.run(tf.assign(k, weights['conv4_3'][1]))
        print('assign fourth block done !')
        with tf.variable_scope('block5', reuse=True):
            k = tf.get_variable(name='conv0/kernel')
            sess.run(tf.assign(k, weights['conv5_1'][0]))
            k = tf.get_variable(name='conv0/bias')
            sess.run(tf.assign(k, weights['conv5_1'][1]))

            k = tf.get_variable(name='conv1/kernel')
            sess.run(tf.assign(k, weights['conv5_2'][0]))
            k = tf.get_variable(name='conv1/bias')
            sess.run(tf.assign(k, weights['conv5_2'][1]))

            k = tf.get_variable(name='conv2/kernel')
            sess.run(tf.assign(k, weights['conv5_3'][0]))
            k = tf.get_variable(name='conv2/bias')
            sess.run(tf.assign(k, weights['conv5_3'][1]))
        weights = None  # gc
        print('assign fifth block done !')
        print('net initializing successfully with vgg16 weights trained by imagenet data')

In [None]:
# -*- coding: UTF-8 -*-
from __future__ import print_function
import tensorflow as tf
import yaml

'''
损失函数类
1.对应两种损失函数计算方法，calc_loss和focal_loss
2.主要研究了calc_loss,该损失函数涉及两个超参数，is_deep_supervised是否深层监督即是否考虑VGG16中间层的输出图，use_weight_regularizer是否正则化
'''

# 损失函数
class HedLoss(object):
    def __init__(self, sides):
        self.sides = sides  # sides对应hed网络输出的6个图
        self.loss = 0.0     # loss对应calc_loss计算的损失函数
        self.floss = 0.0    # floss对应focal_loss计算的损失函数
        with open('cfg.yml') as file:
            self.cfg = yaml.load(file)
        self.label = tf.placeholder(tf.float32, (None, self.cfg['height'], self.cfg['width'], 1))   # 定义标签图占位符
        # self.calc_loss()

    def calc_loss(self):
        '''
        损失函数
        :return: 损失函数计算值
        '''
        # is_deep_supervised是否深层监督，若深层监督则考虑HED网络过程中每一张图的权重并取均值
        # 如果不考虑深层监督，则直接考虑融合后图的损失
        # 个人初步想法：考虑深层监督，则每一次迭代实现分层次的计算损失修改每一层次的权重，再计算融合图，修改权重，实现权重更新粒度更加精确，但是最终效果其实只与融合图有关，所以最后结果尚待测试
        if self.cfg['is_deep_supervised']:
            for n in range(len(self.sides)-1):
                tp_loss = self.cfg['sides_weights'][n] * tf.nn.weighted_cross_entropy_with_logits(targets=self.label, logits=self.sides[n], pos_weight=self.cfg['pos_weights'])
                self.loss += tf.reduce_mean(tp_loss)
        self.loss += tf.reduce_mean(tf.nn.weighted_cross_entropy_with_logits(targets=self.label, logits=self.sides[-1], pos_weight=self.cfg['pos_weights']))

        # tf.get_collection零存整取获取数据，tf.GraphKeys.REGULARIZATION_LOSSES形同名字，正则化处理
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if self.cfg['use_weight_regularizer']:
            self.loss = tf.add_n(reg_loss) + self.loss

        return self.loss


    def focal_loss(self):
        '''
        另一种损失函数
        :return: 损失函数计算值
        '''
        if self.cfg['is_deep_supervised']:
            for n in range(len(self.sides) - 1):
                sg_p = tf.nn.sigmoid(self.sides[n])
                sg_n = 1.0 - sg_p
                sg_p += 1e-5
                sg_n += 1e-5
                pos_num = tf.reduce_sum(tf.cast(self.label > 0.99, tf.float32))
                neg_num = tf.reduce_sum(tf.cast(self.label < 0.01, tf.float32))

                pos = -self.label*sg_n*sg_n*tf.log(sg_p)
                pos = tf.reduce_sum(pos) / (pos_num+1e-5)

                neg = -(1.0-self.label)*sg_p*sg_p*tf.log(sg_n)
                neg = tf.reduce_sum(neg) / (neg_num+1e-5)
                self.floss = self.floss + 0.25*pos + neg*0.75

        sg_p = tf.nn.sigmoid(self.sides[-1])
        sg_n = 1.0 - sg_p
        sg_p += 1e-5
        sg_n += 1e-5
        pos_num = tf.reduce_sum(tf.cast(self.label > 0.99, tf.float32))
        neg_num = tf.reduce_sum(tf.cast(self.label < 0.01, tf.float32))

        pos = -self.label * sg_n * sg_n * tf.log(sg_p)
        pos = tf.reduce_sum(pos) / (pos_num+1e-5)

        neg = -(1.0 - self.label) * sg_p * sg_p * tf.log(sg_n)
        neg = tf.reduce_sum(neg) / (neg_num+1e-5)
        self.floss = self.floss + 0.25*pos + neg*0.75
        if self.cfg['use_weight_regularizer']:
            reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
            self.floss += tf.add_n(reg_loss)
        return self.floss


    def summary(self):
        '''
        统计
        :return:
        '''
        tf.summary.scalar(name='loss_sm', tensor=self.loss)
        tf.summary.scalar(name='floss_sm', tensor=self.floss)
        max_outputs = 1
        tf.summary.image(name='label_sm', tensor=self.label, max_outputs=max_outputs, )

In [None]:
# -*- coding: UTF-8 -*-
import numpy as np
import tensorflow as tf
import yaml
from hed_net import HED
from loss import HedLoss
import os
import cv2
import argparse
import random
from time import time
import matplotlib.pyplot as plt

'''
训练HED网络主函数
1.超参数通过cfg.yml配置更改
2.采用batch的模式训练，每一轮迭代遍历所有训练集(处理时会打乱顺序）,本机对于224×224图片来说，batch_size=2能够不卡顿的运行，可以适当增加batch_size提高迭代速度
3.暂未写损失函数可视化！
'''

# dataSet存放数据文件，包括图片信息和标签信息
class DataSet(object):
    def __init__(self):
        # 读取配置文件
        with open('cfg.yml') as file:
            self.cfg = yaml.load(file)
        self.imgs = None    # 图片信息
        self.labels = None  # 标签信息
        self.samples_num = 0    # 样本数量
        self.read_data()    # 调用函数加载图片和标签

    def read_data(self):
        '''
        读取训练图片文件和标签图片文件
        :return:
        '''
        img_names = []
        label_names = []
        # 配置文件cfg.yml中file_name对应train.txt，记录一一对应的训练图片与标签图片，存放在img_names,label_names,为之后读取做准备
        with open(self.cfg['file_name']) as file:
            while True:
                il = file.readline(1500)    # 如果样本数据大于1500，修改该值
                if not il:
                    break
                a = il.split(sep=' ')
                img_names.append(a[0])
                label_names.append(a[1][0:-1])  # remove '\n'
        self.samples_num = len(img_names)
        print('total image num: ', self.samples_num)
        # 初始化self.imgs和self.labels，开辟对应大小空间和类型，与配置文件设置有关
        self.imgs = np.zeros((len(img_names), self.cfg['height'], self.cfg['width'], self.cfg['channel']), np.float32)
        self.labels = np.zeros((len(img_names), self.cfg['height'], self.cfg['width'], 1), np.float32)
        # cv2.imread读取后格式为unit8，所以遍历所有图片及标签进行读取图片并设置格式
        # 注：这里对标签进行了归一化处理，但并未对训练图片进行归一化处理，后续可考虑训练时对训练图片进行归一化看训练效果，同时要注意修改测试时同样处理方式
        for it in range(len(self.labels)):
            tp_img = cv2.imread(os.path.join(self.cfg['image_path'], img_names[it]))
            tp_label = cv2.imread(os.path.join(self.cfg['image_path'], label_names[it]), cv2.IMREAD_GRAYSCALE)  # cv2.IMREAD_GRAYSCALE加载一张灰度图
            self.imgs[it, :, :, :] = tp_img.astype(np.float32)
            self.labels[it, :, :, 0] = (tp_label/255).astype(np.float32)
        # 图像减去均值是为了让损失函数平滑收敛，但是这里的均值是直接读取配置文件
        # 若之后搭建训练集后可考虑计算下均值做相应改变
        self.imgs -= self.cfg['mean']
        print('images and labels reading done!')

    def batch_iterator(self, shuffle=False):
        '''
        根据batch进行迭代，利用shuffle进行打乱顺序，批次大小配置文件配置
        :param shuffle: 是否打乱
        :return: 单个训练图片和标签文件用于迭代
        '''
        batch_size = self.cfg['batch_size']
        num_examples = len(self.imgs)
        idx = list(range(num_examples))
        if shuffle:
            random.shuffle(idx)
        for i in range(0, num_examples, batch_size):
            imgs = self.imgs[idx[i:min(i+batch_size, num_examples)], :, :, :]
            labels = self.labels[idx[i:min(i+batch_size, num_examples)], :, :, :]
            # print('batch_size: ', labels.shape[0])
            # yield是一个生成器generator，简单的说就是每次执行到yield就返回，下次又迭代进入时又从yield处继续，实现迭代
            yield imgs, labels


def arg_parser():
    '''
    GPU配置parser
    :return:
    '''
    parser = argparse.ArgumentParser()
    parser.add_argument('-gpu', type=str, required=False, default='0')
    args = parser.parse_args()
    return args


def sess_config(args=None):
    '''
    session的config配置，若没有GPU将直接使用CPU，不会报错
    :param args:
    :return: 配置config
    '''
    log_device_placement = True  # 是否打印设备分配日志
    allow_soft_placement = True  # 如果你指定的设备不存在，允许TF自动分配设备
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95, allow_growth=True)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu  # 使用 GPU 0
    config1 = tf.ConfigProto(log_device_placement=log_device_placement,
                            allow_soft_placement=allow_soft_placement,
                            gpu_options=gpu_options)
    return config1


if __name__ == "__main__":
    # 读取配置文件
    with open('cfg.yml') as file:
        cfg = yaml.load(file)
    args = arg_parser()     # 配置
    config = sess_config(args)   # session的config配置

    # 训练数据
    dataset = DataSet()
    # HED网络定义
    hed_class = HED(height=cfg['height'], width=cfg['width'], channel=cfg['channel'])
    sides = hed_class.vgg_hed()
    # 损失函数定义
    loss_class = HedLoss(sides)
    loss = loss_class.calc_loss()
    # 优化器，采用动态学习率
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(learning_rate=1e-5,
                                               global_step=global_step,
                                               decay_steps=10000,
                                               decay_rate=0.1,
                                               staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss, global_step=global_step)
    # summary记录，用于tensorboard可视化
    tf.summary.scalar(name='lr', tensor=learning_rate)
    hed_class.summary()
    loss_class.summary()
    merged_summary_op = tf.summary.merge_all()

    startTime = time()
    # 训练
    with tf.Session(graph=tf.get_default_graph(), config=config) as sess:
        saver = tf.train.Saver()

        # session变量初始化
        sess.run(tf.global_variables_initializer())
        # 初始化HED网络权重（HED网络基于VGG16，直接使用VGG16权重初始化）
        hed_class.assign_init_weights(sess)

        # 断点续训
        ckpt_dir = cfg['model_weights_path']
        ckpt = tf.train.latest_checkpoint(ckpt_dir)
        if ckpt != None:
            saver.restore(sess, ckpt)
            # sess.run(tf.assign(global_step, 0))
            print('saver restore finish')
        else:
            print("training from scratch")



        # 日志记录
        summary_writer = tf.summary.FileWriter(cfg['log_dir'], graph=sess.graph, flush_secs=15)

        step = 0    # 记录summary的step
        # 通过配置文件max_epochs最大迭代次数进行训练
        for epoch in range(1, cfg['max_epochs']+1):
            for imgs, labels in dataset.batch_iterator():   # 通过迭代获取训练图片和标签信息
                '''
                # 输出中间过程图片
                print("*************")
                print("*************")
                sides_show = sess.run([sides],feed_dict={hed_class.x: imgs})
                picture_batch1 = sides_show[0][0] #对应第一层输出的图片batch

                plt.subplot(331)
                outImage = sides_show[0][0]
                plt.imshow(outImage[0, :, :, 0])
                plt.subplot(332)
                outImage = sides_show[0][1]
                plt.imshow(outImage[0, :, :, 0])
                plt.subplot(333)
                outImage = sides_show[0][2]
                plt.imshow(outImage[0, :, :, 0])
                plt.subplot(334)
                outImage = sides_show[0][3]
                plt.imshow(outImage[0, :, :, 0])
                plt.subplot(335)
                outImage = sides_show[0][4]
                plt.imshow(outImage[0, :, :, 0])
                plt.subplot(336)
                outImage = sides_show[0][5]
                plt.imshow(outImage[0, :, :, 0])
                plt.show()

                print("*************")
                # print(sides_show)
                # print("*************")
                # print("*************")
                '''
                # 核心训练语句，利用训练图片和标签图片信息代入优化器进行训练，记录summary
                merged_summary, _ = sess.run([merged_summary_op, train_op],feed_dict={hed_class.x: imgs, loss_class.label: labels})
                if not (step % 1):
                    summary_writer.add_summary(merged_summary, global_step=step)
                    print('save a merged summary !')
                step += 1
                print('global_step:', sess.run(global_step), 'epoch: ', epoch)

            # 配置文件设置多少代输出一次模型
            if not epoch % cfg['snapshot_epochs']:
                saver.save(sess=sess, save_path=os.path.join(cfg['model_weights_path'], 'vgg16_hed'), global_step=epoch)
                print('save a snapshoot !')
        summary_writer.close()
        saver.save(sess=sess, save_path=os.path.join(cfg['model_weights_path'], 'vgg16_hed'), global_step=epoch)
        print('save final model')

    duration = time() - startTime
    print("train takes:", "{:.2f}".format(duration))

usage: ipykernel_launcher.py [-h] [-gpu GPU]
ipykernel_launcher.py: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-08c5f861-0eaa-4e97-833d-5b4aadeaf19b.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
