In [1]:
#!/usr/bin/env python
#--------------------
#@Time  : 2020/4/12 16:18
#@Author: ZHANG YINGJIE
#@File  : train.py
import os

import tensorflow as tf
import time
import Ipynb_importer
from configs import *
from network import ic_cnn
from utils import *


def _format_log_entry(file_name, step, loss, lr_dmp, hr_dmp, gt_count):
    """Build the (timestamp, file name, message) tuple used for every log line."""
    format_time = str(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    format_str = 'step %d, joint loss=%.5f, LR=%.5f, HR=%.5f, gt=%d \n' % (
        step, loss, lr_dmp.sum(), hr_dmp.sum(), gt_count)
    return format_time, file_name, format_str


def train():
    """Train the IC-CNN network and validate it every 50 outer iterations.

    The function builds the graph (placeholders, network, joint loss and a
    plain gradient-descent optimizer), restores the latest checkpoint found in
    ``cfig.ckpt_router`` if there is one, and then loops ``cfig.total_iters``
    times over every training image (one image per optimizer step).  A
    checkpoint is saved after each pass over the training images.

    Every 50th outer iteration the test images are run through the network
    *without* updating the weights, and the MAE / RMSE of the predicted count
    (sum of the high-resolution density map) against the ground-truth count,
    averaged over all validation images, are written to a per-iteration
    validation log and printed.

    Dataset locations are hard-coded below; all hyper-parameters and output
    locations come from ``ConfigFactory``.
    """
    # dataset
    dataset = 'A'
    dataset_root = r'F:/crowd_counting_datasets/ShanghaiTech_Crowd_Counting_Dataset/part_' + dataset + r'_final/'
    # training dataset
    img_root_dir = dataset_root + r'train_data/images/'
    gt_root_dir = dataset_root + r'train_data/ground_truth/'
    # testing dataset
    val_img_root_dir = dataset_root + r'test_data/images/'
    val_gt_root_dir = dataset_root + r'test_data/ground_truth/'

    # Ground-truth paths are derived from the image file names, so only the
    # image directories need to be listed.
    img_file_list = os.listdir(img_root_dir)
    val_img_file_list = os.listdir(val_img_root_dir)

    # configuration (hyper-parameters, log / checkpoint locations)
    cfig = ConfigFactory()

    # place holders
    input_img_placeholder = tf.placeholder(tf.float32, shape=(None, None, None, 3))
    hr_density_map_placeholder = tf.placeholder(tf.float32, shape=(None, None, None, 1))
    lr_density_map_placeholder = tf.placeholder(tf.float32, shape=(None, None, None, 1))

    # network generation
    hr_density_map, lr_density_map = ic_cnn(input_img_placeholder)

    # density map loss: half of the squared L2 norm of the residual
    hr_map_loss = 0.5 * tf.reduce_sum(tf.square(tf.subtract(hr_density_map_placeholder, hr_density_map)))
    lr_map_loss = 0.5 * tf.reduce_sum(tf.square(tf.subtract(lr_density_map_placeholder, lr_density_map)))

    # jointly training, "Equation (3)" (presumably of the IC-CNN paper -- TODO confirm)
    joint_loss = tf.reduce_sum(cfig.lossterm_l * lr_map_loss + cfig.lossterm_h * hr_map_loss)

    # optimizer
    optimizer = tf.train.GradientDescentOptimizer(cfig.lr).minimize(joint_loss)

    init = tf.global_variables_initializer()

    # training log route
    os.makedirs(cfig.log_router, exist_ok=True)
    # model saver route
    os.makedirs(cfig.ckpt_router, exist_ok=True)

    saver = tf.train.Saver(max_to_keep=cfig.max_ckpt_keep)
    ckpt = tf.train.get_checkpoint_state(cfig.ckpt_router)

    train_log_path = cfig.log_router + cfig.name + r'_training.logs'

    # The context managers guarantee that the log file and the session are
    # released even when training aborts (e.g. on a GPU out-of-memory error).
    with open(train_log_path, mode='a+', encoding='utf-8') as log, tf.Session() as sess:
        if ckpt and ckpt.model_checkpoint_path:
            print('load model')
            print(ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            # Only initialize when nothing was restored: running the
            # initializer after a restore would overwrite the loaded weights.
            sess.run(init)

        # start training
        for i in range(cfig.total_iters):
            # training
            for file_index, file_name in enumerate(img_file_list):
                img_path = img_root_dir + file_name
                gt_path = gt_root_dir + 'GT_' + file_name.split(r'.')[0]
                img, gt_dmp, gt_count, lr_gt_dmp = readTrainData(img_path, gt_path, scale=4)

                feed_dict = {input_img_placeholder: (img - 127.5) / 128,
                             lr_density_map_placeholder: lr_gt_dmp,
                             hr_density_map_placeholder: gt_dmp}

                _, lr_dmp, hr_dmp, loss = sess.run(
                    [optimizer, lr_density_map, hr_density_map, joint_loss], feed_dict=feed_dict)

                log_line = _format_log_entry(
                    file_name, i * len(img_file_list) + file_index, loss, lr_dmp, hr_dmp, gt_count)
                # str() of the tuple escapes the embedded '\n', so terminate
                # the record explicitly to get one entry per line in the file.
                log.write(str(log_line) + '\n')
                print(log_line)

            saver.save(sess, cfig.ckpt_router + 'v1', global_step=i)

            if i % 50 != 0:
                continue

            # validating
            val_log_path = cfig.log_router + cfig.name + r'_validating_' + str(i) + '_.logs'
            absolute_error = 0.0
            square_error = 0.0
            with open(val_log_path, mode='w', encoding='utf-8') as val_log:
                for file_index, file_name in enumerate(val_img_file_list):
                    img_path = val_img_root_dir + file_name
                    gt_path = val_gt_root_dir + 'GT_' + file_name.split(r'.')[0]
                    img, gt_dmp, gt_count, lr_gt_dmp = readTestData(img_path, gt_path, scale=4)

                    feed_dict = {input_img_placeholder: (img - 127.5) / 128,
                                 lr_density_map_placeholder: lr_gt_dmp,
                                 hr_density_map_placeholder: gt_dmp}

                    # The optimizer op is deliberately NOT fetched here:
                    # validation must not update the weights on test data.
                    lr_dmp, hr_dmp, loss = sess.run(
                        [lr_density_map, hr_density_map, joint_loss], feed_dict=feed_dict)

                    # Accumulate the per-image count error so the metrics
                    # cover the whole validation set, not just the last image.
                    count_error = float(hr_dmp.sum()) - float(gt_count)
                    absolute_error += abs(count_error)
                    square_error += count_error * count_error

                    log_line = _format_log_entry(file_name, file_index, loss, lr_dmp, hr_dmp, gt_count)
                    val_log.write(str(log_line) + '\n')
                    print(log_line)

                sample_count = len(val_img_file_list)
                if sample_count:
                    mae = absolute_error / sample_count
                    rmse = (square_error / sample_count) ** 0.5
                else:
                    # Empty validation directory: no metric can be computed.
                    mae = rmse = float('nan')

                summary = 'MAE_' + str(mae) + '_RMSE_' + str(rmse)
                val_log.write(summary + '\n')
            print(summary)



# Script entry point: start training only when executed directly, not on import.
if "__main__" == __name__:
    train()




  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


importing Jupyter notebook from configs.ipynb
importing Jupyter notebook from network.ipynb
importing Jupyter notebook from utils.ipynb
('2020-04-30 19:07:38', 'IMG_1.jpg', 'step 0, joint loss=0.00146, LR=28.36599, HR=0.00000, gt=0 \n')
('2020-04-30 19:07:39', 'IMG_10.jpg', 'step 1, joint loss=14.69742, LR=50.39455, HR=0.00000, gt=13 \n')
('2020-04-30 19:07:40', 'IMG_100.jpg', 'step 2, joint loss=113.65215, LR=38.04742, HR=0.00000, gt=56 \n')
('2020-04-30 19:07:40', 'IMG_101.jpg', 'step 3, joint loss=13.44623, LR=39.88744, HR=0.00000, gt=15 \n')
('2020-04-30 19:07:41', 'IMG_102.jpg', 'step 4, joint loss=46.22974, LR=38.90398, HR=0.00000, gt=31 \n')
('2020-04-30 19:07:42', 'IMG_103.jpg', 'step 5, joint loss=17.66989, LR=47.33603, HR=0.00000, gt=17 \n')
('2020-04-30 19:07:42', 'IMG_104.jpg', 'step 6, joint loss=486.16470, LR=37.62152, HR=0.00000, gt=125 \n')
('2020-04-30 19:07:43', 'IMG_105.jpg', 'step 7, joint loss=15.28487, LR=29.53030, HR=0.00000, gt=15 \n')
('2020-04-30 19:07:44', 'I

('2020-04-30 19:08:40', 'IMG_168.jpg', 'step 76, joint loss=41.39727, LR=84.87802, HR=0.00000, gt=31 \n')
('2020-04-30 19:08:41', 'IMG_169.jpg', 'step 77, joint loss=70.79318, LR=78.73201, HR=0.00000, gt=50 \n')
('2020-04-30 19:08:42', 'IMG_17.jpg', 'step 78, joint loss=377.43555, LR=81.54372, HR=0.00000, gt=121 \n')
('2020-04-30 19:08:43', 'IMG_170.jpg', 'step 79, joint loss=2522.49976, LR=60.92342, HR=0.00000, gt=246 \n')
('2020-04-30 19:08:45', 'IMG_171.jpg', 'step 80, joint loss=3599.51001, LR=73.47013, HR=0.00000, gt=420 \n')
('2020-04-30 19:08:46', 'IMG_172.jpg', 'step 81, joint loss=1.55936, LR=70.97708, HR=0.00000, gt=4 \n')
('2020-04-30 19:08:46', 'IMG_173.jpg', 'step 82, joint loss=69.04508, LR=86.08633, HR=0.00000, gt=49 \n')
('2020-04-30 19:08:47', 'IMG_174.jpg', 'step 83, joint loss=36.01017, LR=74.53566, HR=0.00000, gt=27 \n')
('2020-04-30 19:08:47', 'IMG_175.jpg', 'step 84, joint loss=101.27005, LR=75.68700, HR=0.00000, gt=42 \n')
('2020-04-30 19:08:48', 'IMG_176.jpg', '

('2020-04-30 19:09:42', 'IMG_237.jpg', 'step 153, joint loss=1341.57996, LR=67.28516, HR=0.00000, gt=144 \n')
('2020-04-30 19:09:42', 'IMG_238.jpg', 'step 154, joint loss=131.10591, LR=83.72289, HR=0.00000, gt=71 \n')
('2020-04-30 19:09:43', 'IMG_239.jpg', 'step 155, joint loss=31.91909, LR=74.14700, HR=0.00000, gt=24 \n')
('2020-04-30 19:09:44', 'IMG_24.jpg', 'step 156, joint loss=0.00491, LR=60.18028, HR=0.00000, gt=0 \n')
('2020-04-30 19:09:44', 'IMG_240.jpg', 'step 157, joint loss=739.85864, LR=18.09965, HR=0.00000, gt=87 \n')
('2020-04-30 19:09:45', 'IMG_241.jpg', 'step 158, joint loss=1.03815, LR=66.60059, HR=0.00000, gt=4 \n')
('2020-04-30 19:09:46', 'IMG_242.jpg', 'step 159, joint loss=386.03714, LR=78.17467, HR=0.00000, gt=108 \n')
('2020-04-30 19:09:47', 'IMG_243.jpg', 'step 160, joint loss=15.91476, LR=62.07509, HR=0.00000, gt=16 \n')
('2020-04-30 19:09:47', 'IMG_244.jpg', 'step 161, joint loss=144.68587, LR=80.57017, HR=0.00000, gt=63 \n')
('2020-04-30 19:09:48', 'IMG_245.j

('2020-04-30 19:10:48', 'IMG_36.jpg', 'step 230, joint loss=147.15076, LR=107.69791, HR=0.00000, gt=53 \n')
('2020-04-30 19:10:49', 'IMG_37.jpg', 'step 231, joint loss=423.40323, LR=91.17072, HR=0.00000, gt=101 \n')
('2020-04-30 19:10:50', 'IMG_38.jpg', 'step 232, joint loss=24.79971, LR=98.69409, HR=0.00000, gt=23 \n')
('2020-04-30 19:10:51', 'IMG_39.jpg', 'step 233, joint loss=2.01827, LR=95.83160, HR=0.00000, gt=1 \n')
('2020-04-30 19:10:51', 'IMG_4.jpg', 'step 234, joint loss=196.20724, LR=91.09687, HR=0.00000, gt=63 \n')
('2020-04-30 19:10:52', 'IMG_40.jpg', 'step 235, joint loss=70.44801, LR=90.88449, HR=0.00000, gt=39 \n')
('2020-04-30 19:10:53', 'IMG_41.jpg', 'step 236, joint loss=326.47723, LR=106.96806, HR=0.00000, gt=84 \n')
('2020-04-30 19:10:54', 'IMG_42.jpg', 'step 237, joint loss=16.31685, LR=101.65129, HR=0.00000, gt=15 \n')
('2020-04-30 19:10:54', 'IMG_43.jpg', 'step 238, joint loss=40.59017, LR=84.86191, HR=0.00000, gt=27 \n')
('2020-04-30 19:10:55', 'IMG_44.jpg', 'st

ResourceExhaustedError: OOM when allocating tensor with shape[1,32,704,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: gradients/iccnn/hr_conv9/Conv2D_grad/Conv2DBackpropInput = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/iccnn/hr_conv9/Conv2D_grad/ShapeN, iccnn/hr_conv9/weights/read, gradients/iccnn/hr_conv9/Relu_grad/ReluGrad, ^gradients/iccnn/hr_conv9/BiasAdd_grad/BiasAddGrad)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.


Caused by op 'gradients/iccnn/hr_conv9/Conv2D_grad/Conv2DBackpropInput', defined at:
  File "d:\program files\python36\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "d:\program files\python36\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "d:\program files\python36\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "d:\program files\python36\lib\site-packages\traitlets\config\application.py", line 664, in launch_instance
    app.start()
  File "d:\program files\python36\lib\site-packages\ipykernel\kernelapp.py", line 583, in start
    self.io_loop.start()
  File "d:\program files\python36\lib\site-packages\tornado\platform\asyncio.py", line 149, in start
    self.asyncio_loop.run_forever()
  File "d:\program files\python36\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "d:\program files\python36\lib\asyncio\base_events.py", line 1434, in _run_once
    handle._run()
  File "d:\program files\python36\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "d:\program files\python36\lib\site-packages\tornado\ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "d:\program files\python36\lib\site-packages\tornado\ioloop.py", line 743, in _run_callback
    ret = callback()
  File "d:\program files\python36\lib\site-packages\tornado\gen.py", line 787, in inner
    self.run()
  File "d:\program files\python36\lib\site-packages\tornado\gen.py", line 748, in run
    yielded = self.gen.send(value)
  File "d:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 365, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "d:\program files\python36\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "d:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 268, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "d:\program files\python36\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "d:\program files\python36\lib\site-packages\ipykernel\kernelbase.py", line 545, in execute_request
    user_expressions, allow_stdin,
  File "d:\program files\python36\lib\site-packages\tornado\gen.py", line 209, in wrapper
    yielded = next(result)
  File "d:\program files\python36\lib\site-packages\ipykernel\ipkernel.py", line 300, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "d:\program files\python36\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "d:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2858, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "d:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 2886, in _run_cell
    return runner(coro)
  File "d:\program files\python36\lib\site-packages\IPython\core\async_helpers.py", line 68, in _pseudo_sync_runner
    coro.send(None)
  File "d:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3063, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "d:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3254, in run_ast_nodes
    if (await self.run_code(code, result,  async_=asy)):
  File "d:\program files\python36\lib\site-packages\IPython\core\interactiveshell.py", line 3331, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-1-f71aceea817d>", line 135, in <module>
    train()
  File "<ipython-input-1-f71aceea817d>", line 54, in train
    optimizer = tf.train.GradientDescentOptimizer(cfig.lr).minimize(joint_loss)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\optimizer.py", line 399, in minimize
    grad_loss=grad_loss)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\training\optimizer.py", line 511, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gradients_impl.py", line 532, in gradients
    gate_gradients, aggregation_method, stop_gradients)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gradients_impl.py", line 701, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gradients_impl.py", line 396, in _MaybeCompile
    return grad_fn()  # Exit early
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gradients_impl.py", line 701, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\nn_grad.py", line 520, in _Conv2DGrad
    data_format=data_format),
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\gen_nn_ops.py", line 1340, in conv2d_backprop_input
    dilations=dilations, name=name)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\ops.py", line 3414, in create_op
    op_def=op_def)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\framework\ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

...which was originally created as op 'iccnn/hr_conv9/Conv2D', defined at:
  File "d:\program files\python36\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
[elided 26 identical lines from previous traceback]
  File "<ipython-input-1-f71aceea817d>", line 135, in <module>
    train()
  File "<ipython-input-1-f71aceea817d>", line 43, in train
    hr_density_map, lr_density_map = ic_cnn(input_img_placeholder)
  File "<string>", line 64, in ic_cnn
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\contrib\framework\python\ops\arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\contrib\layers\python\layers\layers.py", line 1154, in convolution2d
    conv_dims=2)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\contrib\framework\python\ops\arg_scope.py", line 183, in func_with_args
    return func(*args, **current_args)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\contrib\layers\python\layers\layers.py", line 1057, in convolution
    outputs = layer.apply(inputs)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 774, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\layers\base.py", line 329, in __call__
    outputs = super(Layer, self).__call__(inputs, *args, **kwargs)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\engine\base_layer.py", line 703, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\keras\layers\convolutional.py", line 184, in call
    outputs = self._convolution_op(inputs, self.kernel)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\nn_ops.py", line 868, in __call__
    return self.conv_op(inp, filter)
  File "C:\Users\asus\AppData\Roaming\Python\Python36\site-packages\tensorflow\python\ops\nn_ops.py", line 520, in __call__
    return self.call(inp, filter)

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[1,32,704,1024] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[Node: gradients/iccnn/hr_conv9/Conv2D_grad/Conv2DBackpropInput = Conv2DBackpropInput[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](gradients/iccnn/hr_conv9/Conv2D_grad/ShapeN, iccnn/hr_conv9/weights/read, gradients/iccnn/hr_conv9/Relu_grad/ReluGrad, ^gradients/iccnn/hr_conv9/BiasAdd_grad/BiasAddGrad)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

