# In [None]:
import tensorflow as tf
import numpy as np
import itertools
import cv2
import time
import sys
from data_parsing import voc_utils
from data_parsing import voc_train

class YOLO_TF:
    """YOLO "small" object detector built with the TensorFlow graph API.

    All configuration lives in the class attributes below.  Constructing an
    instance builds the network, and then, depending on ``training``,
    ``file_input`` and ``image``, enters the training and/or detection paths
    from ``__init__``.
    """

    # Control variables
    file_input = 'person.jpg'        # image file __init__ runs detection on (None disables it)
    file_output_img = 'test/output.jpg' # target image file (not referenced by the methods of this class)
    file_output_txt = 'test/output.txt' # target text file (not referenced by the methods of this class)
    weights_file = 'YOLO_small.ckpt'  # checkpoint with pretrained weights, restored by network_building
    fwrt_img = False     # image-writing flag; disp_results draws boxes when this or imshow is set
    written = None       # not referenced by the methods of this class
    image = True         # __init__ only runs file detection when this is true
    imshow = True        # show the annotated image in an OpenCV window
    display = True       # print progress / debug messages


    # Algorithm variables
    alpha = 0.1                          # negative-side slope of the leaky ReLU activations
    threshold_value = 0.2                # minimum class score kept by interpret_output
    int_over_un_threshold_value = 0.5    # IoU above which a weaker box is suppressed
    num_class = 20                 # number of object classes
    num_box = 2                    # number of boxes predicted per grid cell
    grid_size = 7                  # the image is divided into grid_size x grid_size cells
    # Class names, indexed by the class number decoded from the network output
    classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
               "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

    w_img = 640                    # image width (default; overwritten by cvmat_input)
    h_img = 480                    # image height (default; overwritten by cvmat_input)

    # Training variables
    training = False               # when true, layers are created trainable and __init__ takes the training path
    # Dropout keep-probability placeholder; note it is created once, when the
    # class body is executed, and shared by every instance.
    keep_prob = tf.placeholder(tf.float32)
    lamb_cord = 5.0                # loss weight of the box coordinate / size terms
    lamb_nobj = 0.5                # loss weight of the confidence term for boxes without an object
    label = None                   # training labels, assigned by train()
    # NOTE(review): duplicated assignment; possibly meant to be `label_test`
    # (train() assigns self.label_test) -- confirm.
    label = None
    ind_in_epoch = 0               # cursor within the current epoch (presumably for next_batch -- confirm)
    epochs_done = 0                # number of completed passes over the labels

    def __init__(self):
        """Build the network, optionally train it, then detect on ``file_input``.

        The inference graph is always built first.  When ``self.training`` is
        true the training graph is added and the training loop runs before
        detection.  Detection on ``self.file_input`` only happens when that
        attribute is not None and ``self.image`` is true.
        """
        self.network_building()
        if self.training:
            # The training graph is defined by develop_training(); the class has
            # no build_training() method, so calling that raised AttributeError.
            self.develop_training()
            self.train()
        print("detection")
        print(self.file_input)
        if self.file_input is not None and self.image:
            print("image")
            self.input_file(self.file_input)

    def define_layer_convulation(self, idx, inputs, filters, size, stride, trainable=False):
        """Append a padded convolution + bias + leaky-ReLU stage to the graph.

        idx       -- layer number, used as prefix of the op names
        inputs    -- input tensor; its 4th dimension is read as the channel count
        filters   -- number of output feature maps
        size      -- side length of the square kernel
        stride    -- spatial stride
        trainable -- whether the kernel and bias variables are trainable

        Returns the activated output tensor.
        """
        in_channels = inputs.get_shape()[3]
        kernel_init = tf.truncated_normal([size, size, int(in_channels), filters], stddev=0.1)
        kernel = tf.Variable(kernel_init, trainable=trainable)
        bias_init = tf.constant(0.1, shape=[filters])
        bias = tf.Variable(bias_init, trainable=trainable)

        # Pad the two spatial axes by half the kernel size ourselves and run
        # the convolution with 'VALID' padding.
        half = size // 2
        paddings = np.array([[0, 0], [half, half], [half, half], [0, 0]])
        padded = tf.pad(inputs, paddings)

        prefix = str(idx)
        conv_out = tf.nn.conv2d(padded, kernel, strides=[1, stride, stride, 1],
                                padding='VALID', name=prefix + '_conv')
        pre_activation = tf.add(conv_out, bias, name=prefix + '_layer_convolutionbiased')

        if self.display:
            print('    Layer  %d : Type = Conv, Size = %d * %d, Stride = %d, Filters = %d, Input channels = %d' %
                  (idx, size, size, stride, filters, int(in_channels)))

        # Leaky ReLU expressed as max(alpha * x, x).
        scaled = tf.multiply(self.alpha, pre_activation)
        return tf.maximum(scaled, pre_activation, name=prefix + '_leaky_relu')

    def define_layer_pooling(self, idx, inputs, size, stride):
        """Append a max-pooling stage ('SAME' padding) and return its output.

        idx    -- layer number, used as prefix of the op name
        inputs -- input tensor
        size   -- side length of the square pooling window
        stride -- spatial stride
        """
        if self.display:
            print('    Layer  %d : Type = Pool, Size = %d * %d, Stride = %d' % (idx, size, size, stride))
        window = [1, size, size, 1]
        step = [1, stride, stride, 1]
        return tf.nn.max_pool(inputs, ksize=window, strides=step, padding='SAME', name=str(idx) + '_pool')

    def define_fullyconnected_layer(self, idx, inputs, hiddens, flat=False, linear=False, trainable=False):
        """Append a fully connected stage to the graph and return its output.

        idx       -- layer number, used as prefix of the op name
        inputs    -- input tensor (4-D when ``flat`` is true, otherwise 2-D)
        hiddens   -- number of output units
        flat      -- flatten a 4-D input before the matrix product
        linear    -- when true no activation is applied; otherwise leaky ReLU
        trainable -- whether the weight and bias variables are trainable
        """
        shape = inputs.get_shape().as_list()
        if not flat:
            dim = shape[1]
            flat_inputs = inputs
        else:
            dim = shape[1] * shape[2] * shape[3]
            # Move the last axis in front of the two middle axes before
            # collapsing everything but the batch axis.
            flat_inputs = tf.reshape(tf.transpose(inputs, (0, 3, 1, 2)), [-1, dim])

        # Weights are created as zeros and biases as 0.1.
        weight = tf.Variable(tf.zeros([dim, hiddens]), trainable=trainable)
        biases = tf.Variable(tf.constant(0.1, shape=[hiddens]), trainable=trainable)

        if self.display:
            print('    Layer  %d : Type = Full, Hidden = %d, Input dimension = %d, Flat = %d, Activation = %d' %
                  (idx, hiddens, int(dim), int(flat), 1 - int(linear)))

        op_name = str(idx) + '_fc'
        if linear:
            return tf.add(tf.matmul(flat_inputs, weight), biases, name=op_name)

        affine = tf.add(tf.matmul(flat_inputs, weight), biases)
        # Leaky ReLU expressed as max(alpha * x, x).
        return tf.maximum(tf.multiply(self.alpha, affine), affine, name=op_name)

    def dropout(self, idx, inputs):
        """Apply dropout to ``inputs`` using the ``keep_prob`` placeholder.

        idx is only used for the progress message.
        """
        if self.display:
            print('    Layer  %d : Type = DropOut' % idx)
        dropped = tf.nn.dropout(inputs, keep_prob=self.keep_prob)
        return dropped

    def cvmat_input(self, img):
        """Run the network on an OpenCV image and store the detections.

        img -- 3-D image array as returned by cv2.imread.

        Side effects: updates ``self.h_img`` / ``self.w_img`` with the image
        size and stores the decoded detections in ``self.result``.
        """
        started = time.time()
        height, width, _ = img.shape
        self.h_img = height
        self.w_img = width

        # Resize to the network input size, convert BGR -> RGB and rescale
        # pixel values from [0, 255] to [-1, 1].
        resized = cv2.resize(img, (448, 448))
        rgb = np.asarray(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))
        batch = np.zeros((1, 448, 448, 3), dtype='float32')
        batch[0] = (rgb / 255.0) * 2.0 - 1.0

        # keep_prob = 1.0 turns dropout off for inference.
        feed = {self.x: batch, self.keep_prob: 1.0}
        net_output = self.sess.run(self.layer_fullyconnected32, feed_dict=feed)
        self.result = self.interpret_output(net_output[0])

        elapsed = str(time.time() - started)
        if self.display:
            print('Elapsed time : ' + elapsed + ' secs' + '\n')

    def input_file(self, filename):
        """Read an image from disk, run detection on it and display the results.

        filename -- path of the image file.

        Raises IOError when the file cannot be read as an image.
        """
        if self.display:
            print('Detect from ' + filename)
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with a clear message rather than
            # with an AttributeError on img.shape later.
            raise IOError('Could not read image file: ' + str(filename))
        self.cvmat_input(img)
        self.disp_results(img, self.result)
       
    def network_building(self):
        """Build the YOLO_small inference graph and load the pretrained weights.

        Creates the input placeholder ``self.x`` (batches of 448x448x3 images),
        chains the convolution / pooling / fully connected / dropout layers
        (each stored as an attribute named after its layer number), then opens
        a session, initialises the variables and restores them from
        ``self.weights_file``.

        NOTE(review): the layers create unnamed variables and the Saver is
        built with default arguments, so the order in which layers are created
        presumably has to match the checkpoint -- confirm before reordering.
        """
        if self.display: print ("Building YOLO_small graph...")
        self.x = tf.placeholder('float32', [None, 448, 448, 3])
        self.layer_convolution1 = self.define_layer_convulation(1, self.x, 64, 7, 2,trainable=self.training)  # 1st: convolution layer
        self.layer_pooling2 = self.define_layer_pooling(2, self.layer_convolution1, 2, 2)          # 2nd: pooling layer
        self.layer_convolution3 = self.define_layer_convulation(3, self.layer_pooling2, 192, 3, 1,trainable=self.training)        # 3rd: convolution layer
        self.layer_pooling4 = self.define_layer_pooling(4, self.layer_convolution3, 2, 2)          # 4th: pooling layer
        self.layer_convolution5 = self.define_layer_convulation(5, self.layer_pooling4, 128, 1, 1,trainable=self.training)        # 5th: convolution layer
        self.layer_convolution6 = self.define_layer_convulation(6, self.layer_convolution5, 256, 3, 1,trainable=self.training)        # 6th: convolution layer
        self.layer_convolution7 = self.define_layer_convulation(7, self.layer_convolution6, 256, 1, 1,trainable=self.training)        # 7th: convolution layer
        self.layer_convolution8 = self.define_layer_convulation(8, self.layer_convolution7, 512, 3, 1,trainable=self.training)        # 8th: convolution layer
        self.layer_pooling9 = self.define_layer_pooling(9, self.layer_convolution8, 2, 2)          # 9th: pooling layer
        self.layer_convolution10 = self.define_layer_convulation(10, self.layer_pooling9, 256, 1, 1,trainable=self.training)      # 10th: convolution layer
        self.layer_convolution11 = self.define_layer_convulation(11, self.layer_convolution10, 512, 3, 1,trainable=self.training)     # 11th: convolution layer
        self.layer_convolution12 = self.define_layer_convulation(12, self.layer_convolution11, 256, 1, 1,trainable=self.training)     # 12th: convolution layer
        self.layer_convolution13 = self.define_layer_convulation(13, self.layer_convolution12, 512, 3, 1,trainable=self.training)     # 13th: convolution layer
        self.layer_convolution14 = self.define_layer_convulation(14, self.layer_convolution13, 256, 1, 1,trainable=self.training)     # 14th: convolution layer
        self.layer_convolution15 = self.define_layer_convulation(15, self.layer_convolution14, 512, 3, 1,trainable=self.training)     # 15th: convolution layer
        self.layer_convolution16 = self.define_layer_convulation(16, self.layer_convolution15, 256, 1, 1,trainable=self.training)     # 16th: convolution layer
        self.layer_convolution17 = self.define_layer_convulation(17, self.layer_convolution16, 512, 3, 1,trainable=self.training)     # 17th: convolution layer
        self.layer_convolution18 = self.define_layer_convulation(18, self.layer_convolution17, 512, 1, 1,trainable=self.training)     # 18th: convolution layer
        self.layer_convolution19 = self.define_layer_convulation(19, self.layer_convolution18, 1024, 3, 1,trainable=self.training)    # 19th: convolution layer
        self.layer_pooling20 = self.define_layer_pooling(20, self.layer_convolution19, 2, 2)       # 20th: pooling layer
        self.layer_convolution21 = self.define_layer_convulation(21, self.layer_pooling20, 512, 1, 1,trainable=self.training)     # 21st: convolution layer
        self.layer_convolution22 = self.define_layer_convulation(22, self.layer_convolution21, 1024, 3, 1,trainable=self.training)    # 22nd: convolution layer
        self.layer_convolution23 = self.define_layer_convulation(23, self.layer_convolution22, 512, 1, 1,trainable=self.training)     # 23rd: convolution layer
        self.layer_convolution24 = self.define_layer_convulation(24, self.layer_convolution23, 1024, 3, 1,trainable=self.training)    # 24th: convolution layer
        self.layer_convolution25 = self.define_layer_convulation(25, self.layer_convolution24, 1024, 3, 1,trainable=self.training)    # 25th: convolution layer
        self.layer_convolution26 = self.define_layer_convulation(26, self.layer_convolution25, 1024, 3, 2,trainable=self.training)    # 26th: convolution layer (stride 2)
        self.layer_convolution27 = self.define_layer_convulation(27, self.layer_convolution26, 1024, 3, 1,trainable=self.training)    # 27th: convolution layer
        self.layer_convolution28 = self.define_layer_convulation(28, self.layer_convolution27, 1024, 3, 1,trainable=self.training)    # 28th: convolution layer
        self.layer_fullyconnected29 = self.define_fullyconnected_layer(29, self.layer_convolution28, 512, flat=True, linear=False,trainable=self.training)   # 29th: fully connected layer (flattens the conv output)
        self.layer_fullyconnected30 = self.define_fullyconnected_layer(30, self.layer_fullyconnected29, 4096, flat=False, linear=False,trainable=self.training)   # 30th: fully connected layer
        self.drop_31 = self.dropout(31, self.layer_fullyconnected30)                                                          # 31st: dropout layer
        # 1470 outputs per image; interpret_output reads them as 980 class
        # scores, 98 box confidences and 392 box values.
        self.layer_fullyconnected32 = self.define_fullyconnected_layer(32, self.drop_31, 1470, flat=False, linear=True,trainable=self.training)  # 32nd: fully connected layer (linear output)
        self.sess = tf.Session()
        # Initialise every variable first, then overwrite them with the values
        # stored in the checkpoint.
        self.sess.run(tf.global_variables_initializer())
        self.saver = tf.train.Saver()                           # Saver over the variables created above
        self.saver.restore(self.sess, self.weights_file)
        if self.display: print ("Loading complete!" + '\n')
    
    
    def disp_results(self, img, results):
        """Print each detection and, if enabled, draw and show the boxes.

        img     -- original image; a copy is annotated, the input is untouched
        results -- detections as ``[class_name, x, y, w, h, confidence]``
                   entries, where (x, y) is the box centre
        """
        canvas = img.copy()
        draw_boxes = self.fwrt_img or self.imshow

        for det in results:
            label, confidence = det[0], det[5]
            cx, cy = int(det[1]), int(det[2])
            full_w, full_h = int(det[3]), int(det[4])
            half_w, half_h = full_w // 2, full_h // 2

            if self.display:
                print('    class : ' + label + ' , [x,y,w,h]=[' + str(cx) + ',' + str(cy) + ','
                      + str(full_w) + ',' + str(full_h) + '], Confidence = ' + str(confidence))

            if draw_boxes:
                left, top = cx - half_w, cy - half_h
                right, bottom = cx + half_w, cy + half_h
                # Green outline, then a filled grey caption strip above it.
                cv2.rectangle(canvas, (left, top), (right, bottom), (0, 255, 0), 2)
                cv2.rectangle(canvas, (left, top - 20), (right, top), (125, 125, 125), -1)
                cv2.putText(canvas, label + ' : %.2f' % confidence, (left + 5, top - 7),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)

        if self.imshow:
            cv2.imshow('YOLO_small detection', canvas)
            cv2.waitKey(1)

    def interpret_output(self, output):
        """Decode one raw network output vector into a list of detections.

        output -- 1-D sequence laid out as S*S*C class scores, then S*S*B box
                  confidences, then S*S*B*4 box values, where S = grid_size,
                  B = num_box and C = num_class (7, 2 and 20 by default, i.e.
                  the slices 0:980, 980:1078 and 1078:).  The input is not
                  modified.

        Returns a list of ``[class_name, x, y, w, h, score]`` entries, with the
        box centre and size scaled by ``self.w_img`` / ``self.h_img``, ordered
        by descending score, after thresholding with ``self.threshold_value``
        and suppressing boxes whose IoU with a stronger box exceeds
        ``self.int_over_un_threshold_value``.
        """
        S, B, C = self.grid_size, self.num_box, self.num_class
        n_class = S * S * C
        n_conf = S * S * B

        class_probability = np.reshape(output[0:n_class], (S, S, C))
        scales = np.reshape(output[n_class:n_class + n_conf], (S, S, B))
        # np.array() copies, so the in-place maths below cannot leak into the
        # caller's array through a reshape view.
        bound_box = np.array(np.reshape(output[n_class + n_conf:], (S, S, B, 4)))

        # Box values 0/1 are offsets inside a grid cell: add the cell's column
        # (second axis) to value 0 and its row (first axis) to value 1, then
        # normalise by the grid size.
        bound_box[:, :, :, 0] += np.arange(S).reshape(1, S, 1)
        bound_box[:, :, :, 1] += np.arange(S).reshape(S, 1, 1)
        bound_box[:, :, :, 0:2] = bound_box[:, :, :, 0:2] / float(S)
        # Box values 2/3 are squared to obtain the width and height.
        bound_box[:, :, :, 2:4] = np.square(bound_box[:, :, :, 2:4])

        bound_box[:, :, :, 0] *= self.w_img
        bound_box[:, :, :, 1] *= self.h_img
        bound_box[:, :, :, 2] *= self.w_img
        bound_box[:, :, :, 3] *= self.h_img

        # probs[row, col, box, cls] = class score * box confidence.
        probs = (class_probability[:, :, np.newaxis, :] * scales[:, :, :, np.newaxis]).astype(np.float64)

        keep = probs >= self.threshold_value
        row_idx, col_idx, box_idx, class_idx = np.nonzero(keep)
        bound_box_filtered = bound_box[row_idx, col_idx, box_idx]
        probs_filtered = probs[keep]
        # The class of each kept score is its own index on the class axis.
        # (Taking argmax over the boolean mask returned the first class above
        # the threshold, which mislabels boxes with several passing classes.)
        classes_num_filtered = class_idx

        # Strongest detections first.
        order = np.argsort(probs_filtered)[::-1]
        bound_box_filtered = bound_box_filtered[order]
        probs_filtered = probs_filtered[order]
        classes_num_filtered = classes_num_filtered[order]

        # Non-maximum suppression: zero the score of every box that overlaps a
        # stronger, still-active box too much.
        n_boxes = len(bound_box_filtered)
        for i in range(n_boxes):
            if probs_filtered[i] == 0:
                continue
            for j in range(i + 1, n_boxes):
                if self.int_over_un(bound_box_filtered[i], bound_box_filtered[j]) > self.int_over_un_threshold_value:
                    probs_filtered[j] = 0.0

        survivors = probs_filtered > 0.0
        bound_box_filtered = bound_box_filtered[survivors]
        probs_filtered = probs_filtered[survivors]
        classes_num_filtered = classes_num_filtered[survivors]

        return [[self.classes[cls], box[0], box[1], box[2], box[3], score]
                for cls, box, score in zip(classes_num_filtered, bound_box_filtered, probs_filtered)]
    
    def develop_training(self):
        """Add target placeholders, the loss and the optimizer to the graph.

        Creates the placeholders fed by ``build_label`` (``x_``, ``y_``,
        ``w_``, ``h_``, ``C_``, ``p_``, ``obj``, ``noobj``, ``Obj_I`` and
        ``epoch``), the scalar ``self.loss`` and the update op
        ``self.train_step``, then opens a session and initialises the
        variables.
        """
        # Per-box targets, one value for each of the 2 boxes of each 7x7 cell.
        self.x_ = tf.placeholder(tf.float32, [None, 7, 7, 2])   # target x of the box centre
        self.y_ = tf.placeholder(tf.float32, [None, 7, 7, 2])   # target y of the box centre
        self.w_ = tf.placeholder(tf.float32, [None, 7, 7, 2])   # target width term
        self.h_ = tf.placeholder(tf.float32, [None, 7, 7, 2])   # target height term
        self.C_ = tf.placeholder(tf.float32, [None, 7, 7, 2])   # target box confidence
        self.p_ = tf.placeholder(tf.float32, [None, 7, 7, 20])  # target class probabilities per cell
        self.obj = tf.placeholder(tf.float32, [None, 7, 7, 2])    # mask: box slot holds an object
        self.noobj = tf.placeholder(tf.float32, [None, 7, 7, 2])  # mask: box slot holds no object
        self.Obj_I = tf.placeholder(tf.float32, [None, 7, 7])     # mask: cell contains an object

        # Split the network output the same way interpret_output does.  The
        # output tensor is stored as layer_fullyconnected32 by
        # network_building (there is no attribute named fc_32).
        output = self.layer_fullyconnected32
        img_nb = tf.shape(self.x_)[0]
        class_probability = tf.reshape(output[0:img_nb, 0:980], (img_nb, 7, 7, 20))
        scales = tf.reshape(output[0:img_nb, 980:1078], (img_nb, 7, 7, 2))
        bound_box = tf.reshape(output[0:img_nb, 1078:], (img_nb, 7, 7, 2, 4))

        bound_box0 = bound_box[:, :, :, :, 0]
        bound_box1 = bound_box[:, :, :, :, 1]
        bound_box2 = bound_box[:, :, :, :, 2]
        bound_box3 = bound_box[:, :, :, :, 3]

        # Prediction errors.  tf.subtract replaces tf.sub, which was removed in
        # TensorFlow 1.0.
        self.subX = tf.subtract(bound_box0, self.x_)
        self.subY = tf.subtract(bound_box1, self.y_)
        self.subW = tf.subtract(tf.sqrt(tf.abs(bound_box2)), tf.sqrt(self.w_))
        self.subH = tf.subtract(tf.sqrt(tf.abs(bound_box3)), tf.sqrt(self.h_))
        self.subC = tf.subtract(scales, self.C_)
        self.subP = tf.subtract(class_probability, self.p_)

        def masked_square_sum(mask, diff):
            # Per-image sum of mask * diff^2 over the grid and box axes.
            return tf.reduce_sum(tf.multiply(mask, tf.multiply(diff, diff)), axis=[1, 2, 3])

        # Weighted sum-of-squares terms, one value per image.
        self.lossX = tf.multiply(self.lamb_cord, masked_square_sum(self.obj, self.subX))
        self.lossY = tf.multiply(self.lamb_cord, masked_square_sum(self.obj, self.subY))
        self.lossW = tf.multiply(self.lamb_cord, masked_square_sum(self.obj, self.subW))
        self.lossH = tf.multiply(self.lamb_cord, masked_square_sum(self.obj, self.subH))
        self.lossCObj = masked_square_sum(self.obj, self.subC)
        self.lossCNobj = tf.multiply(self.lamb_nobj, masked_square_sum(self.noobj, self.subC))
        class_sq_err = tf.reduce_sum(tf.multiply(self.subP, self.subP), axis=3)
        self.lossP = tf.reduce_sum(tf.multiply(self.Obj_I, class_sq_err), axis=[1, 2])

        # Total loss: sum of all terms, averaged over the batch.
        self.loss = tf.add_n((self.lossX, self.lossY, self.lossW, self.lossH,
                              self.lossCObj, self.lossCNobj, self.lossP))
        self.loss = tf.reduce_mean(self.loss)

        # Training variables
        global_step = tf.Variable(0, trainable=False)
        starter_learning_rate = 0.001
        decay = 0.0005
        end_learning_rate = 0.01
        self.epoch = tf.placeholder(tf.int32)

        # Learning-rate schedule, selected by the epoch number fed at run time.
        # NOTE(review): `decay` is passed as polynomial_decay's decay_steps
        # argument -- confirm that is intended.
        def learning_rate1():
            return tf.train.polynomial_decay(starter_learning_rate, global_step, decay,
                                             end_learning_rate=end_learning_rate, power=1.0)

        def learning_rate2():
            return tf.constant(0.01)

        def learning_rate3():
            return tf.constant(0.001)

        def learning_rate4():
            return tf.constant(0.0001)

        # NOTE(review): epochs 2..76 match none of the predicates and therefore
        # fall through to the default (learning_rate4) -- confirm.
        lr = tf.case({tf.less_equal(self.epoch, 1): learning_rate1,
                      tf.logical_and(tf.greater(self.epoch, 76), tf.less_equal(self.epoch, 106)): learning_rate2,
                      tf.logical_and(tf.greater(self.epoch, 106), tf.less_equal(self.epoch, 136)): learning_rate3,
                      tf.greater(self.epoch, 136): learning_rate4}, learning_rate4, exclusive=True)

        self.train_step = tf.train.MomentumOptimizer(learning_rate=lr, momentum=0.9).minimize(
            self.loss, global_step=global_step)

        # NOTE(review): this replaces any session already stored in self.sess
        # and re-initialises every variable, so values restored into an earlier
        # session are not carried over -- confirm that is intended.
        self.sess = tf.Session()
        self.sess.run(tf.global_variables_initializer())

    
    def int_over_un(self, box1, box2):
        """Return the intersection-over-union of two boxes.

        box1, box2 -- boxes indexed as [x_centre, y_centre, width, height].

        Returns 0.0 when the boxes do not overlap or when their union has no
        area; otherwise intersection area / union area.
        """
        # Overlap along the axis of index 0 (with extent index 2) ...
        overlap_w = (min(box1[0] + 0.5 * box1[2], box2[0] + 0.5 * box2[2])
                     - max(box1[0] - 0.5 * box1[2], box2[0] - 0.5 * box2[2]))
        # ... and along the axis of index 1 (with extent index 3).
        overlap_h = (min(box1[1] + 0.5 * box1[3], box2[1] + 0.5 * box2[3])
                     - max(box1[1] - 0.5 * box1[3], box2[1] - 0.5 * box2[3]))

        if overlap_w < 0 or overlap_h < 0:
            return 0.0

        intersection = overlap_w * overlap_h
        union = box1[2] * box1[3] + box2[2] * box2[3] - intersection
        if union <= 0:
            # Both boxes are degenerate; avoid dividing by zero.
            return 0.0

        iou_value = intersection / union
        if self.display:
            print("IOU%=", iou_value * 100)
        return iou_value
    
    def build_label(self, img_filenms, epoch):
        """Build the training feed dict (images and targets) for a set of files.

        img_filenms -- iterable of image identifiers understood by
                       voc_train.get_training_data and voc_utils.load_img
        epoch       -- value fed to the ``self.epoch`` placeholder

        Returns a dict mapping the input/target placeholders to arrays stacked
        over the given images, with ``keep_prob`` set to 0.5.
        """
        glob_X, glob_Y, glob_W, glob_H = [], [], [], []
        glob_C, glob_P, glob_obj, glob_objI, glob_noobj = [], [], [], [], []
        images = []

        for img_filenm in img_filenms:
            # Lab_Pre[i][j] is None or a sequence of boxes for grid cell (i, j);
            # each box is indexed 0..4, index 4 being the class name.
            Lab_Pre = voc_train.get_training_data(img_filenm)
            x = np.zeros([7, 7, 2]); y = np.zeros([7, 7, 2])
            w = np.zeros([7, 7, 2]); h = np.zeros([7, 7, 2])
            C = np.zeros([7, 7, 2]); p = np.zeros([7, 7, 20])
            obj = np.zeros([7, 7, 2]); objI = np.zeros([7, 7])
            noobj = np.ones([7, 7, 2])
            img = voc_utils.load_img(img_filenm)
            dim0, dim1 = len(img), len(img[0])

            for i, j in itertools.product(range(7), range(7)):
                boxes = Lab_Pre[i][j]
                if boxes is None:
                    continue
                # At most two boxes per cell are used.
                for ind in range(min(len(boxes), 2)):
                    box = boxes[ind]
                    # NOTE(review): x is normalised by len(img) and y by
                    # len(img[0]), and h (unlike w) is not multiplied by 7 --
                    # confirm against the label convention of voc_train.
                    x[i][j][ind] = (float(box[0]) / dim0) * 7 - i
                    y[i][j][ind] = (float(box[1]) / dim1) * 7 - j
                    w[i][j][ind] = np.sqrt(box[2]) / dim0 * 7
                    h[i][j][ind] = np.sqrt(box[3]) / dim1
                    C[i][j][ind] = 1.0
                    # list.index (not the non-existent list.ind) maps the class
                    # name to its position in self.classes.
                    p[i][j][self.classes.index(box[4])] = 1.0 / float(len(boxes))
                    obj[i][j][ind] = 1.0
                    objI[i][j] = 1.0
                    noobj[i][j][ind] = 0.0

            glob_X.append(x); glob_Y.append(y); glob_W.append(w); glob_H.append(h)
            glob_C.append(C); glob_P.append(p); glob_obj.append(obj)
            glob_objI.append(objI); glob_noobj.append(noobj)

            # Same preprocessing as inference: resize to 448x448, BGR -> RGB,
            # rescale pixel values to [-1, 1].
            Resize_Img = cv2.resize(img, (448, 448))
            img_RGB = cv2.cvtColor(Resize_Img, cv2.COLOR_BGR2RGB)
            scaled = np.zeros((448, 448, 3), dtype='float32')
            scaled[...] = (np.asarray(img_RGB) / 255.0) * 2.0 - 1.0
            images.append(scaled)

        return {self.x: np.array(images),
                self.x_: np.array(glob_X), self.y_: np.array(glob_Y),
                self.w_: np.array(glob_W), self.h_: np.array(glob_H),
                self.C_: np.array(glob_C), self.p_: np.array(glob_P),
                self.obj: np.array(glob_obj), self.Obj_I: np.array(glob_objI),
                self.noobj: np.array(glob_noobj),
                self.keep_prob: 0.5, self.epoch: epoch}

    
    def training_step(self, i, tst_upd, trn_upd):
        """Run one pass of mini-batch updates and optionally evaluate the loss.

        i       -- epoch number, fed to the learning-rate schedule
        tst_upd -- when true, evaluate (and print) the loss on self.label_test
        trn_upd -- when true, evaluate the loss on self.label

        Returns ``(trn_lst, tst_lst)``: two lists holding at most one loss
        value each.
        """
        batch_size = 64
        n_examples = len(self.label)
        # Floor division keeps the batch count an int on both Python 2 and 3
        # (true division yields a float, which range() rejects).
        for _ in range(n_examples // batch_size):
            batch = self.next_batch(batch_size, exam_num=n_examples)
            feed = self.build_label(batch, i)
            self.sess.run(self.train_step, feed_dict=feed)

        trn_lst = []                   # training losses
        tst_lst = []                   # test losses

        if trn_upd:
            l = self.sess.run(self.loss, feed_dict=self.build_label(self.label, i))
            trn_lst.append(l)

        if tst_upd:
            l = self.sess.run(self.loss, feed_dict=self.build_label(self.label_test, i))
            print("\r", i, "loss : ", l)
            tst_lst.append(l)

        return (trn_lst, tst_lst)

    def next_batch(self, sizeofbatch, exam_num):
        """Return the next ``sizeofbatch`` entries of ``self.label``.

        sizeofbatch -- number of labels per batch
        exam_num    -- total number of labels

        The cursor is kept in ``self.ind_in_epoch``.  When the requested batch
        would run past ``exam_num`` the epoch counter is incremented, the
        labels are reshuffled and the batch is taken from the start of the
        new order.
        """
        # ind_in_epoch is the cursor declared on the class; the previously used
        # name `epoc_ind` was never initialised and raised AttributeError.
        start = self.ind_in_epoch
        self.ind_in_epoch += sizeofbatch
        if self.ind_in_epoch > exam_num:
            # Finished epoch
            self.epochs_done += 1
            # Shuffle the data
            perm = np.arange(exam_num)
            np.random.shuffle(perm)
            if isinstance(self.label, np.ndarray):
                self.label = self.label[perm]
            else:
                # Plain sequences cannot be indexed with an index array.
                self.label = [self.label[k] for k in perm]
            # Start next epoch
            start = 0
            self.ind_in_epoch = sizeofbatch
            assert sizeofbatch <= exam_num
        end = self.ind_in_epoch
        return self.label[start:end]
    
    #Training   function: This function is the first step in the training procedure.
    def train(self):
        """Load the label lists and run the training loop.

        Trains on the "bird" images of the "train" set and evaluates on the
        "bird" images of the "val" set.  The train/test losses are collected
        every ``epoch_size`` iterations and printed at the end.
        """
        trn_lst = []
        tst_lst = []
        self.label = voc_utils.imgs_from_category_as_list("bird", "train")
        self.label_test = voc_utils.imgs_from_category_as_list("bird", "val")
        iteration_train = 137
        epoch_size = 5
        for i in range(iteration_train):
            # Evaluate only every epoch_size-th iteration.  (The flag used to
            # be initialised to True, which made this condition a no-op.)
            evaluate = (i % epoch_size == 0)
            l, tl = self.training_step(i, evaluate, evaluate)
            trn_lst += l
            tst_lst += tl
        print("train loss")
        print(trn_lst)
        print("test loss")
        print(tst_lst)
        
# Script entry point: constructing YOLO_TF runs the whole pipeline from __init__.
if __name__ == '__main__':
    yolo = YOLO_TF()
    # Give OpenCV up to 1000 ms (or a key press) to service its window events.
    cv2.waitKey(1000)