#### File: ReadExpData.ipynb
- This file reads every video you provide, extracts the experiment data, and writes it into a corresponding Excel workbook.
- The main function `readExpData()` is at the bottom. **Please ensure that all of the code above `readExpData()` has been run before launching the main function.**
- Simply run the cells in order.
- To read the descriptions of all functions, it is easiest to go through them from bottom to top.

##### some imports and global variables
- You may need to install opencv-python, matplotlib, torch, numpy, torchvision, xlwt, Pillow.
- If you run it in VS Code, you may need to add the path of "ExpNetwork.py" so that the import resolves. Open "Settings: Python > Analysis: Extra Paths" and add the path there.

In [None]:
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torchvision import transforms
from os import makedirs
from xlwt import Workbook
from torch.utils.data import DataLoader
from PIL import Image
from sys import stdout
from datetime import datetime
from ExpNetwork import *

# Preprocessing pipeline handed to MyDataset_notfromdisk as its `transform`:
# convert the image to a tensor, then normalize with mean 0.5 and std 0.5.
# NOTE: `(0.5 )` is a plain float, not a one-element tuple.
custom_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5 ), (0.5))],
    )
# Label for each network output index; netRead() maps predicted indices
# to these strings.
classes = ('-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'empty')

# Print tensors in full instead of the summarized form (useful for debugging).
torch.set_printoptions(profile="full")

#### assisting functions 1
- `pad_image()`: used in `processImage()`
- `findMin()`: used in `processImage()`

In [None]:
def colorThres(img, gaugetype):
    """Build a binary mask of the pixels whose color matches a gauge display.

    Args:
        img: array indexed as img[row, col, channel] with at least three
            channels (presumably a BGR frame as delivered by OpenCV -- TODO
            confirm the channel order with the caller).
        gaugetype: one of 'R', 'I', 'Tm', 'T'. 'Tm' selects pixels with
            channel 2 above 250 and channel 0 strictly between 75 and 200;
            every other gauge selects pixels with channel 1 above 250,
            channel 0 below 150 and channel 2 below 200.

    Returns:
        A float64 array of shape (rows, cols) holding 1 for selected pixels
        and 0 elsewhere, dilated with a 2x2 kernel.

    Raises:
        ValueError: if gaugetype is not one of the four known gauges.
    """
    if gaugetype not in ('R', 'I', 'Tm', 'T'):
        raise ValueError('gaugetype error.')

    chan0 = img[:, :, 0]
    chan1 = img[:, :, 1]
    chan2 = img[:, :, 2]
    # Vectorized thresholds: same per-pixel tests as a Python double loop,
    # but evaluated by NumPy in one pass.
    if gaugetype == 'Tm':
        mask = (chan2 > 250) & (chan0 > 75) & (chan0 < 200)
    else:
        mask = (chan1 > 250) & (chan0 < 150) & (chan2 < 200)

    onechannel = mask.astype(np.float64)
    # A small dilation thickens the strokes and closes one-pixel gaps.
    onechannel = cv2.dilate(onechannel, np.ones((2,2), np.uint8))
    return onechannel

def handleLR(mat,imgL):
    """Find the next horizontal run of "inked" columns in a 2-D mask.

    A column counts as inked when the sum of its values exceeds 2.

    Args:
        mat: 2-D array (rows x cols).
        imgL: column index at which the search starts.

    Returns:
        (imgL, imgR): the first inked column at or after the start, and the
        first non-inked column after it (half-open interval). When no inked
        column is left, both values are equal.
    """
    _, width = mat.shape
    # Sum every column once with NumPy. The built-in sum() adds NumPy scalars
    # one by one, which is slow and, for uint8 input, can wrap around under
    # NumPy 2 scalar promotion rules; ndarray.sum() uses a wide accumulator.
    col_sums = np.asarray(mat).sum(axis=0)

    while imgL < width and col_sums[imgL] <= 2:
        imgL += 1
    imgR = imgL
    while imgR < width and col_sums[imgR] > 2:
        imgR += 1
    # NOTE: an earlier, disabled draft rejected runs narrower than 10 columns
    # unless they were tall enough to be the digit '1' (i.e. not a speck).
    return imgL, imgR

def handleTB(mat, imgT, strictmode = True):
    """Find the next vertical band of "inked" rows in a 2-D mask.

    A row counts as inked when the sum of its values exceeds 2.

    Args:
        mat: 2-D array (rows x cols).
        imgT: row index at which the search starts.
        strictmode: when True, bands shorter than 10 rows are treated as
            noise and skipped.

    Returns:
        (imgT, imgB): top row of the band and the first row below it
        (half-open interval). When no acceptable band exists, both values
        are equal (the row where the search ran out).
    """
    height, _ = mat.shape
    # One vectorized pass instead of a Python-level sum() per row.
    row_sums = np.asarray(mat).sum(axis=1)

    while True:
        while imgT < height and row_sums[imgT] <= 2:
            imgT += 1
        if imgT >= height:
            # Ran off the bottom: report an empty band. Without this exit the
            # strict-mode retry below would spin forever on an image that
            # contains no band of at least 10 rows.
            return imgT, imgT
        imgB = imgT
        while imgB < height and row_sums[imgB] > 2:
            imgB += 1
        if strictmode and imgB - imgT < 10:
            # Too short to be a digit row; keep searching below it.
            imgT = imgB
        else:
            return imgT, imgB

def Split(mat):
    """Locate the digit row in a mask and split it into at most five column runs.

    The vertical band is found with handleTB() (strict mode, starting at
    row 0); the mask is cropped to that band and handleLR() is then called
    repeatedly, each search starting one column past the previous run.

    Returns:
        (imgT, imgB, postuplelist) where imgT/imgB delimit the band and
        postuplelist holds one (left, right) column pair per run found.
    """
    imgT, imgB = handleTB(mat, 0)
    band = mat[imgT:imgB, :]

    postuplelist = []
    cursor = 0
    remaining = 5  # never report more than five runs
    while remaining > 0:
        left, right = handleLR(band, cursor)
        if left == right:
            # Empty interval: nothing inked is left to the right.
            break
        postuplelist.append((left, right))
        cursor = right + 1
        remaining -= 1
    return imgT, imgB, postuplelist

def pad_image(im, height, width):
    """Fit an image into a height x width canvas, keeping its aspect ratio.

    The input array is converted to a PIL image, scaled (bilinear) so that
    it fills the target along its limiting dimension, and pasted centered on
    a black single-channel ('L') canvas of the target size.

    Args:
        im: 2-D image array.
        height: target height in pixels.
        width: target width in pixels.

    Returns:
        The padded image as an array of shape (height, width).
    """
    picture = Image.fromarray(im)
    src_w, src_h = picture.size

    if src_w >= src_h*width/height:
        # Relatively wider than the target: fill the width, center vertically.
        scaled_size = (width, int(src_h*width/src_w))
        offset = (0, (height - scaled_size[1])//2)
    else:
        # Relatively taller than the target: fill the height, center horizontally.
        scaled_size = (int(src_w*height/src_h), height)
        offset = ((width - scaled_size[0])//2, 0)

    canvas = Image.new(mode='L', size=(width, height), color=0)
    canvas.paste(picture.resize(scaled_size, Image.BILINEAR), offset)
    return np.asarray(canvas)

def findMin(arr):
    """Return (index, value) of the first smallest element of arr.

    Only values below 99999 are considered; if there is none (including an
    empty input) the result is (-1, 99999).
    """
    best_index, best_value = -1, 99999
    for position, value in enumerate(arr):
        if value < best_value:
            best_index, best_value = position, value
    return best_index, best_value

def computeImageSize():
    """Placeholder without an implementation; always returns None."""
    return None

#### assisting functions 2
- `processImage()`:
  - It is called in `splitSave()`. It receives an image, a template image, and a gauge type.
  - It then calls `cv2.matchTemplate()` to locate the number. For example, given the template image of 'E', it locates 'E' (see the variable `max_loc`) and finds the other digits from their position relative to 'E'.
  - Different types correspond to the different gauges to be read. For each gauge, the relative positions are set by hand. You may adjust them by hand if necessary.
  - This function also pre-processes the image: converting from BGR to grayscale, thresholding, resizing, and dilating.
- `netRead()`: called in `readNumber()` to read a number from a batch provided by the dataloader.
- `drawImages_Gray()`: only for debugging.
- `isValidNum()`: called in `readNumber()` to judge whether a valid number has been read.

In [None]:
def processImage(img, ned, gaugetype):
    """Locate a template in a gauge image and cut out the four character cells.

    The template position is found with cv2.matchTemplate (TM_CCOEFF_NORMED).
    The image is then converted to grayscale, binarized, and four cells are
    cropped at hand-tuned pixel offsets relative to the match position.

    Args:
        img: sub-image of the frame showing one gauge, in BGR (not RGB).
        ned: template image to search for, in BGR.
        gaugetype: "R", "Tm", "T" or "I"; selects threshold and offsets.

    Returns:
        (sample1, sample2, negsample, sample3). sample1..3 are padded to
        32x32 and dilated (3x3 kernel for "Tm", 1x1 otherwise). negsample is
        the sign cell padded to 32x32, except for "Tm" where it is merely the
        top-left 32x32 corner of the binarized image (a filler: temperature
        is never negative).

    Raises:
        ValueError: if gaugetype is not one of the four known gauges
            (previously this surfaced as an UnboundLocalError after the
            template match had already run).
    """
    # Per gauge: binarization threshold, then the crop windows of
    # sample1, sample2, negsample, sample3 as (top, bottom, left, right)
    # offsets from the match position. Crop sizes before padding:
    # R 26x26, Tm 32x32, T and I 30x30.
    # "Tm" is located via the '0' marker. Backup offsets for locating it via
    # the degree-C marker instead (32x23 cells) were:
    #   sample1 (8, 40, -87, -64), sample2 (8, 40, -60, -37),
    #   sample3 (8, 40, -30, -7).
    layouts = {
        "R":  (150, (0, 26, -50, -24), (0, 26, -25, 1), (0, 26, 23, 49), (-1, 25, 48, 74)),
        "Tm": (100, (4, 36, 27, 59), (4, 36, 53, 85), None, (4, 36, 81, 113)),
        "T":  (150, (2, 32, -74, -44), (2, 32, -38, -8), (0, 30, 33, 63), (0, 30, 69, 99)),
        "I":  (150, (4, 34, -74, -44), (3, 33, -38, -8), (3, 33, 35, 65), (0, 30, 73, 103)),
    }
    if gaugetype not in layouts:
        raise ValueError('gaugetype error.')
    threshold, box1, box2, negbox, box3 = layouts[gaugetype]

    # result has shape (H - h + 1, W - w + 1) for an HxW image and hxw template,
    # e.g. img (360, 426, 3) and ned (26, 21, 3) give (335, 406).
    result = cv2.matchTemplate(img, ned, cv2.TM_CCOEFF_NORMED)
    if gaugetype == "Tm":
        # Several good matches may exist; take the leftmost one above 0.8.
        match_rows, match_cols = np.where(result > 0.8)
        if len(match_cols) > 0:
            leftmost, _ = findMin(match_cols)
            max_loc = (match_cols[leftmost], match_rows[leftmost])
        else:
            _, _, _, max_loc = cv2.minMaxLoc(result)
    else:
        _, _, _, max_loc = cv2.minMaxLoc(result)
    x, y = max_loc[0], max_loc[1]  # max_loc is (column, row)

    test_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, test_img = cv2.threshold(test_img, threshold, 255, cv2.THRESH_BINARY)

    def crop(box):
        # NOTE(review): if the match sits so close to the border that an
        # offset index becomes negative, NumPy wraps the index around and the
        # crop is empty or wrong -- confirm the gauges are always well inside
        # the sub-image.
        top, bottom, left, right = box
        return test_img[y+top : y+bottom, x+left : x+right]

    sample1 = pad_image(crop(box1), 32, 32)
    sample2 = pad_image(crop(box2), 32, 32)
    sample3 = pad_image(crop(box3), 32, 32)
    if negbox is None:
        negsample = test_img[0:32, 0:32]
    else:
        negsample = pad_image(crop(negbox), 32, 32)

    kernel_size = (3, 3) if gaugetype == "Tm" else (1, 1)
    kernel = np.ones(kernel_size, np.uint8)
    sample1 = cv2.dilate(sample1, kernel)
    sample2 = cv2.dilate(sample2, kernel)
    sample3 = cv2.dilate(sample3, kernel)
    return sample1, sample2, negsample, sample3

def drawImages_Gray(imgarr, prompt):
    """Debug helper: print a caption, then show each image in grayscale.

    Args:
        imgarr: iterable of images accepted by plt.imshow.
        prompt: text printed once before the images are displayed.
    """
    print(prompt)
    for picture in imgarr:
        # One figure per image; plt.show() is called after each one.
        plt.imshow(picture, cmap="gray")
        plt.show()

def netRead(batch, net):
    """Classify the four images of one batch and return their labels.

    Args:
        batch: sequence whose element 0 holds the images passed to the
            network (assumed to contain at least four images in the order
            first digit, second digit, sign, third digit -- TODO confirm
            against MyDataset_notfromdisk).
        net: the trained network; called as net(images).

    Returns:
        (first, second, neg, third): entries of `classes` for images 0..3.
    """
    images = batch[0]
    # Pure inference: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        sm_outputs = nn.Softmax(dim=1)(net(images))
    _, index = torch.max(sm_outputs, dim=1)
    labels = [classes[position] for position in index.tolist()]
    first = labels[0]
    second = labels[1]
    neg = labels[2]
    third = labels[3]
    return first, second, neg, third

def isValidNum(first, second, neg, third, caresign = True):
    """Tell whether four recognized labels form a readable number.

    The three digit positions must each hold a digit label (neither '-' nor
    'empty'). When caresign is True, the sign position must hold '-' or
    'empty'; when it is False the sign position is ignored.

    Returns:
        True if the reading is valid, False otherwise.
    """
    non_digits = ('-', 'empty')
    for digit in (first, second, third):
        if digit in non_digits:
            return False
    sign_is_bad = neg not in non_digits
    return not (sign_is_bad and caresign == True)

#### functions in `ReadExpData()`
- `loadNet()`: load the parameters of the CNN from the given path
- `readTempate()`: load the four template images from `./template images/`
- `openVideo()`: open the video from `./videos/` and check that it could be opened
- `showVideoInfo()`: show information about the video, such as FPS, sampling rate, etc.
- `initExcelTable()`: initialize a workbook
- `splitSave()`: 
  - According to the ratios given in the parameter `splitarray` of `readExpData()`, it cuts one sub-image per gauge out of the video frame. Each tuple is (row_lower, row_upper, col_lower, col_upper), expressed as fractions of the frame's height and width. For example, if the third tuple of `splitarray` is (0, 0.5, 0.4, 0.8) and the frame is 1280*720 (width*height), `splitSave()` cuts out the gauge "T" as `frame[int(720*0):int(720*0.5), int(1280*0.4):int(1280*0.8)]`.
  - Then it passes each sub-image to `processImage()`, which is introduced above.
- `showProgress()`: a simple implementation of a progress bar
- `readNumber()`: it uses the CNN to identify the digits, calculates the result and writes it to the workbook.
- `saveResult()`: it saves the workbook into the `./resultData/` directory.

In [None]:
def loadNet(netpath):
    """Create a MyNetwork and load its trained parameters for inference.

    Args:
        netpath: path of the saved state dict (e.g. "./resultv3_gray.pth").

    Returns:
        The network with the parameters loaded, switched to evaluation mode.
    """
    net = MyNetwork()
    # map_location lets a checkpoint saved on a GPU load on a CPU-only
    # machine; the network is only ever fed CPU tensors in this notebook.
    state = torch.load(netpath, map_location="cpu")
    net.load_state_dict(state)
    # Evaluation mode makes layers such as dropout/batch-norm (if MyNetwork
    # has any) behave deterministically; it is a no-op otherwise.
    net.eval()
    return net

def readTempate():
    """Load the template images used to locate each gauge's digits.

    Returns:
        A list of four BGR images in the order E1, 0, E2, E3 (the order in
        which splitSave() uses them for the gauges R, Tm, T, I).

    Raises:
        FileNotFoundError: if a template cannot be read. cv2.imread returns
            None instead of raising, which would otherwise only show up
            later as an obscure matchTemplate error.
    """
    template_paths = (
        "./template images/E1.png",
        "./template images/0.png",
        "./template images/E2.png",
        "./template images/E3.png",
    )
    tlist = []
    for path in template_paths:
        ned = cv2.imread(path)
        if ned is None:
            raise FileNotFoundError("Template image cannot be read: " + path)
        tlist.append(ned)
    return tlist

def openVideo(videoname):
    """Open a video located in the ./videos/ directory.

    Args:
        videoname: file name of the video, relative to ./videos/.

    Returns:
        The opened cv2.VideoCapture.

    Raises:
        Exception: if the capture could not be opened.
    """
    capture = cv2.VideoCapture("./videos/" + videoname)
    if capture.isOpened():
        return capture
    raise Exception("Video cannot be opened.")

def showVideoInfo(videoname, video, sampling_rate):
    """Print basic facts about a video and derive the frame step for sampling.

    Args:
        videoname: name shown in the printout.
        video: opened capture object; queried with video.get(5) and
            video.get(7).
        sampling_rate: desired number of samples per second (Hz); e.g. 10
            means one snapshot every 0.1 s.

    Returns:
        (timeF, totalframes): timeF is the integer frame step (one frame is
        taken every timeF frames, never less than 1) and totalframes is the
        frame count reported by the video.

    Raises:
        ValueError: if sampling_rate is not positive.
    """
    if sampling_rate <= 0:
        raise ValueError("sampling_rate must be positive.")

    fps = video.get(5)          # property 5 is cv2.CAP_PROP_FPS
    totalframes = video.get(7)  # property 7 is cv2.CAP_PROP_FRAME_COUNT
    print("Video: " + videoname)
    print("Total frames: " + str(totalframes))
    print("FPS: " + str(int(fps)))
    print("Sampling_rate: " + str(sampling_rate) + "Hz")

    # Round to the nearest whole step instead of truncating: with 59.94 fps
    # and 10 Hz, truncation would give a step of 5 (about 12 Hz) rather than
    # 6. Clamp to 1 so a rate above the fps cannot produce a zero step,
    # which would crash the caller's modulo test.
    timeF = max(1, round(fps / sampling_rate))
    print("Frames needed to be extracted:" + str(int(totalframes/timeF)))
    return timeF, totalframes

def initExcelTable():
    """Create a workbook with one sheet and write the column headers.

    Returns:
        (wb, sheet1): the new Workbook and its sheet 'Sheet 1', whose first
        row holds the titles of the time column and the four gauge columns.
    """
    wb = Workbook()
    sheet1 = wb.add_sheet('Sheet 1')
    headers = (
        "Time/s",
        "Resistance vacuum gauge/Pa",
        "Temperature/C",
        "Thermocouple vacuum gauge/Pa",
        "Ionization vacuum gauge/Pa",
    )
    for column, title in enumerate(headers):
        sheet1.write(0, column, title)
    return wb, sheet1

def splitSave(frame, splitarray, templateimglist, debugmode = []):
    """Cut one sub-image per gauge out of a frame and extract its digit cells.

    Gauges are handled in the fixed order R (resistance vacuum gauge),
    Tm (temperature), T (thermocouple vacuum gauge), I (ionization vacuum
    gauge); entry k of splitarray and of templateimglist belongs to the
    k-th gauge in that order.

    Args:
        frame: video frame indexed as frame[row, col, channel].
        splitarray: four tuples (row_lower, row_upper, col_lower, col_upper)
            given as fractions of the frame's row and column counts.
        templateimglist: four template images, one per gauge.
        debugmode: collection of gauge names whose cells should be displayed.

    Returns:
        A 16-tuple: (first, second, neg, third) for R, then Tm, T and I.
    """
    rows, cols, _ = np.shape(frame)
    gauge_order = ("R", "Tm", "T", "I")

    cells = {}
    for position, gauge in enumerate(gauge_order):
        row_lo, row_hi, col_lo, col_hi = splitarray[position]
        region = frame[int(row_lo*rows):int(row_hi*rows), int(col_lo*cols):int(col_hi*cols)]
        first, second, neg, third = processImage(region, templateimglist[position], gauge)
        cells[gauge] = (first, second, neg, third)

    # Display only after every gauge has been processed.
    for gauge in gauge_order:
        if gauge in debugmode:
            first, second, neg, third = cells[gauge]
            if gauge == "Tm":
                # The sign cell is meaningless for temperature, so skip it.
                shown = [first, second, third]
            else:
                shown = [first, second, neg, third]
            drawImages_Gray(shown, gauge + "_Split:")

    return cells["R"] + cells["Tm"] + cells["T"] + cells["I"]

def showProgress(cur, tot):
    """Redraw a one-line progress indicator showing cur/tot as a percentage.

    A carriage return is printed first so that the new text overwrites the
    previous one on the same line; no newline is emitted.
    """
    print(end="\r")
    percent = cur / tot * 100
    print(f"progress: {percent:.1f}%", end="")
    stdout.flush()

def _decodeSciNotation(first, second, neg, third):
    """Turn labels into first.second * 10^(+/-third), or "NaN" if unreadable."""
    if not isValidNum(first, second, neg, third):
        return "NaN"
    # isValidNum guarantees neg is either 'empty' (positive exponent) or '-'.
    exponent = int(third) if neg == 'empty' else -int(third)
    return (int(first) + int(second)*0.1)*(10**exponent)


def _decodeTemperature(first, second, neg, third):
    """Turn labels into a three-digit integer, or "NaN" if unreadable."""
    # The sign cell carries no information for temperature and is ignored.
    if not isValidNum(first, second, neg, third, False):
        return "NaN"
    return int(first)*100 + int(second)*10 + int(third)


def _readGauge(net, imageslist, gauge, debugmode):
    """Yield (row, (first, second, neg, third)) for every reading of one gauge."""
    dataset = MyDataset_notfromdisk(imglist=imageslist, transform=custom_transform, mode="test")
    # One batch of 4 images is treated as one reading
    # (assumes the dataset serves the four cells of a reading consecutively
    # -- TODO confirm against MyDataset_notfromdisk).
    loader = DataLoader(dataset, batch_size = 4, shuffle = False)
    for row, batch in enumerate(loader, 1):
        first, second, neg, third = netRead(batch, net)
        if gauge in debugmode:
            if gauge == 'Tm':
                shown = (0, 1, 3)
                prompt = "Tm: first={}, second={}, third={}".format(first, second, third)
            else:
                shown = (0, 1, 2, 3)
                prompt = "{}: first={}, second={}, neg={}, third={}".format(
                    gauge, first, second, neg, third)
            drawImages_Gray([batch[0][k].permute(1, 2, 0) for k in shown], prompt)
        yield row, (first, second, neg, third)


def readNumber(net, sheet, debugmode = (), show_result = False,  imageslist_R = None,
                imageslist_Tm = None, imageslist_T = None, imageslist_I = None,
                sampling_rate = 10):
    """Recognize all gauge readings with the network and write them to the sheet.

    Row k (1-based) of the sheet receives the k-th reading: column 0 the
    time in seconds, column 1 gauge R, column 2 temperature Tm, column 3
    gauge T, column 4 gauge I. Unreadable values are written as "NaN".

    Args:
        net: trained network, used through netRead().
        sheet: worksheet offering write(row, col, value).
        debugmode: collection of gauge names ('R', 'Tm', 'T', 'I') whose
            network inputs and predictions should be displayed.
        show_result: when True, print each R result with its time stamp.
        imageslist_R, imageslist_Tm, imageslist_T, imageslist_I: the digit
            cells collected for each gauge; all four are required.
        sampling_rate: readings per second used for the time column
            (time = row / sampling_rate). Defaults to 10, the value that
            used to be hard-coded.

    Raises:
        Exception: if any of the four image lists is missing.
        ValueError: if sampling_rate is not positive.
    """
    if imageslist_R is None or imageslist_Tm is None or imageslist_T is None or imageslist_I is None:
        raise Exception('At least one imageslist not found.')
    if sampling_rate <= 0:
        raise ValueError('sampling_rate must be positive.')

    # R (also writes the time column)
    for row, digits in _readGauge(net, imageslist_R, 'R', debugmode):
        result = _decodeSciNotation(*digits)
        sheet.write(row, 1, result)
        sheet.write(row, 0, row/sampling_rate)
        if show_result:
            print(result, row/sampling_rate) #for debug

    # Tm
    for row, digits in _readGauge(net, imageslist_Tm, 'Tm', debugmode):
        sheet.write(row, 2, _decodeTemperature(*digits))

    # T
    for row, digits in _readGauge(net, imageslist_T, 'T', debugmode):
        sheet.write(row, 3, _decodeSciNotation(*digits))

    # I
    for row, digits in _readGauge(net, imageslist_I, 'I', debugmode):
        sheet.write(row, 4, _decodeSciNotation(*digits))

def saveResult(wb, videoname):
    """Save the workbook as ./resultData/<video stem> <timestamp>.xls.

    Args:
        wb: workbook offering save(path).
        videoname: name of the processed video; its directory part and its
            extension are stripped to form the file name.
    """
    from os.path import basename, splitext

    makedirs("./resultData/", exist_ok=True)
    # splitext copes with extensions of any length (".mpeg", ".mp4", none),
    # unlike slicing off the last four characters.
    stem = splitext(basename(videoname))[0]
    # Colons are not allowed in Windows file names, hence the dashes.
    stamp = datetime.now().strftime("%Y-%m-%d %H-%M-%S")
    wb.save("./resultData/" + stem + " " + stamp + ".xls")

In [None]:
def readExpData(videonamelist, netpath, splitarray, sampling_rate = 10, savetodisk = False,
                fnReadNumDebug = (), fnSplitDebug = ()):
    """Extract the gauge readings of every listed video into an Excel file.

    For each video: sample frames at roughly sampling_rate, cut out the
    digit cells of the four gauges, recognize them with the network and
    save one workbook via saveResult().

    Args:
        videonamelist: names of the videos to process (opened by openVideo()).
        netpath: path of the trained network parameters.
        splitarray: four tuples of frame fractions, one per gauge, passed on
            to splitSave().
        sampling_rate: desired samples per second (Hz).
        savetodisk: currently unused; kept for interface compatibility.
        fnReadNumDebug: gauge names to visualize inside readNumber().
        fnSplitDebug: gauge names to visualize inside splitSave().
    """
    # load the trained convolution network
    net = loadNet(netpath)
    templateimglist = readTempate()
    for videoname in videonamelist:
        video = openVideo(videoname)
        try:
            # timeF: we pick one frame every 'timeF' frames.
            timeF, totalframes = showVideoInfo(videoname, video, sampling_rate)
            # A step of 0 (sampling rate above the fps) would break the
            # modulo test below.
            timeF = max(1, timeF)
            wb, sheet1 = initExcelTable()
            imageslist_R = []
            imageslist_Tm = []
            imageslist_T = []
            imageslist_I = []
            frame_cnt = 1
            print("Splitting the image...")
            while True:
                # Keep reading frames until rval=False (that is, end of file)
                rval, frame = video.read() # Note: frame is in BGR colorspace, not RGB!
                if not rval:
                    break
                if frame_cnt % timeF == 0:
                    # take down the data: 4 cells for each of R, Tm, T, I
                    cells = splitSave(frame, splitarray, templateimglist, fnSplitDebug)
                    imageslist_R.append(tuple(cells[0:4]))
                    imageslist_Tm.append(tuple(cells[4:8]))
                    imageslist_T.append(tuple(cells[8:12]))
                    imageslist_I.append(tuple(cells[12:16]))
                frame_cnt += 1
                # Some containers report no frame count; skip the percentage
                # then rather than dividing by zero.
                if frame_cnt % 50 == 0 and totalframes > 0:
                    showProgress(frame_cnt, totalframes)
        finally:
            # Free the decoder and file handle even if processing failed.
            video.release()

        print("\nReading the number...    ", end="")
        # NOTE(review): the time column is produced inside readNumber();
        # verify that it uses the same sampling rate as the one given here
        # (historically it assumed 10 Hz).
        readNumber(net=net, sheet=sheet1, imageslist_R=imageslist_R,
                    imageslist_Tm=imageslist_Tm, imageslist_T=imageslist_T, imageslist_I=imageslist_I,
                    debugmode=fnReadNumDebug)
        #save the excel table
        saveResult(wb, videoname)
        print("Done!\n")

#### Main function
- parameters:
  - videonamelist: a list containing the names of all videos you want to extract experiment data from. Note that all videos **should be placed in the `./videos/` directory next to the program.** For example, `videonamelist = ["00008.mp4", "00009.MTS"]`.
  - netpath: the path of the file holding the network you previously trained. For example, `netpath="./resultv3_gray.pth"`
  - fnReadNumDebug: a list for debugging. For instance, `fnReadNumDebug=['Tm', 'I']` means you want to inspect the results for the gauges 'Tm' and 'I' in function `readNumber()`
  - fnSplitDebug: a list for debugging function `splitSave()`, used like `fnReadNumDebug`.
  - splitarray: the ratios describing how to cut the images. Check the details in introduction above: **functions in `ReadExpData()`**
  - sampling_rate: its unit is Hz. `sampling_rate=10` means we take a snapshot every 0.1 seconds.

In [None]:
# Main entry point: run readExpData() on the listed videos.
# Edit `videonamelist` to choose which videos are processed.
# `splitarray` holds one (row_lower, row_upper, col_lower, col_upper) tuple of
# frame fractions per gauge, in the order R, Tm, T, I.
readExpData(
    videonamelist=["00023_Trim.mp4"],
    netpath="./resultv3_gray.pth",
    splitarray=[
        (0, 1/2, 0, 1/3),
        (0.4, 0.99999, 0, 0.5),
        (0, 0.5, 0.3, 0.67),
        (0, 0.5, 0.7, 0.99999),
    ],
    sampling_rate=10,
    fnReadNumDebug=[],
    fnSplitDebug=[],
)