####File: ReadExpData.ipynb
- This file reads every video you provide, extracts the experiment data, and writes it to a corresponding Excel file.
- The main function `readExpData()` is at the bottom. **Please ensure that all of the code above `readExpData()` has been compiled and run before launching the main function.**
- Simply compile and run the cells in order, from top to bottom.

In [None]:
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as functional
import torchvision 
import os
from xlwt import Workbook
from torch.utils.data import Dataset
from PIL import Image, ImageEnhance
import sys
import numpy as np

In [None]:
# Preprocessing applied to every digit crop before it is fed to the network:
# convert the PIL image to a tensor, then normalise with mean 0.5 and std 0.5.
custom_transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(mean=0.5, std=0.5),
])
# Label strings, indexed by the class id predicted by the network.
classes = tuple(str(digit) for digit in range(10)) + ('-', 'empty')

In [None]:
class Network(nn.Module):
    """Small LeNet-style CNN that classifies one 3-channel digit crop.

    The flatten step hard-codes 400 features (16 channels * 5 * 5), which is
    what two 5x5 convolutions and two 2x2 poolings produce for a 32x32 input.

    Args:
        output_size: number of output classes (logits). Defaults to 10, the
            value the original implementation hard-coded.
    """

    def __init__(self, output_size=10):
        # nn.Module must be initialised before any attribute is assigned.
        super().__init__()
        self.output_size = output_size

        # Attribute names are part of the checkpoint format (state_dict keys),
        # so they must not be renamed.
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool = nn.MaxPool2d(2)
        self.fc = nn.Linear(400, 120)
        self.fc1 = nn.Linear(120, 84)
        self.fc2 = nn.Linear(84, self.output_size)

    def forward(self, x):
        """Return raw class logits of shape (batch, output_size)."""
        x = self.pool(functional.relu(self.conv1(x)))
        x = self.pool(functional.relu(self.conv2(x)))
        # Flatten the 16x5x5 feature maps into one vector per sample.
        x = x.view(-1, 400)
        x = functional.relu(self.fc(x))
        x = functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

class MyDataset_fromdisk(Dataset): #old
    """Dataset over the digit crops stored as image files in one directory.

    File names are expected to look like ``<number>-<d>.png`` (for example
    ``1.0-1.png``): the part before the last six characters is parsed as a
    number and used as the primary sort key, the name without its four
    character extension as the secondary key.

    Args:
        dirpath: directory containing the crops (with or without a trailing
            path separator).
        transform: optional callable applied to each loaded PIL image.
        target_transform: stored for API symmetry; not applied anywhere in
            this class.
    """

    def __init__(self, dirpath, transform = None, target_transform = None):
        # float() instead of eval(): the prefix is only ever a number, and
        # evaluating file names as Python code is unsafe.
        names = sorted(os.listdir(dirpath),
                       key=lambda name: (float(name[:-6]), name[:-4]))
        # Every label is the dummy value 1 because labels are not used.
        self.imgs = [(os.path.join(dirpath, name), 1) for name in names]
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at position ``index``."""
        fn, label = self.imgs[index]
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, label

    def __len__(self):
        return len(self.imgs)

class MyDataset_notfromdisk(Dataset):
    """Dataset over in-memory digit crops.

    ``imglist`` is an iterable of 3-tuples ``(img1, img2, img3)``; the tuples
    are flattened in order, so the dataset holds three consecutive items per
    tuple. Every item carries the dummy label 1 because labels are not used.
    ``target_transform`` is stored but never applied.
    """

    def __init__(self, imglist, transform = None, target_transform = None):
        samples = []
        for first, second, third in imglist:
            samples.extend([(first, 1), (second, 1), (third, 1)])
        self.imgs = samples
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.imgs)

    def __getitem__(self, index):
        """Return ``(image, label)``; the stored array is converted to an RGB PIL image first."""
        array, label = self.imgs[index]
        img = Image.fromarray(array).convert('RGB')
        if self.transform is None:
            return img, label
        return self.transform(img), label
    

In [None]:
# def printarray(arr):
#     print("In function \"printarray:\"")
#     for i in arr:
#         for j in i:
#             print(j, end=" ")
#         print()

def pad_image(im, height, width):
    """Letterbox a cv2 image onto a black ``width`` x ``height`` canvas.

    The image is scaled (bilinear) so that it fits inside the target size
    while keeping its aspect ratio, then pasted centred on a black canvas.

    Args:
        im: cv2-style array, either 3-channel BGR or single-channel
            (2-D, or 3-D with one channel).
        height: target height in pixels.
        width: target width in pixels.

    Returns:
        A 3-channel BGR ``numpy`` array of shape ``(height, width, 3)``.
    """
    # cv2.COLOR_BGR2RGB only accepts 3- or 4-channel input, so grayscale /
    # thresholded crops have to be expanded to BGR first.
    if im.ndim == 2 or (im.ndim == 3 and im.shape[2] == 1):
        im = cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)

    # Convert from cv2 (BGR array) to a PIL image.
    im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
    w, h = im.size

    if w >= h * width / height:
        # Relatively wider than the target: fill the width, centre vertically.
        # max(1, ...) keeps extreme aspect ratios from rounding down to a
        # zero-sized resize, which PIL rejects.
        new_size = (width, max(1, int(h * width / w)))
        offset = (0, (height - new_size[1]) // 2)
    else:
        # Relatively taller than the target: fill the height, centre horizontally.
        new_size = (max(1, int(w * height / h)), height)
        offset = ((width - new_size[0]) // 2, 0)

    im = im.resize(new_size, Image.BILINEAR)
    im_new = Image.new(mode='RGB', size=(width, height), color=0)
    im_new.paste(im, offset)

    # Convert the PIL image back to a numpy array in cv2 (BGR) channel order.
    return cv2.cvtColor(np.asarray(im_new), cv2.COLOR_RGB2BGR)

In [None]:
def processImage(img, templatepath, type):
    """Locate a template in ``img`` and crop the digit cells around the match.

    The template is found with normalised cross-correlation
    (``cv2.TM_CCOEFF_NORMED``). The image is then converted to grayscale,
    binarised with a per-gauge threshold, and cropped at fixed pixel offsets
    relative to the best match location.

    Args:
        img: 3-channel image region that contains the gauge read-out.
        templatepath: path of the template image to search for.
        type: gauge tag selecting threshold and crop offsets; one of
            ``"R"``, ``"Tm"``, ``"T"`` or ``"I"``. (The name shadows the
            builtin but is kept so existing callers keep working.)

    Returns:
        ``(sample1, sample2, negsample, sample3)``, each passed through
        ``pad_image(..., 32, 32)``. Only ``"R"`` crops a dedicated
        ``negsample`` cell; for the other gauges ``negsample`` is the whole
        binarised region and is not meaningful.

    Raises:
        ValueError: if ``type`` is not one of the supported tags.
        FileNotFoundError: if the template image cannot be read.
    """
    # tag -> (threshold, sample1 box, sample2 box, negsample box, sample3 box)
    # A box is (row_start, row_stop, col_start, col_stop), given as offsets
    # from the match location; None means "the whole binarised image".
    crop_specs = {
        "R": (150, (0, 26, -50, -24), (0, 26, -25, 1), (0, 26, 23, 49), (-1, 25, 48, 74)),
        "Tm": (60, (7, 40, -87, -64), (7, 40, -60, -37), None, (7, 40, -30, -7)),
        "T": (150, (2, 32, -74, -44), (2, 32, -38, -8), None, (0, 30, 69, 99)),
        "I": (150, (4, 34, -74, -44), (3, 33, -38, -8), None, (0, 30, 73, 103)),
    }
    if type not in crop_specs:
        raise ValueError(
            "Unknown gauge type %r; expected one of %s" % (type, sorted(crop_specs)))
    threshold, box1, box2, negbox, box3 = crop_specs[type]

    ned = cv2.imread(templatepath)
    if ned is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError("Template image cannot be read: %s" % templatepath)

    result = cv2.matchTemplate(img, ned, cv2.TM_CCOEFF_NORMED)
    # minMaxLoc returns points as (x, y), i.e. (column, row).
    _, _, _, max_loc = cv2.minMaxLoc(result)
    x, y = max_loc

    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
    _, binary = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)

    def crop(box):
        if box is None:
            return binary
        row_start, row_stop, col_start, col_stop = box
        # NOTE: offsets are applied as-is; a match close to the image border
        # can produce negative indices and therefore a wrong or empty crop.
        return binary[y + row_start : y + row_stop, x + col_start : x + col_stop]

    sample1 = pad_image(crop(box1), 32, 32)
    sample2 = pad_image(crop(box2), 32, 32)
    negsample = pad_image(crop(negbox), 32, 32)
    sample3 = pad_image(crop(box3), 32, 32)

    return sample1, sample2, negsample, sample3

In [None]:
# import numpy as np
# imgtest = Image.open('./frames/1/1.0-1.png')
# imgtest = Image.open('./frames/1/1.0-1.png').convert('RGB')
# plt.imshow(imgtest)
# plt.show()
# print(np.shape(imgtest))

In [None]:
def splitSave(frame, cnt, splitarray = None, savetodisk = False):
    """Cut the four gauge regions out of ``frame`` and return their digit crops.

    ``splitarray[k]`` is ``(row_lo, row_hi, col_lo, col_hi)``, fractions of the
    frame's first and second dimension, for gauge ``k`` in the order:
    resistance vacuum gauge (R), temperature (Tm), thermocouple vacuum gauge
    (T), ionization vacuum gauge (I). ``cnt`` and ``savetodisk`` are accepted
    but not used by the current implementation.

    Returns a 14-tuple:
    ``first_R, second_R, neg_R, third_R, first_Tm, second_Tm, third_Tm,
    first_T, second_T, neg_T, third_T, first_I, second_I, third_I``.
    """
    rows, cols, _ = np.shape(frame)
    # (template image, gauge tag), in the same order as the splitarray entries.
    gauges = (("./E1.png", "R"), ("./oC.png", "Tm"), ("./E2.png", "T"), ("./E3.png", "I"))

    crops = {}
    for idx, (template, tag) in enumerate(gauges):
        row_lo, row_hi, col_lo, col_hi = splitarray[idx]
        region = frame[int(row_lo*rows):int(row_hi*rows), int(col_lo*cols):int(col_hi*cols)]
        first, second, neg, third = processImage(region, template, tag)
        # Only the three digit cells are resized; the sign cell is passed through.
        crops[tag] = (
            cv2.resize(first, (32, 32)),
            cv2.resize(second, (32, 32)),
            neg,
            cv2.resize(third, (32, 32)),
        )

    first_R, second_R, neg_R, third_R = crops["R"]
    first_Tm, second_Tm, _, third_Tm = crops["Tm"]
    first_T, second_T, neg_T, third_T = crops["T"]
    first_I, second_I, _, third_I = crops["I"]
    return (first_R, second_R, neg_R, third_R,
            first_Tm, second_Tm, third_Tm,
            first_T, second_T, neg_T, third_T,
            first_I, second_I, third_I)

def _classify_triples(net, imageslist):
    """Classify every ``(img1, img2, img3)`` entry and return a list of digit triples."""
    dataset = MyDataset_notfromdisk(imglist=imageslist, transform=custom_transform)
    # batch_size=3 with shuffle=False makes each batch exactly one reading.
    loader = torch.utils.data.DataLoader(dataset, batch_size=3, shuffle=False)
    softmax = nn.Softmax(dim=1)
    triples = []
    for batch in loader:
        images = batch[0]
        with torch.no_grad():  # inference only, no gradients needed
            outputs = net(images)
        _, index = torch.max(softmax(outputs), dim=1)
        # int() instead of eval(): class labels are plain digit strings.
        triples.append((int(classes[index[0]]),
                        int(classes[index[1]]),
                        int(classes[index[2]])))
    return triples


def readNumber(net, sheet, show_img = False, show_result = False, test_path1 = './frames/1/', readfromdisk = False,
                imageslist_R = None, imageslist_Tm = None, imageslist_T = None, imageslist_I = None):
    """Recognise the digits of every sampled reading and write them to ``sheet``.

    Row ``i`` (1-based) of the sheet receives: column 0 the time ``i/10``,
    column 1 the resistance gauge value, column 2 the temperature, column 3
    the thermocouple gauge value and column 4 the ionization gauge value.
    Gauge values are computed as ``(first + second*0.1) * 10**third`` and the
    temperature as ``first*100 + second*10 + third``.

    Args:
        net: callable model mapping an image batch to class logits.
        sheet: worksheet object providing ``write(row, col, value)``.
        show_img: accepted for compatibility; not used.
        show_result: if true, print the recognised values for debugging.
        test_path1: directory used when ``readfromdisk`` is true.
        readfromdisk: if true, only the on-disk dataset is constructed; no
            recognition is performed and nothing is written to ``sheet``.
        imageslist_R, imageslist_Tm, imageslist_T, imageslist_I: lists of
            ``(first, second, third)`` image triples, one list per gauge.

    Raises:
        Exception: if ``readfromdisk`` is false and any image list is None.
    """
    if readfromdisk:
        # NOTE(review): this branch has never done more than build the dataset;
        # reading digits from disk is not implemented.
        MyDataset_fromdisk(dirpath=test_path1, transform=custom_transform)
        return

    if imageslist_R is None or imageslist_Tm is None or imageslist_T is None or imageslist_I is None:
        raise Exception('At least one imageslist not found.')

    # NOTE(review): the third digit is always used as a non-negative exponent;
    # the sign crop produced upstream is never classified - confirm this
    # matches what the gauges display.

    # R: also writes the time column; i/10 is the time in seconds.
    for i, (first, second, third) in enumerate(_classify_triples(net, imageslist_R), 1):
        result = (first + second*0.1)*(10**third)
        sheet.write(i, 1, result)
        sheet.write(i, 0, i/10)
        if show_result:
            print(result, i/10) #for debug

    # Tm
    for i, (first, second, third) in enumerate(_classify_triples(net, imageslist_Tm), 1):
        if show_result:
            # Was an unconditional leftover debug print; now opt-in like the R loop.
            print("Tm: first=",first," second=",second, " third=", third)
        result = first*100 + second*10 + third
        sheet.write(i, 2, result)

    # T
    for i, (first, second, third) in enumerate(_classify_triples(net, imageslist_T), 1):
        result = (first + second*0.1)*(10**third)
        sheet.write(i, 3, result)

    # I
    for i, (first, second, third) in enumerate(_classify_triples(net, imageslist_I), 1):
        result = (first + second*0.1)*(10**third)
        sheet.write(i, 4, result)

In [None]:
def clearDir(path, debug=False):# One layer delete
    """Delete every entry directly inside ``path``; the directory itself is kept.

    Does nothing when ``path`` does not exist. Only one level is handled:
    entries are removed with ``os.remove``, so a sub-directory inside ``path``
    makes it raise ``OSError``.

    Args:
        path: directory to empty, with or without a trailing path separator.
        debug: if true, print progress every 1000 files and a final message.
    """
    if not os.path.exists(path):
        return
    for i, img in enumerate(os.listdir(path), 1):
        # os.path.join instead of string concatenation, so a path without a
        # trailing separator still resolves to the right file.
        os.remove(os.path.join(path, img))
        if debug and i % 1000 == 0:
            print("%d files have been deleted"%(i))
    if debug:
        print("All files deleted.")

In [None]:
#assist functions of `ReadExpData()`
def loadNet(netpath):
    """Build a ``Network`` and load the trained weights stored at ``netpath``.

    Returns the network in evaluation mode, ready for inference.
    """
    net = Network()
    # map_location="cpu": the network is only ever run on CPU tensors in this
    # file, so a checkpoint saved on a GPU must be remapped to load on a
    # machine without CUDA.
    net.load_state_dict(torch.load(netpath, map_location="cpu"))
    net.eval()
    return net

def openVideo(videoname):
    """Open ``./<videoname>`` with OpenCV and return the capture object.

    Raises ``Exception`` when the capture reports that it is not opened.
    """
    capture = cv2.VideoCapture("./" + videoname)
    if capture.isOpened():
        return capture
    raise Exception("Video cannot be opened.")

def showVideoInfo(videoname, video):
    """Print basic information about ``video`` and derive the sampling step.

    Returns ``(sampling_rate, timeF, total_frames)`` where ``sampling_rate``
    is fixed at 10 (one sample every 0.1 s) and ``timeF`` is the number of
    video frames between two samples (``fps / sampling_rate``).
    """
    fps = video.get(5)            # property 5: frames per second
    total_frames = video.get(7)   # property 7: total number of frames
    sampling_rate = 10
    # One frame is picked every `timeF` frames.
    timeF = fps / sampling_rate

    print("Video: " + videoname)
    print("Total frams: " + str(total_frames))
    print("FPS: " + str(fps))
    print("Sampling_rate: " + str(sampling_rate) + "Hz")
    print("Frams needed to be extracted:" + str(total_frames/timeF))
    return sampling_rate, timeF, total_frames

def initExcelTable():
    """Create a workbook with one sheet and write the header row.

    Returns ``(workbook, sheet)``.
    """
    wb = Workbook()
    sheet1 = wb.add_sheet('Sheet 1')
    headers = (
        "Time/s",
        "Resistance vacuum gauge/Pa",
        "Temperature/C",
        "Thermocouple vacuum gauge/Pa",
        "Ionization vacuum gauge/Pa",
    )
    for column, title in enumerate(headers):
        sheet1.write(0, column, title)
    return wb, sheet1

def saveResult(wb, videoname):
    """Save ``wb`` as ``<videoname>_result/result.xls``.

    An existing result directory is emptied and removed first, then created
    again from scratch.
    """
    result_dir = videoname + "_result/"
    if os.path.exists(result_dir):
        clearDir(result_dir)
        os.removedirs(result_dir)
    os.makedirs(result_dir)
    wb.save(result_dir + 'result.xls')
    
def showProgress(cur, tot):
    """Overwrite the current console line with ``cur/tot`` as a percentage."""
    # The leading carriage return moves the cursor back to the line start.
    print(f"\rprogress: {cur / tot * 100:.1f}%", end="")
    sys.stdout.flush()

In [None]:
def readExpData(_videoname, _netpath = './result.pth', intermediate_img_folder = './frames/', savetodisk = False, splitarray = None, max_samples = 100):
    """Extract the gauge readings from each video and save them as Excel files.

    For every video, frames are sampled at the rate reported by
    ``showVideoInfo`` (10 samples per second), the digit cells of the four
    gauges are cropped with ``splitSave``, recognised with ``readNumber`` and
    written to ``<videoname>_result/result.xls``.

    Args:
        _videoname: list of video file names (a single name given as a string
            is also accepted).
        _netpath: path of the trained network weights.
        intermediate_img_folder: folder whose ``1/`` sub-directory is emptied
            before each video is processed.
        savetodisk: forwarded to ``splitSave`` and, as ``readfromdisk``, to
            ``readNumber``. When true, no crops are collected in memory.
        splitarray: four ``(row_lo, row_hi, col_lo, col_hi)`` fraction tuples,
            one per gauge; see ``splitSave``.
        max_samples: stop reading a video once this many samples have been
            collected (previously a hard-coded 100). ``None`` removes the cap.
    """
    if isinstance(_videoname, str):
        # A bare string would otherwise be iterated character by character.
        _videoname = [_videoname]

    # load the trained convolution network
    net = loadNet(_netpath)
    for videoname in _videoname:
        if os.path.exists(intermediate_img_folder):
            clearDir(intermediate_img_folder + '1/')
        video = openVideo(videoname)
        try:
            # timeF: one frame is picked every 'timeF' frames (may be fractional).
            _, timeF, totalframes = showVideoInfo(videoname, video)
            wb, sheet1 = initExcelTable()
            print("Splitting the image...")
            imageslist_R = []
            imageslist_Tm = []
            imageslist_T = []
            imageslist_I = []
            frame_cnt = 1
            samples_taken = 0
            rval = True
            while rval:
                # Keep reading frames until rval=False (that is, end of file).
                rval, frame = video.read()
                # Sample when the frame counter reaches the next multiple of
                # timeF. Unlike `frame_cnt % timeF == 0`, this also works when
                # timeF is not an integer (e.g. 25 or 29.97 fps); for integer
                # timeF the same frames are picked.
                if rval and frame_cnt >= (samples_taken + 1) * timeF:
                    samples_taken += 1
                    # splitSave returns 14 crops; the sign crops are not used here.
                    (first_R, second_R, _neg_R, third_R,
                     first_Tm, second_Tm, third_Tm,
                     first_T, second_T, _neg_T, third_T,
                     first_I, second_I, third_I) = splitSave(
                        frame=frame, cnt=frame_cnt//timeF, savetodisk=savetodisk, splitarray=splitarray)
                    if not savetodisk:
                        # Each list element is one (first, second, third) triple.
                        imageslist_R.append((first_R, second_R, third_R))
                        imageslist_Tm.append((first_Tm, second_Tm, third_Tm))
                        imageslist_T.append((first_T, second_T, third_T))
                        imageslist_I.append((first_I, second_I, third_I))
                        if max_samples is not None and len(imageslist_R) >= max_samples:
                            break
                frame_cnt += 1
                showProgress(frame_cnt, totalframes)
        finally:
            # Always release the capture, even if processing fails.
            video.release()

        print("\nReading the number...    ", end="")
        readNumber(net=net, sheet=sheet1, readfromdisk=savetodisk, imageslist_R=imageslist_R,
                    imageslist_Tm=imageslist_Tm, imageslist_T=imageslist_T, imageslist_I=imageslist_I)
        # save the excel table
        saveResult(wb, videoname)
        print("Done!\n")

####Main function
- parameters:
  - _videoname: a list containing the names of all videos you want to extract experiment data from. Note that all videos **should be in the same directory as the program.**
  - _netpath: the file name of the network you previously trained.

In [None]:
#Main function: readExpData()
#for each video, you may change the variable 'videoname'
# splitarray holds one (row_lo, row_hi, col_lo, col_hi) tuple per gauge. Each value is a
# fraction of the frame's first (rows) or second (columns) dimension, and the tuples are
# given in the order: resistance vacuum gauge, temperature, thermocouple vacuum gauge,
# ionization vacuum gauge.
readExpData(_videoname = ["00010.MTS"], _netpath="./result_bsize=30, epoch=50.pth", savetodisk=False,
    splitarray=[(0, 1/2, 0, 1/3), (0.4, 0.99999, 0, 0.5), (0, 0.5, 0.4, 0.8), (0, 0.5, 0.7, 0.99999)])