In [None]:
# plot imports
import matplotlib.pyplot as plt
from lc_tda.plot import rcparams, format_axis_im

# other imports
import cv2
import glob
import pickle
import numpy as np
from tqdm import tqdm
from scipy.ndimage import zoom

# apply the shared matplotlib settings provided by lc_tda.plot
# NOTE(review): the meaning of the argument `1` is defined in lc_tda.plot,
# not in this notebook - confirm there before changing it
rcparams(1)

# palette of 12 hex colors available to the plots in this notebook
COLOR = ['#515151', '#df5048', '#3370d8', '#5baa71',
         '#a87bd8', '#c49b33', '#5bc8ca', '#76504f',
         '#8e8c2b', '#ea6f2d', '#7099c8', '#80b537']


In [None]:
# gather every video below the o3cl2 folder (upper- and lower-case
# extension are globbed separately) and keep the paths in sorted order
files = sorted(
    glob.glob(
        '/Volumes/Samsung_T5/Backup_20201218/data/lc_video/o3cl2/*/*.MP4')
    + glob.glob('/Volumes/Samsung_T5/Backup_20201218/data/lc_video/o3cl2/*/*.mp4')
)
print(len(files))


### 1. Load Endpoint Image


In [1]:
def extract_endpoint(file, n_frames=150, stride=30):
    """
    Load a fixed-length, sub-sampled clip and the endpoint frame of a video.

    Args:
        file: str, path to the video; the name of its parent folder must
            start with '<o3>_<cl2>' because both concentrations are parsed
            from it
        n_frames: int, number of frames in the returned clip
        stride: int, distance (in source frames) between two sampled frames

    Returns:
        vid: numpy array of shape (n_frames, rows, cols, channels) holding
            RGB frames taken every `stride` source frames; once the video
            runs out, the last sampled frame is repeated (or the endpoint
            frame when the video is shorter than one stride)
        conc_o3: float, first number in the folder name
        conc_cl2: float, second number in the folder name
        img: numpy array, last readable frame of the video in RGB

    Raises:
        IOError: if the video cannot be opened, has no readable frame, or a
            sampled frame cannot be read
    """
    # the O3 / Cl2 concentrations are encoded in the parent folder name
    conc = file.split('/')[-2].split('_')
    conc_o3 = float(conc[0])
    conc_cl2 = float(conc[1])

    cap = cv2.VideoCapture(file)
    try:
        if not cap.isOpened():
            raise IOError(f'cannot open video: {file}')

        # the length of the video
        length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

        # the last few frames are sometimes blank, so walk backwards from
        # the end until a frame can be read; the search is bounded by the
        # frame count so an unreadable file cannot loop forever
        img = None
        for j in range(1, length + 1):
            cap.set(cv2.CAP_PROP_POS_FRAMES, length - j)
            res, frame = cap.read()
            if res and frame is not None:
                # convert bgr to rgb
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                break
        if img is None:
            raise IOError(f'no readable frame in video: {file}')

        nx, ny, nc = img.shape
        # number of samples that fit before the endpoint frame
        max_length = int((length - j) / stride)

        # float64 buffer, same dtype as before
        # NOTE(review): this gets large for full-resolution frames
        vid = np.zeros(shape=(n_frames, nx, ny, nc))

        # frame used for padding; starts as the endpoint frame so videos
        # shorter than one stride still yield a valid clip
        vid_ = img
        for i in range(n_frames):
            if i < max_length:
                cap.set(cv2.CAP_PROP_POS_FRAMES, i * stride)
                res, frame = cap.read()
                if not res or frame is None:
                    raise IOError(
                        f'cannot read frame {i * stride} of video: {file}')
                vid_ = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # past the end of the video the last sampled frame is repeated
            vid[i, ...] = vid_
    finally:
        # always free the decoder, also when an error is raised
        cap.release()

    return vid, conc_o3, conc_cl2, img


In [None]:
def plot_end_frame():
    """
    Save an overview figure with the endpoint frame of every video in `files`.

    The figure is an 11 x 5 grid, so only the first 55 videos are shown.
    Each used panel is titled '<index>-<o3>-<cl2>'. The figure is written to
    'endpoint.png' and then closed.
    """
    fig, ax = plt.subplots(nrows=11, ncols=5, figsize=(12, 16.5))
    ax = ax.ravel()
    for i, panel in enumerate(ax):
        if i < len(files):
            # extract_endpoint returns (vid, conc_o3, conc_cl2, img);
            # only the endpoint frame and the two labels are needed here
            _, conc_o3, conc_cl2, img = extract_endpoint(files[i])
            panel.imshow(img)
            panel.set_title(
                f'{i}-{conc_o3:0.1f}-{conc_cl2:0.1f}', fontsize=12)
        # unused panels are formatted and hidden as well
        format_axis_im(panel)
        panel.axis('off')
    fig.savefig('endpoint.png', bbox_inches='tight', dpi=300)
    plt.close(fig)


# render the endpoint overview; the figure is written to 'endpoint.png'
plot_end_frame()


### 2. Locate Box

In [2]:
def preprocess(vid, d=100):
    """
    Center-crop a video, divide it by 255 and resample it to 50 x 50 pixels.
    Args:
        vid: numpy array, video indexed as (frame, row, col, channel)
        d: int, side length of the central window that is kept

    Returns:
        vid: numpy array, processed video; the frame and channel axes keep
            their length, rows and columns are zoomed by 50 / d
    """
    _, n_rows, n_cols, _ = vid.shape

    # top-left corner of the central window
    half = int(d / 2)
    row0 = n_rows // 2 - half
    col0 = n_cols // 2 - half

    # cut the window out of every frame and scale the pixel values
    window = vid[:, row0:row0 + 2 * half, col0:col0 + 2 * half, :] / 255.0

    # resample the two spatial axes only
    scale = 50 / d
    return zoom(window, zoom=(1, scale, scale, 1))


def crop(vid, dmin=130, dmax=150, normalize=True, img_final=None):
    """
    A function to crop the boxes from a video.

    The boxes are located on one reference frame (Otsu threshold followed by
    the bounding rectangles of the contours); every accepted rectangle is
    then cut out of all frames of the video.

    Args:
        vid: numpy array, video indexed as (frame, row, col, channel)
        dmin: int, a box is kept only if width and height are both > dmin
        dmax: int, a box is kept only if width and height are both < dmax
        normalize: bool, rescale the video so that its maximum becomes 255
            before it is converted to uint8
        img_final: numpy array or None, RGB frame used to locate the boxes;
            the last frame of the converted video is used when None

    Returns:
        box: list, contains one uint8 array (frame, h, w, channel) per box
    """
    if normalize:
        peak = np.max(vid)
        # an all-black video has peak 0; skip the rescale instead of
        # dividing by zero and casting NaN to uint8
        if peak > 0:
            vid = vid / peak * 255
    vid = vid.astype(np.uint8)

    # frame on which the boxes are located
    if img_final is None:
        img = vid[-1, ...]
    else:
        img = img_final

    # convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    # binarize with Otsu's automatically chosen threshold
    thresh = cv2.threshold(
        gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]

    # find contours; OpenCV 3.x returns (image, contours, hierarchy) while
    # OpenCV 4.x returns (contours, hierarchy), so index from the end
    contours = cv2.findContours(
        thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]

    box = []
    for c in contours:
        # get the bounding rect
        x, y, w, h = cv2.boundingRect(c)

        # only if the box width and height in correct range
        if dmin < w < dmax and dmin < h < dmax:
            box.append(vid[:, y:y + h, x:x + w, :])

    return box


def select_best(box, best_k=50, d=100):
    """
    A function to keep the best k cropped videos and preprocess them.
    Args:
        box: list, contains cropped box videos (frame, row, col, channel)
        best_k: int, maximum number of videos to keep
        d: int, side length forwarded to preprocess

    Returns:
        box: list, contains the preprocessed best k cropped box videos,
            ordered from fewest to most empty pixels
    """
    def count_empty(clip):
        # a pixel of the last frame is empty when its channel mean is <= 45
        return np.count_nonzero(clip[-1, ...].mean(axis=-1) <= 45)

    emptiness = [count_empty(clip) for clip in box]

    # fewest empty pixels first; videos beyond best_k are dropped
    keep = np.argsort(emptiness)[:best_k]
    return [preprocess(box[k], d=d) for k in keep]


### 3. Crop Videos

In [None]:
def video_data(best_k=50, plot=False):
    """
    Build the cropped-video data set and pickle it.

    Videos in `files` are grouped by the name of their parent folder. For
    every group, the boxes of all its videos are cropped, the best_k boxes
    with the fewest empty pixels are kept, and each kept box is labelled
    with the (o3, cl2) concentrations of the group.

    Args:
        best_k: int, maximum number of boxes kept per concentration folder
        plot: bool, additionally save the last frame of every kept box to
            'endpoint_<folder>.png'

    Returns:
        None; x (stacked boxes) and y (one (o3, cl2) row per box) are
        dumped, in that order, to '../data/o3cl2/video.pickle'

    Raises:
        ValueError: if `files` is empty
    """
    if len(files) == 0:
        # fail before the output file is opened for writing
        raise ValueError('no video files found')

    conc = np.array([file.split('/')[-2] for file in files])

    conc_unique = np.unique(conc)

    x = []
    y = []
    for conc_unique_ in conc_unique:
        idx = np.where(conc == conc_unique_)[0]

        # collect the boxes of every video recorded at this concentration
        box = []
        for i in idx:
            vid, conc_o3, conc_cl2, _ = extract_endpoint(files[i])
            box += crop(vid)

        best = select_best(box, best_k)
        x += best

        # one label row per kept box, so x and y stay aligned even when
        # fewer than best_k boxes were found
        n_best = len(best)
        labels = np.concatenate(
            (np.ones((n_best, 1)) * conc_o3, np.ones((n_best, 1)) * conc_cl2),
            axis=-1)
        y.append(labels)

        if plot and best:
            # 10 panels per row; 50 boxes give the 5 x 10 grid
            ncols = 10
            nrows = -(-n_best // ncols)
            fig, ax = plt.subplots(
                nrows=nrows, ncols=ncols, figsize=(10, nrows))
            ax = ax.ravel()
            for panel in ax:
                panel.axis('off')
            # show the last frame of every box kept for this concentration
            for panel, clip in zip(ax, best):
                panel.imshow(clip[-1, ...])
            fig.savefig(f'endpoint_{conc_unique_}.png',
                        dpi=300, bbox_inches='tight')
            plt.close(fig)
    x = np.array(x)

    y = np.concatenate(y, axis=0)

    with open('../data/o3cl2/video.pickle', 'wb') as handle:
        pickle.dump(x, handle)
        pickle.dump(y, handle)
    print(f'x: {x.shape}, y: {y.shape}')


In [None]:
# build the data set with the defaults (best_k=50, no plots); the result is
# written to '../data/o3cl2/video.pickle'
video_data()