# Train the sequence model on raw data

### Embed arts and shadings

1. Train the embedding model with a custom dataset

In [None]:
# this notebook requires torch 1.9.0 and torchvision 0.10.0 (the versions pinned below)
!pip install torch==1.9.0
!pip install torchvision==0.10.0

In [None]:
# sanity check: what the transformed image looks like
from dataset.rawdata import ImageDataset
import matplotlib.pyplot as plt
from torchvision import transforms
transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((100, 100))
    ])
data = ImageDataset(transform, '../data/asset/art', '../data/asset/shading')
plt.imshow(data[0].permute((1, 2, 0)))
plt.show()

In [None]:
# compare with what the original picture looks like
from skimage import io
img0 = io.imread(data.img_paths[0])
plt.imshow(img0[:,:,:3]) # remove alpha channel
plt.show()

In [None]:
#!python embedding/embedding_main.py --data-set raw --lr 0.001 --epochs 400

2. load encoder

In [None]:
import torch
from embedding.models.embed_model import ConvEncoderDecoder

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Build the encoder/decoder and restore its weights from the checkpoint file.
convcoder = ConvEncoderDecoder().to(device)
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# on a GPU machine can still be loaded on a CPU-only machine.
# NOTE(review): presumably this checkpoint is produced by the commented-out
# embedding_main.py command above — confirm.
convcoder.load_state_dict(torch.load('output/convcoder_raw.pt', map_location=device))

3. get embeddings of foreground images and background images

In [None]:
import glob
from skimage import io
from torchvision import transforms 

transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((100, 100))
    ])

def encode(image, convcoder):
    '''
    Embed a single image with the encoder half of `convcoder`.

    The image goes through the module-level `transform`, gets a leading batch
    axis, and is moved to the module-level `device`. It is then passed through
    `convcoder.encoder`, flattened from dim 1 onwards, and projected by
    `convcoder.embedder`. No gradients are tracked.

    Parameters:
    image: the raw image accepted by `transform`
    convcoder: a model exposing `encoder` and `embedder` sub-modules

    Returns:
    the embedding as a CPU tensor, batch axis (size 1) still in front
    '''
    batch = transform(image).unsqueeze(dim=0).to(device)
    with torch.no_grad():
        features = convcoder.encoder(batch)
        flat_features = torch.flatten(features, start_dim=1, end_dim=-1)
        return convcoder.embedder(flat_features).cpu()

def get_embeddings(image_dir, encoder):
    '''
    Encode every png image found directly inside `image_dir`.

    Each file is expected to be named `<number>.png`; the integer before the
    first dot becomes the dictionary key.

    Parameters:
    image_dir: str
        directory that holds the png files
    encoder: nn.Module
        model forwarded to `encode` (must expose `encoder` and `embedder`)

    Returns:
    a dict of {image_number: embedding} pairs, where each embedding is the
    first (and only) row of the batch returned by `encode`

    Raises:
    AssertionError if no png file is found in `image_dir`
    '''
    # local import: `os` is only imported in a later cell of this notebook
    import os

    image_paths = glob.glob(image_dir + '/*.png')
    assert len(image_paths) != 0, 'No png image found'

    encoded_images = {}
    for path in image_paths:
        # basename() understands the OS-native separator, so this also works
        # when glob returns backslash-separated paths (Windows)
        image_number = int(os.path.basename(path).split('.')[0])
        # skimage reads images in RGB order; keep only the first three
        # channels so that any alpha channel is dropped before encoding
        image = io.imread(path)
        encoded_images[image_number] = encode(image[:, :, :3], encoder)[0]
    return encoded_images

In [None]:
fg_embs = get_embeddings('../data/asset/art', convcoder)
bg_embs = get_embeddings('../data/asset/shading', convcoder)

## Load data for training

A backup: the first version of the `build_raw_data` definition.
```python
def build_raw_data(bg_embs, fg_embs, img_manifest_path):
    with open(img_manifest_path) as f:
        img_list = [json.loads(line.strip()) for line in f.readlines()]
    training_data = []
    for img_info in img_list:
        # record last x and y for calculating relative position
        x_last = int(img_info['fg'][0][2])
        y_last = int(img_info['fg'][0][3])
        fg_reps = []
        for fg in img_info['fg']:
            number, _, x, y, scale, rotate, opaque = fg
            # convert rotate in [0, 360] to [0, 1]
            rotate /= 360
            fg_emb = fg_embs[number]
            x_rel, y_rel = x - x_last, y - y_last
            x_last, y_last = x, y
            # normalize x_rel and y_rel to [-1, 1]
            fg_meta = torch.tensor([*normalize_relative_xy(x_rel, y_rel), scale, rotate, opaque])
            fg_reps.append(torch.cat((fg_emb, fg_meta)))
        fg_reps = torch.stack(fg_reps)
        fg_reps = fg_reps.unsqueeze(dim=1) # (steps, mb_size=1, fg_emb_dim+5), 1 is the batch_size
        # normalize class label from [-1, 1] to [0, 1] (TODO: -1 and 0 are both 0)
        cls_label = 0 if int(img_info['flag']) < 0 else 1
        cls_label = torch.tensor(cls_label, dtype=torch.long).unsqueeze(dim=0) # (mb_size=1, fg_emb_dim)
        bk_emb = bg_embs[img_info['bg']].unsqueeze(dim=0)
        training_data.append((fg_reps, bk_emb, cls_label))
    return training_data

```

In [None]:
import json
from collections import defaultdict

def truncate(x, lower, upper):
    '''
    Clamp x into the closed interval [lower, upper].

    Returns `lower` when x is below it, `upper` when x is above it, and x
    itself otherwise.
    '''
    return lower if x < lower else (upper if x > upper else x)

def normalize_relative_xy(x, y):
    '''
    Scale a relative offset (x, y) into [-1, 1] on both axes.

    x is divided by 402 and y by 600, then each is clamped to [-1, 1].
    The two divisors are magic numbers obtained by inspecting the raw data.

    Returns:
    the (x, y) pair after scaling and clamping
    '''
    scaled_x = x / 402
    scaled_y = y / 600
    return truncate(scaled_x, -1, 1), truncate(scaled_y, -1, 1)

def rescale_fgs(fgs):
    '''
    Convert raw foreground meta data into the value ranges the model uses.

    x, y: replaced by the offset from the previous foreground (the first
          entry is relative to itself, i.e. zero), normalized to [-1, 1]
    rotate: divided by 360, i.e. [0, 360] becomes [0, 1]
    All other fields are passed through untouched.

    Parameters:
    fgs: a list of (number, rank, x, y, scale, rotate, opaque)

    Returns:
    a new list of (number, rank, x_rel, y_rel, scale, rotate/360, opaque)
    '''
    converted = []
    prev_x = prev_y = None
    for idx, (number, rank, x, y, scale, rotate, opaque) in enumerate(fgs):
        if idx == 0:
            # the first element has no predecessor: measure it against itself
            prev_x, prev_y = x, y
        dx, dy = normalize_relative_xy(x - prev_x, y - prev_y)
        prev_x, prev_y = x, y
        converted.append((number, rank, dx, dy, scale, rotate / 360, opaque))
    return converted
    

def build_raw_data(bg_embs, fg_embs, img_list, batch_size):
    '''
    Turn manifest entries plus pre-computed embeddings into mini-batches.

    Images are bucketed by the length of their foreground sequence so that
    every mini-batch can be stacked without padding.

    Parameters:
    bg_embs: dict
        background embedding dictionary. key: number; entry: tensor of shape (emb_size, )
    fg_embs: dict
        foreground embedding dictionary. key: number; entry: tensor of shape (emb_size, )
    img_list: list of dict
        manifest entries; each one provides 'fg' (a list of
        (number, rank, x, y, scale, rotate, opaque)), 'bg' and 'flag'
    batch_size: int
        maximum number of images per mini-batch

    Returns:
    a list of (in_seqs, bk_embs, cls_labels) tuples
        in_seqs: torch.Tensor of (seq_len, batch_size, fg_emb_size + 5)
        bk_embs: torch.Tensor of (batch_size, bk_emb_size)
        cls_labels: torch.Tensor of (batch_size)
    '''
    # bucket the manifest entries by foreground sequence length
    buckets = defaultdict(list)
    for info in img_list:
        buckets[len(info['fg'])].append(info)

    batches = []
    for members in buckets.values():
        # slice each bucket into chunks of at most batch_size images
        for start in range(0, len(members), batch_size):
            seqs, labels, backgrounds = [], [], []
            for info in members[start:start + batch_size]:
                # positions are stored relative to the previous foreground;
                # the first one is measured against its own integer position
                prev_x = int(info['fg'][0][2])
                prev_y = int(info['fg'][0][3])
                steps = []
                for number, _, x, y, scale, rotate, opaque in info['fg']:
                    embedding = fg_embs[number]
                    dx, dy = normalize_relative_xy(x - prev_x, y - prev_y)
                    prev_x, prev_y = x, y
                    # rotate is mapped from [0, 360] to [0, 1]
                    meta = torch.tensor([dx, dy, scale, rotate / 360, opaque])
                    steps.append(torch.cat((embedding, meta)))
                # (steps, 1, fg_emb_dim + 5): axis 1 is the per-image batch axis
                seqs.append(torch.stack(steps).unsqueeze(dim=1))
                # shift the flag from [-1, 1] to [0, 2] (0 bad, 1 neutral, 2 good)
                label = torch.tensor(int(info['flag']) + 1, dtype=torch.long)
                labels.append(label.unsqueeze(dim=0))
                backgrounds.append(bg_embs[info['bg']].unsqueeze(dim=0))
            batches.append((
                torch.cat(seqs, dim=1),
                torch.cat(backgrounds, dim=0),
                torch.cat(labels, dim=0),
            ))
    return batches

In [None]:
with open('../data/data.txt') as f:
    img_manifest = [json.loads(line.strip()) for line in f.readlines()]

Run this cell to split the data into train and test sets (this cell does not create a separate dev set).

In [None]:
from sklearn.model_selection import train_test_split

train_manifest, test_manifest = train_test_split(img_manifest, test_size=0.15)

In [None]:
train_data = build_raw_data(bg_embs, fg_embs, train_manifest, 128)
test_data = build_raw_data(bg_embs, fg_embs, test_manifest, 128)

## Train the model

0. define hyper parameters

In [None]:
# Hyper-parameters shared by model construction, the optimizer and the
# training loop below.
args = dict()
# size of one sequence step: 576 embedding values + 5 meta values
# (x, y, scale, rotate, opaque)
args['seq_in_dim'] = 581
# forwarded to sequence_model as input_hid_size
# NOTE(review): presumably matches the 576-dim foreground embedding — confirm
args['input_hid_size'] = 576
# forwarded to sequence_model as hidden_size
# NOTE(review): the background embedding is used as the initial hidden state,
# so this presumably has to equal the background embedding size — confirm
args['hid_dim'] = 256
# number of recurrent layers; also the number of copies of the background
# embedding stacked to build h_0
args['num_layers'] = 4
# Adam learning rate and weight decay
args['lr'] = 0.001
args['wd'] = 1e-6
# number of passes over train_data
args['epochs'] = 800

1. examine training data

In [None]:
print("Input sequence has shape {}".format(train_data[0][0].shape))
print("Background embedding has shape {}".format(train_data[0][1].shape))
print("Class labels has shape {}".format(train_data[0][2].shape))

2. define model(s)

In [None]:
from sequence.models.seq_model import sequence_model, seq_loss_fn

seq_model = sequence_model(input_size=args['seq_in_dim'],
                           input_hid_size=args['input_hid_size'],
                           hidden_size=args['hid_dim'],
                           num_layers=args['num_layers'])
seq_model = seq_model.to(device)

3. define loss and optimizer
   
   We will directly use the customized loss function.

In [None]:
optim = torch.optim.Adam(seq_model.parameters(), lr=args['lr'], weight_decay=args['wd'])

    define tensorboard writer

In [None]:
from datetime import datetime
import os
from torch.utils.tensorboard import SummaryWriter

now = datetime.today()
dt= now.strftime("%m_%d_%H_%M")
writer = SummaryWriter(os.path.join('./runs', dt))
writer.add_text('Parameters', str(args))

4. training loop

In [None]:
def test(test_data, seq_model):
    '''
    Evaluate `seq_model` on pre-batched data without tracking gradients.

    Every batch is run the same way as in training: the background embedding
    is repeated `args['num_layers']` times as the initial hidden state, the
    cell state starts at zero, and the model predicts step t+1 from step t.

    Parameters:
    test_data: iterable of (in_seqs, bk_embs, cls_labels) batches
    seq_model: the sequence model to evaluate (switched to eval mode here)

    Returns:
    the sum of the batch losses divided by the total number of samples
    '''
    loss_sum, sample_count = 0, 0
    seq_model.eval()
    with torch.no_grad():
        for in_seqs, bk_embs, cls_labels in test_data:
            in_seqs = in_seqs.to(device)
            bk_embs = bk_embs.to(device)
            cls_labels = cls_labels.to(device)

            h_0 = torch.stack([bk_embs] * args['num_layers']).to(device)
            c_0 = torch.zeros_like(h_0).to(device)

            # inputs are all steps but the last; targets are shifted by one
            inputs, targets = in_seqs[:-1], in_seqs[1:]
            out_seqs_logits, cls_logits = seq_model(inputs, (h_0, c_0), return_last_hidden=False)
            loss, _, _ = seq_loss_fn(out_seqs_logits, targets, cls_logits, cls_labels,
                                     alpha=0.2, return_details=True)

            loss_sum += loss.item()
            sample_count += len(cls_labels)
    return loss_sum / sample_count

In [None]:
# Training loop: one pass over train_data per epoch, followed by an evaluation
# on test_data. Per-batch and per-epoch losses are written to TensorBoard.
for epoch in range(args['epochs']):
    # test() leaves the model in eval mode, so switch back at every epoch
    seq_model.train()
    total_loss = 0
    total_samples = 0
    for i, (in_seqs, bk_embs, cls_labels) in enumerate(train_data):
        in_seqs = in_seqs.to(device)
        bk_embs = bk_embs.to(device)
        cls_labels = cls_labels.to(device)

        # initial hidden state: the background embedding repeated once per layer
        h_0 = torch.stack([bk_embs for _ in range(args['num_layers'])]).to(device)
        # print("Hidden 0 shape {}".format(h_0.shape))

        # initial cell state: zeros with the same shape as h_0
        c_0 = torch.zeros_like(h_0).to(device)

        # feed every step except the last one ...
        out_seqs_logits, cls_logits = seq_model(in_seqs[:-1,], (h_0, c_0), return_last_hidden=False)

        # ... and compare against the sequence shifted by one step.
        # NOTE(review): alpha presumably weights the sequence loss against the
        # classification loss inside seq_loss_fn — confirm in seq_model.py
        loss, seq_loss, cls_loss = seq_loss_fn(out_seqs_logits, in_seqs[1:,], cls_logits, cls_labels, alpha=0.2, return_details=True)
        total_loss += loss.item()
        total_samples += len(cls_labels)
        # per-batch scalars are divided by the batch size; the global step
        # counts batches across all epochs
        writer.add_scalar('batch/train_loss', loss.item()/len(cls_labels), global_step=epoch*len(train_data)+i)
        writer.add_scalar('batch/seq_loss', seq_loss.item()/len(cls_labels), global_step=epoch*len(train_data)+i)
        writer.add_scalar('batch/cls_loss', cls_loss.item()/len(cls_labels), global_step=epoch*len(train_data)+i)

        # clear stale gradients, back-propagate, then update the weights
        optim.zero_grad()
        loss.backward()
        optim.step()

    # end-of-epoch evaluation and logging
    test_loss = test(test_data, seq_model)
    writer.add_scalar('epoch/train_loss', total_loss/total_samples, global_step=epoch)
    writer.add_scalar('epoch/test_loss', test_loss, global_step=epoch)
    print("In epoch: {:03d} | loss: {:.6f}, test_loss: {:.6f}".format(epoch, total_loss/total_samples, test_loss))

## Generate Images

Define rate function for a generated sequence

In [None]:
def rate_seq(bg, fgs, seq_model, bg_embs, fg_embs):
    '''
    Classify one background + foreground sequence with the trained model.

    Parameters:
    bg (int): background sequence number
    fgs (list): a list of foreground meta data (number, rank, x, y, scale, rotate, opaque)
    seq_model (nn.Module): the trained model for rating
    bg_embs (dict): a {number:emb} dict for background
    fg_embs (dict): a {number:emb} dict for foreground

    Returns:
    int: index of the largest classification logit
    '''
    # each step is the foreground embedding followed by its last five meta
    # values, after positions are made relative and rotate is divided by 360
    steps = [
        torch.cat((fg_embs[fg[0]], torch.tensor(fg[-5:])))
        for fg in rescale_fgs(fgs)
    ]
    # add a batch axis of size one at dim 1
    in_seqs = torch.stack(steps).unsqueeze(dim=1)

    # the background embedding (with a batch axis of size one) is repeated
    # once per layer as the initial hidden state; the cell state is zero
    h_0 = torch.stack([bg_embs[bg].unsqueeze(dim=0)] * args['num_layers'])
    c_0 = torch.zeros_like(h_0)

    in_seqs, h_0, c_0 = in_seqs.to(device), h_0.to(device), c_0.to(device)
    with torch.no_grad():
        _, cls_logits = seq_model(in_seqs, (h_0, c_0))

    return torch.argmax(cls_logits).item()

In [None]:
# NOTE: init_bg and fgs are only assigned by the generate_random_image(...)
# cell further down, so run that cell before this one.
rate_seq(init_bg, fgs, seq_model, bg_embs, fg_embs)

Define metrics for comparing foreground embeddings.

In [None]:
def mse(A, B):
    '''Return the mean of the element-wise squared difference of A and B.'''
    squared_error = (A - B) ** 2
    return squared_error.mean()

def find_closest(emb, emb_dict):
    '''
    Return the key of the entry in `emb_dict` whose value has the smallest
    mean squared error to `emb`. Ties go to the first key in iteration order.
    '''
    return min(emb_dict, key=lambda name: mse(emb, emb_dict[name]))

Helper function 1: Randomly generate an initial background and an initial foreground, along with the foreground's meta data.

In [None]:
import random

def generate_random_inits():
    '''
    Draw a random starting point for image generation.

    Returns:
    (background number, foreground sequence length, first foreground meta)
    where the meta tuple is (number, rank, x, y, scale, rotate, opaque) with
    rank 0 and opaque 1.
    '''
    bg_number = random.randint(1, 24)         # shading is in [1, 24]
    seq_len = random.choice(range(15, 24))    # 15 .. 23 foregrounds
    fg_number = random.randint(2, 21)         # art is in [2, 21]
    x0 = random.randint(10, 300)
    y0 = random.randint(10, 300)
    scale0 = random.uniform(0.9, 1.1)
    rotate0 = random.randint(0, 359)
    first_fg = (fg_number, 0, x0, y0, scale0, rotate0, 1)
    return bg_number, seq_len, first_fg

Helper function 2: Convert initial background and foreground to suitable input for the model.

In [None]:
def generate_init_model_input(init_bg, init_fg_meta, bg_embs, fg_embs):
    '''
    Build the first model input and the initial recurrent state.

    Parameters:
    init_bg: key of the background embedding in `bg_embs`
    init_fg_meta: (number, rank, x, y, scale, rotate, opaque) of the first foreground
    bg_embs, fg_embs: {number: embedding} dictionaries

    Returns:
    (h_0, c_0, fg_emb_0), all on the module-level `device`:
    h_0 is the background embedding repeated `args['num_layers']` times with a
    batch axis of size one at dim 1, c_0 is zeros of the same shape, and
    fg_emb_0 is the first step with two leading axes of size one.
    '''
    number, scale, rotate, opaque = (init_fg_meta[0], init_fg_meta[4],
                                     init_fg_meta[5], init_fg_meta[6])
    # the first foreground has zero relative offset; rotate is divided by 360
    first_meta = torch.tensor([0, 0, scale, rotate / 360, opaque])
    first_step = torch.cat((fg_embs[number], first_meta))
    fg_emb_0 = first_step.unsqueeze(dim=0).unsqueeze(dim=0).to(device)

    layers = [bg_embs[init_bg]] * args['num_layers']
    h_0 = torch.stack(layers).unsqueeze(dim=1).to(device)
    c_0 = torch.zeros_like(h_0).to(device)

    return h_0, c_0, fg_emb_0

Helper function 3: Generate a sequence of foreground with given background embedding and initial hidden states.

In [None]:
def generate_fg_seqs(init_bg, fg_seq_len, init_fg_meta, bg_embs, fg_embs):
    '''
    Generate a foreground sequence step by step, starting from one foreground.

    Uses the module-level `seq_model`. Each predicted embedding is snapped to
    the closest known foreground embedding, and that snapped embedding (plus
    the predicted meta values) is fed back as the next input.

    Parameters:
    init_bg: key of the background embedding in `bg_embs`
    fg_seq_len: total number of foregrounds to return (including the first)
    init_fg_meta: (number, rank, x, y, scale, rotate, opaque) of the first foreground
    bg_embs, fg_embs: {number: embedding} dictionaries

    Returns:
    a list of (number, rank, x, y, scale, rotate, opaque) tuples with absolute
    x / y and rotate in degrees
    '''
    h, c, fg_emb = generate_init_model_input(init_bg, init_fg_meta, bg_embs, fg_embs)
    fgs = [init_fg_meta]
    with torch.no_grad():
        for i in range(1, fg_seq_len):
            # when i is 1, use intact model
            if i == 1:
                seqs_logits, cls_logits, (h, c) = seq_model(fg_emb, (h, c), return_last_hidden=True)
            # when i is not 1, use parts of the model to directly feed h and c to rnn
            else:
                output_seqs, (h, c) = seq_model.rnn_model(fg_emb, (h, c))
                seqs_logits = seq_model.seq_transformer(output_seqs)
            # process sequence logits to fit corresponding value scales
            # NOTE(review): with the meta order (x, y, scale, angle, opaque),
            # [-5:-2] is (x, y, scale) and [-2:] is (angle, opaque). x / y are
            # normalized to [-1, 1] elsewhere in this notebook and angle to
            # [0, 1], so sigmoid / tanh look swapped here — confirm against
            # seq_loss_fn before changing.
            seqs_logits[:, :, -5:-2] = torch.sigmoid(seqs_logits[:, :, -5:-2])
            seqs_logits[:, :, -2:] = torch.tanh(seqs_logits[:, :, -2:])
            fg_emb = seqs_logits[0,0,:576] # next fg embedding
            fg_meta = seqs_logits[0,0,576:] # (x, y, scale, angle, opaque)
            # snap the predicted embedding to the nearest real foreground
            fg_name = find_closest(fg_emb.detach().cpu(), fg_embs)
            fg_emb = fg_embs[fg_name].to(device)
            # next input: snapped embedding + predicted meta, as (1, 1, dim)
            fg_emb = torch.cat((fg_emb, fg_meta))
            fg_emb = fg_emb.unsqueeze(dim=0).unsqueeze(dim=0).to(device)
            # again, magic numbers come from inspection of data
            # (402 / 600 undo the x / y normalization; the offsets are added
            # to the previous absolute position, and angle is scaled back to
            # degrees)
            fgs.append((fg_name, i, int(fgs[-1][2]+fg_meta[0].item()*402), int(fgs[-1][3]+fg_meta[1].item()*600), \
                        fg_meta[2].item(), int(fg_meta[3]*360), fg_meta[4].item()))
    return fgs

Finally, generate an image with a single function.

In [None]:
def generate_random_image(bg_embs, fg_embs):
    '''
    Draw random initial conditions, print them, and generate a full
    foreground sequence from them.

    Parameters:
    bg_embs, fg_embs: {number: embedding} dictionaries

    Returns:
    (background number, list of foreground meta tuples)
    '''
    init_bg, fg_seq_len, init_fg_meta = generate_random_inits()
    x0, y0, scale0, rotate0, opaque0 = init_fg_meta[-5:]
    summary = {
        'bg': init_bg,
        'fg_seq_len': fg_seq_len,
        'init_fg': {
            '(x, y)': (x0, y0),
            'scale': scale0,
            'rotate': rotate0,
            'opaque': opaque0,
        },
    }
    print(summary)
    return init_bg, generate_fg_seqs(init_bg, fg_seq_len, init_fg_meta, bg_embs, fg_embs)

In [None]:
init_bg, fgs = generate_random_image(bg_embs, fg_embs)

Define image transformation functions

In [None]:
import numpy as np

def tile_image(mat, rows, cols):
    '''
    Repeat mat `rows` times along axis 0 and `cols` times along axis 1,
    leaving the third (channel) axis as it is.
    '''
    return np.tile(mat, (rows, cols, 1))

def vanilla_rotate(mat, angle):
    '''
    Rotate mat by `angle` degrees around its centre, keeping the original
    canvas size and scale. Content rotated outside the canvas (the corners of
    the original image) is lost.

    Following OpenCV's getRotationMatrix2D convention, a positive angle
    rotates counter-clockwise (image origin at the top-left corner).
    NOTE(review): if the raw data stores clockwise angles, the caller should
    pass `-angle` — confirm against the data source.

    mat: numpy.ndarray,(h, w, c)
        Matrix to rotate.
    angle: int
        Rotation in degrees. It is passed to OpenCV unchanged; no modulo is
        applied here.
    '''
    rows, cols = mat.shape[:2]
    # cols-1 and rows-1 are the coordinate limits, so this is the exact centre
    center = ((cols - 1) / 2.0, (rows - 1) / 2.0)
    rotation = cv2.getRotationMatrix2D(center, angle, 1)
    # the output size is (width, height) = (cols, rows): same canvas as the input
    return cv2.warpAffine(mat, rotation, (cols, rows))

def scale_image(mat, scale):
    '''
    Resize mat by the same factor `scale` on both axes using bicubic
    interpolation; the output size is derived from the factor.
    '''
    return cv2.resize(mat, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)

def transform_image(mat, scale, angle):
    '''
    Scale mat first, then rotate the scaled result by `angle` degrees.
    '''
    return vanilla_rotate(scale_image(mat, scale), angle)

def overlay_transparent(background, overlay, x, y):
    '''
    Alpha-blend `overlay` onto `background`, placing the overlay's top-left
    corner at column x, row y. Parts of the overlay that fall outside the
    background are clipped. x, y are expected to be integers.

    `background` is modified in place and also returned. An overlay with
    fewer than 4 channels is treated as fully opaque.
    '''
    bg_h, bg_w = background.shape[0], background.shape[1]
    ov_h, ov_w = overlay.shape[0], overlay.shape[1]

    # nothing to draw when the overlay lies entirely outside the background
    beyond_right_or_bottom = x >= bg_w or y >= bg_h
    beyond_left_or_top = x + ov_w <= 0 or y + ov_h <= 0
    if beyond_right_or_bottom or beyond_left_or_top:
        return background

    # window of the overlay that is actually visible on the background
    src_left, src_top = max(-x, 0), max(-y, 0)
    src_right, src_bottom = min(ov_w, bg_w - x), min(ov_h, bg_h - y)
    patch = overlay[src_top:src_bottom, src_left:src_right]

    if patch.shape[2] < 4:
        # no alpha channel: append a fully opaque one
        opaque_alpha = np.ones((patch.shape[0], patch.shape[1], 1), dtype = patch.dtype) * 255
        patch = np.concatenate([patch, opaque_alpha], axis = 2)

    colour = patch[..., :4]
    alpha = patch[..., 3:] / 255.0

    # matching window on the background
    dst_top, dst_left = max(y, 0), max(x, 0)
    rows = slice(dst_top, dst_top + (src_bottom - src_top))
    cols = slice(dst_left, dst_left + (src_right - src_left))
    background[rows, cols] = (1.0 - alpha) * background[rows, cols] + alpha * colour

    return background

Helper functions for image generation:

In [None]:
from PIL import ImageColor

def hex2rgb(hex_str):
    '''Convert a colour string such as '#FFFFFF' to its "RGB"-mode colour value via PIL.'''
    rgb = ImageColor.getcolor(hex_str, "RGB")
    return rgb

def create_blank(width, height, rgb_color=(0, 0, 0)):
    """Create a (height, width, 4) uint8 image filled with one opaque colour.

    The colour is given in RGB order but stored reversed (OpenCV's BGR order)
    followed by an alpha value of 255.
    """
    # reverse RGB to BGR and append a fully opaque alpha
    bgra = (*reversed(rgb_color), 255)
    canvas = np.empty((height, width, 4), np.uint8)
    # broadcast the single colour over every pixel
    canvas[...] = bgra
    return canvas

In [None]:
def get_fg_path(idx):
    '''Return the path of the foreground (art) png with the given number.'''
    return '../data/asset/art/{}.png'.format(idx)

def get_bg_path(idx):
    '''Return the path of the background (shading) png with the given number.'''
    return '../data/asset/shading/{}.png'.format(idx)

with open('../data/data.txt') as f:
    img_list = [json.loads(line.strip()) for line in f.readlines()]

In [None]:
import cv2
import numpy as np

# Render the generated sequence (init_bg, fgs) onto a 1000x1000 canvas.
# NOTE: img_info is assigned but not used anywhere in this cell.
img_info = img_list[0]

bg_color = '#FFFFFF'
bg_number = init_bg
fg_list = fgs

# this initial value is overwritten before its first use inside the loop
cursor = (0, 0)
blank = create_blank(1000, 1000, hex2rgb(bg_color))

# flag -1 (cv2.IMREAD_UNCHANGED) loads the file as stored, keeping any alpha channel
bg = cv2.imread(get_bg_path(bg_number), -1)
# enlarge the shading 2x, tile it 12x12 and crop to the canvas size
bg = scale_image(bg, 2)
tiled = tile_image(bg, 12, 12)
bg = overlay_transparent(blank, tiled[:1000, :1000], 0, 0)
for fg_info in fg_list:
    # fg_info is (number, rank, x, y, scale, rotate, opaque); opaque is not used here
    fg_idx = fg_info[0]
    fg = cv2.imread(get_fg_path(fg_idx), -1)
    fg_x, fg_y = fg_info[2], fg_info[3]
    # NOTE(review): the cursor is (fg_y, fg_x+200), so fg_y is passed as
    # overlay_transparent's x (column) and fg_x+200 as its y (row).
    # Presumably the raw data uses swapped axes plus a 200px offset — confirm.
    cursor = (fg_y, fg_x+200)
    scale = float(fg_info[4])
    angle = int(fg_info[5])
    fg = transform_image(fg, scale, angle)
    bg = overlay_transparent(bg, fg, cursor[0], cursor[1])
# OpenCV images are BGRA; matplotlib expects RGBA
changed_color = cv2.cvtColor(bg, cv2.COLOR_BGRA2RGBA)
plt.imshow(changed_color)
# keep the BGRA image for the cv2.imwrite cell below
img2 = bg

In [None]:
cv2.imwrite('output/gen.png', img2)

In [None]:
!pwd

In [None]:
help(cv2.imwrite)

## Inspect Data

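1. check statistics of x and y
    x and y in the batched data are the relative positions produced by `normalize_relative_xy` (stored at feature indices -5 and -4 of each sequence step).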

In [None]:
# @unused
def undo_batch(data_batched):
    '''
    Flatten a list of batches back into a list of single samples.

    Each (in_seqs, bk_embs, cls_labels) batch is split into pieces of size one
    (in_seqs along dim 1, the other two along dim 0); every piece keeps its
    batch axis.
    '''
    samples = []
    for in_seqs, bk_embs, cls_labels in data_batched:
        per_sample = zip(
            torch.split(in_seqs, 1, dim=1),
            torch.split(bk_embs, 1, dim=0),
            torch.split(cls_labels, 1, dim=0),
        )
        samples.extend(per_sample)
    return samples

In [None]:
full_data = build_raw_data(bg_embs, fg_embs, img_manifest, 1)

In [None]:
# Collect the normalized relative x / y of every sequence step.
# full_data was built with batch_size 1, so index 0 on dim 1 selects the only
# sample; features -5 and -4 are the relative x and y.
x, y = [], []
for data in full_data:
    # `+=` with a tensor extends the list with its 0-dim element tensors
    x += data[0][:, 0, -5]
    y += data[0][:, 0, -4]

# the "scaled" columns multiply by 402 / 600 to undo the normalization
print(f'Relative x and y positions: ({len(x)} records)')
print(f'x: [{min(x)},{max(x)}]; \tscaled: [{min(x)*402}, {max(x)*402}]')
print(f'y: [{min(y)},{max(y)}]; \tscaled: [{min(y)*600}, {max(y)*600}]')
print(f'mean x: \t{np.mean(x)}; \tscaled: {np.mean(x)*402}')
print(f'mean y: \t{np.mean(y)}; \tscaled: {np.mean(y)*600}')
print(f'variance x: \t{np.var(x)}; \tscaled: {np.var(x)*402*402}')
print(f'variance y: \t{np.var(y)}; \tscaled: {np.var(y)*600*600}')

2. check class distribution

In [None]:
# Count the samples per class in the built data; build_raw_data stores the
# labels as flag + 1, i.e. 0 / 1 / 2.
cls_label = []
for data in full_data:
    # data[2] is the class-label tensor of the batch
    cls_label += data[2].tolist()
cls_label = np.array(cls_label)
print(f'negative samples: {(cls_label==0).sum()}')
print(f'neutral samples: {(cls_label==1).sum()}')
print(f'positive samples: {(cls_label==2).sum()}')

In [None]:
# compare with data from original manifest
flags = []
for data in img_manifest:   
    flags.append(data['flag'])
flags = np.array(flags)
print(f'negative samples: {(flags==-1).sum()}')
print(f'neutral samples: {(flags==0).sum()}')
print(f'positive samples: {(flags==1).sum()}')

### Balance Class Distribution

Will generate some more positive and neutral samples.

In [None]:
img