# Stage 0 - Data Preparation

This notebook implements Stage 0 of the tooth detection and numbering pipeline. It converts the source images and their annotations into the formats required by the subsequent stages of analysis.  

### 0. Process Zooniverse Data

In [None]:
import pandas as pd
import numpy as np
import json
import os
import math
import collections
import shutil

import cv2
from matplotlib import pyplot as plt  

# Input: raw classification export; output: name intended for the flattened table.
filename_classifications = 'tooth-numbering-workflow-classifications 20230725-20230824.csv'
filename_output = 'classifications_flat_trim.csv'

# Columns selected for the flattened output table.
columns_out = [
    'classification_id',
    'created_at',
    'user_name',
    'user_id',
    'workflow_id',
    'workflow_version',
    'subject_ids',
    'taskvalue_text',
    'taskvalue_survey',
]

# Reference: column names to choose from

# Columns listed for the raw classification export.
columns_in = [
    'classification_id',
    'user_name',
    'user_id',
    'user_ip',
    'workflow_id',
    'workflow_name',
    'workflow_version',
    'created_at',
    'gold_standard',
    'expert',
    'metadata',
    'annotations',
    'subject_data',
    'subject_ids',
]

# Derived columns (decoded JSON plus extracted task values).
columns_new = [
    'metadata_json',
    'annotations_json',
    'subject_data_json',
    'taskvalue_text',
    'taskvalue_survey',
]

In [None]:
# Load the classification export and decode each JSON-encoded string column
# into a parallel '<column>_json' column of Python objects.
classifications = pd.read_csv(filename_classifications)
for source_col in ('metadata', 'annotations', 'subject_data'):
    classifications[source_col + '_json'] = [
        json.loads(cell) for cell in classifications[source_col]
    ]

In [None]:

taskvalue_survey = []
taskvalue_text = []

zooniverse_output_label_ori_path = './ZooniverseOutput/labels_ori'
zooniverse_output_label_YOLO_path = './ZooniverseOutput/labels_yolo'
zooniverse_output_img_path = './ZooniverseOutput/annotatedimages'

zooniverse_complete_img_path = './4 Private ZOON Anonymised/Bitewing_proc/images/Batch1_selected_NotYetAnnotated/AnnotatedonZooniverse'
zooniverse_towork_img_path = './4 Private ZOON Anonymised/Bitewing_proc/images/Batch1_selected_NotYetAnnotated/UploadedZooniverse_(Tooth Numbering Subjects Selected 2nd Batch) 20230719'

# Accepted tooth labels; the position in this list is the class index written to the label files.
valid_lbls = ['t11','t12','t13','t14','t15','t16','t17','t18',
              't21','t22','t23','t24','t25','t26','t27','t28',
              't31','t32','t33','t34','t35','t36','t37','t38',
              't41','t42','t43','t44','t45','t46','t47','t48']


def _subject_image_name(row):
    """Return the 'Filename' entry of the first subject attached to a classification row."""
    return [x['Filename'] for x in row['subject_data_json'].values()][0]


def _unused_label_name(stem, folders):
    """Return '<stem>.txt', or the first '<stem>-<k>.txt' (k = 1, 2, ...) that exists in none of `folders`.

    Checking every folder at once keeps the YOLO and original-coordinate label
    files of one classification under the same name.
    """
    candidate = stem + '.txt'
    k = 1
    while any(os.path.exists(os.path.join(folder, candidate)) for folder in folders):
        candidate = stem + '-' + str(k) + '.txt'
        k += 1
    return candidate


# The destination image folder is created as well, so that shutil.move below cannot
# fail on a missing directory.
for folder in (zooniverse_output_label_ori_path,
               zooniverse_output_label_YOLO_path,
               zooniverse_complete_img_path):
    os.makedirs(folder, exist_ok=True)

# How many classifications each image received (kept for inspection).
processed_imgs = [_subject_image_name(row) for _, row in classifications.iterrows()]
annotation_counter = collections.Counter(processed_imgs)

for _, row in classifications.iterrows():

    img_file = _subject_image_name(row)

    # An image classified more than once is NOT skipped: every classification is written,
    # later ones under '<name>-1.txt', '<name>-2.txt', ... (annotation_counter holds the counts).

    is_valid_lbl = True

    output_YOLO_lines = ['YOLO_OBB\n']
    output_ori_lines = []

    for annotation in row['annotations_json']:

        boxes = annotation['value']
        if not boxes:
            is_valid_lbl = False
            print(f'WARNING: {img_file} has zero annotations. Annotation files will not be generated.')
            continue

        for box in boxes:
            # Box centre, computed from the top-left corner and the box size.
            x1, y1 = float(box['x']), float(box['y'])
            x2, y2 = x1 + float(box['width']), y1 + float(box['height'])
            xc, yc = round(0.5*(x1+x2), 6), round(0.5*(y1+y2), 6)

            # A missing or blank label becomes 't', which is reported as invalid below
            # instead of raising on an empty string.
            details = box.get('details') or [{}]
            raw_lbl = details[0].get('value')
            tooth_lbl = '' if raw_lbl is None else str(raw_lbl).strip()
            if not tooth_lbl.startswith('t'):
                tooth_lbl = 't' + tooth_lbl

            if tooth_lbl not in valid_lbls:
                is_valid_lbl = False
                print(f'WARNING: {img_file} --> {tooth_lbl} not valid! Annotation files will not be generated.')
                continue

            # YOLO_OBB = Label Index, x, y, W, H, Theta
            lbl_idx = valid_lbls.index(tooth_lbl)
            width = round(box['width'], 6)
            height = round(box['height'], 6)
            angle = round(box['angle'], 6)

            # for YOLO, the angle is in reversed polarity
            yolo_fields = (lbl_idx, xc, yc, width, height, -1*angle)
            output_YOLO_lines.append(' '.join(str(v) for v in yolo_fields) + '\n')

            # Original coordinates: top-left corner and the unmodified angle.
            ori_fields = (lbl_idx, box['x'], box['y'], width, height, angle)
            output_ori_lines.append(' '.join(str(v) for v in ori_fields) + '\n')

    if is_valid_lbl:
        # splitext copes with file names that contain no dot or more than one dot.
        pf, _ = os.path.splitext(img_file)
        fname = _unused_label_name(pf, (zooniverse_output_label_YOLO_path,
                                        zooniverse_output_label_ori_path))

        with open(os.path.join(zooniverse_output_label_YOLO_path, fname), 'w') as f:
            f.writelines(output_YOLO_lines)

        with open(os.path.join(zooniverse_output_label_ori_path, fname), 'w') as f:
            f.writelines(output_ori_lines)

        # Move the image from the "to work" folder to the "annotated" folder, if it is still there.
        inpath = os.path.join(zooniverse_towork_img_path, img_file)
        outpath = os.path.join(zooniverse_complete_img_path, img_file)
        if os.path.exists(inpath):
            shutil.move(inpath, outpath)
            
            
   

In [None]:
# Visualize the annotated files

# https://stackoverflow.com/questions/34372480/rotate-point-about-another-point-in-degrees-python
def rotate(point, origin, angle):
    """
    Rotate `point` about `origin` by `angle` radians and return the new (x, y).

    The rotation is counterclockwise for a y-up coordinate system.
    """
    px, py = point
    ox, oy = origin
    cos_a = math.cos(angle)
    sin_a = math.sin(angle)

    # Offset of the point from the pivot.
    dx = px - ox
    dy = py - oy

    return ox + cos_a * dx - sin_a * dy, oy + sin_a * dx + cos_a * dy

def rotate_box(box, angle):
    """
    Rotate an axis-aligned box about its centre.

    box   -- [x1, y1, x2, y2] (top-left and bottom-right corners)
    angle -- rotation in degrees
    Returns the rotated corners as a tuple of four (x, y) points in the order
    top left, top right, bottom right, bottom left.
    """
    x1, y1, x2, y2 = box
    theta = math.radians(angle)  # rotate() expects radians

    # True division: floor division would shift the pivot by up to one pixel
    # whenever the coordinates are floats or their sum is odd.
    center = ((x1 + x2) / 2, (y1 + y2) / 2)

    corners = ((x1, y1), (x2, y1), (x2, y2), (x1, y2))
    return tuple(rotate(corner, center, theta) for corner in corners)


os.makedirs(zooniverse_output_img_path, exist_ok=True)

# For every annotated image, draw each version of its label file on a copy of the image.
for path in os.listdir(zooniverse_complete_img_path):
    # splitext copes with names that contain no dot or more than one dot.
    stem, _ = os.path.splitext(path)

    img = cv2.imread(os.path.join(zooniverse_complete_img_path, path))
    if img is None:
        # cv2.imread returns None instead of raising when the file cannot be decoded.
        print(f'WARNING: {path} could not be read as an image. Skipped.')
        continue
    # NOTE(review): cv2.imwrite expects BGR, so after this conversion the (255, 0, 0)
    # colour drawn below is saved as blue - confirm that this is intended.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Label versions are named '<stem>.txt', '<stem>-1.txt', '<stem>-2.txt', ...
    # The suffix is always appended to the base stem, never to the previous version's name.
    version_stem = stem
    i = 1
    while os.path.exists(os.path.join(zooniverse_output_label_ori_path, version_stem + '.txt')):

        dimg = img.copy()
        has_boxes = False
        with open(os.path.join(zooniverse_output_label_ori_path, version_stem + '.txt'), 'r') as f:
            for row in f:
                row = row.split()
                # Skip blank lines and the optional 'YOLO_OBB' header line.
                if not row or row[0] == 'YOLO_OBB':
                    continue

                # Row layout: class index, top-left x, top-left y, width, height, angle (degrees).
                x1, y1 = float(row[1]), float(row[2])
                x2, y2 = x1 + float(row[3]), y1 + float(row[4])
                rotated_box = np.asarray(rotate_box([x1, y1, x2, y2], float(row[5])), dtype=int)
                tooth_lbl = valid_lbls[int(row[0])]

                dimg = cv2.drawContours(dimg, [rotated_box], 0, color=(255,0,0), thickness=4)

                dimg = cv2.putText(img=dimg, text=tooth_lbl, org=np.array([(x1+x2)/2, (y1+y2)/2], dtype=int), fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                                   fontScale=1, color=(255, 0, 0), thickness=2, lineType=cv2.LINE_AA)
                has_boxes = True

        # Write once per label version, and only when at least one box was drawn.
        if has_boxes:
            cv2.imwrite(os.path.join(zooniverse_output_img_path, version_stem + '.png'), dimg)

        version_stem = stem + '-' + str(i)
        i += 1
            
    

In [None]:
# Before running this cell, manually review the annotated image files and delete those not suitable for analysis.
# Where an image has several annotated versions, delete the duplicates and retain only the final version.
import os
import shutil
zooniverse_img_path = './ZooniverseOutput/annotatedimages'
zooniverse_label_path = './ZooniverseOutput/labels_yolo'
zooniverse_dedup_label_path = './ZooniverseOutput/dedup_labels_yolo'

os.makedirs(zooniverse_dedup_label_path, exist_ok=True)

# Maps each de-duplicated label name to the annotated version it was copied from.
copied_from = {}

# For every annotated image still present, copy its YOLO label file under the
# base name, i.e. without the '-<k>' version suffix.
for path in os.listdir(zooniverse_img_path):
    pf, sf = os.path.splitext(path)
    inpath = os.path.join(zooniverse_label_path, pf + '.txt')

    # NOTE(review): only the text before the FIRST hyphen is kept, which assumes the
    # original image names contain no hyphen - confirm.
    pf2, *sf2 = pf.split('-')

    # sf2 is always a list (never None); it is non-empty only when a suffix was removed.
    if sf2:
        print(f'Renamed {pf} to {pf2}')
    outpath = os.path.join(zooniverse_dedup_label_path, pf2 + '.txt')

    if os.path.exists(inpath):
        if pf2 in copied_from:
            # Two retained versions map to the same label file; the later copy wins.
            print(f'WARNING: {pf} overwrites the label already copied from {copied_from[pf2]}')
        copied_from[pf2] = pf
        shutil.copy(inpath, outpath)

### 1. JPEG to PNG Conversion
It is important to bring all images to a consistent size for labelling, and to resize them for uploading to Zooniverse.

In [None]:
from PIL import Image
import os

# Folder containing the source images to convert
dir_path = './4 Private ZOON Anonymised/Bitewing'

# Folder that receives the converted PNG images and their '.pad.log' files
result_path = './4 Private ZOON Anonymised/Bitewing_proc'


def resize_add_margin(inpath, outpath, paddedlogpath):
    """
    Convert an image to grayscale, shrink it if its file is large, pad it and save it.

    inpath        -- source image file
    outpath       -- destination image file (the format follows its extension)
    paddedlogpath -- text file that receives 'xpads ypads width height', where
                     width/height are the image size before padding
    """
    # The context manager releases the source file handle as soon as the pixels are
    # converted; convert() returns an independent grayscale image.
    with Image.open(inpath) as src:
        pil_img = src.convert('L')

    fsize = os.stat(inpath).st_size

    # resize large image (source file bigger than 1e6 bytes)
    if fsize > 1e6:
        # NOTE(review): the factor is derived from the file size but applied to each
        # side, so the pixel count shrinks by factor**2 - confirm this is intended.
        resizefactor = 0.8/(fsize/1e6)
        print(f'{inpath} -- {fsize} too large.')
        w, h = pil_img.size
        # Never let a side round down to zero pixels.
        newsize = (max(1, round(w*resizefactor)), max(1, round(h*resizefactor)))
        pil_img = pil_img.resize(newsize)

    # Add a white (255) margin on every side to facilitate rotated bounding boxes in Zooniverse
    width, height = pil_img.size
    xpads = round(width/7)
    ypads = round(height/8)

    new_width = width + 2*xpads
    new_height = height + 2*ypads

    result = Image.new(pil_img.mode, (new_width, new_height), (255,))
    result.paste(pil_img, (xpads, ypads))
    result.save(outpath, quality=95)

    # Record the padding so that coordinates can later be mapped back to the unpadded image.
    with open(paddedlogpath, 'w') as f:
        f.write(f'{xpads} {ypads} {width} {height}')
    

# Make sure the output folder exists before anything is saved into it.
os.makedirs(result_path, exist_ok=True)

# Convert every file in the source folder; sub-folders are skipped.
for path in os.listdir(dir_path):
    inpath = os.path.join(dir_path, path)
    if not os.path.isfile(inpath):
        continue
    # splitext copes with names that contain no dot or more than one dot.
    pf, sf = os.path.splitext(path)
    outpath = os.path.join(result_path, pf + '.png')
    paddedlogpath = os.path.join(result_path, pf + '.pad.log')
    resize_add_margin(inpath, outpath, paddedlogpath)


### 2. Convert Annotation Files from YOLO format to DOTA format

The annotation label files are in YOLO format, but the YOLO Oriented Bounding Box model requires DOTA format, so the label files must be converted to DOTA. Before conversion, all labels are checked for validity.

In [None]:
import os
import math 
import numpy as np
import random
import collections
import shutil
import torchvision

# Label folders per class granularity:
#   labelTxt_32Cls - the 32 individual tooth classes
#   labelTxt_4Cls  - 4 classes (Upper/Lower x Molar/Nonmolar)
#   labelTxt_16Cls - 16 classes (Upper/Lower x tooth position 1-8)
_proc_root = './4 Private ZOON Anonymised/Bitewing_proc'
_labels_root = _proc_root + '/labels_trial9XX'
_stage2_root = _labels_root + '/Stage2_yolo_obb'

image_folder_path = _proc_root + '/images'

# Annotation files converted and deduplicated from Zooniverse (zooniverse_dedup_label_path)
# should be accumulated in this folder
labelImg_path = _labels_root + '/data_labelImg_OBB_accumulated'

train_img_path = _stage2_root + '/train/images'
train_label_32cls_path = _stage2_root + '/train/labelTxt_32Cls'
train_label_4cls_path = _stage2_root + '/train/labelTxt_4Cls'
train_label_16cls_path = _stage2_root + '/train/labelTxt_16Cls'

val_img_path = _stage2_root + '/val/images'
val_label_32cls_path = _stage2_root + '/val/labelTxt_32Cls'
val_label_4cls_path = _stage2_root + '/val/labelTxt_4Cls'
val_label_16cls_path = _stage2_root + '/val/labelTxt_16Cls'

labelfile = _labels_root + '/classes.txt'

# 't<quadrant><position>' for quadrants 1-4 and positions 1-8, quadrant by quadrant.
valid_lbls = [f't{quadrant}{position}' for quadrant in range(1, 5) for position in range(1, 9)]

# Quadrants 1 and 2 map to 'U', quadrants 3 and 4 to 'L'; positions 6-8 are molars.
class_map_4classes = {
    lbl: ('U' if lbl[1] in '12' else 'L') + ('_molar' if int(lbl[2]) >= 6 else '_nonmolar')
    for lbl in valid_lbls
}

# Same jaw prefix followed by the tooth position, e.g. 't26' -> 'U6'.
class_map_16classes = {
    lbl: ('U' if lbl[1] in '12' else 'L') + lbl[2]
    for lbl in valid_lbls
}


# Create each train/val output folder that does not exist yet.
for out_dir in (train_img_path,
                train_label_32cls_path,
                train_label_4cls_path,
                train_label_16cls_path,
                val_img_path,
                val_label_32cls_path,
                val_label_4cls_path,
                val_label_16cls_path):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

# Class names, one per line; a label file's class index selects an entry of this list.
with open(labelfile, 'r') as f:
    labels = [line.strip() for line in f]
    


In [None]:
# This checks the validity of labels

# Every regular file in the accumulated label folder is treated as a label file.
filelist = [path for path in os.listdir(labelImg_path)
            if os.path.isfile(os.path.join(labelImg_path, path))]

is_invalid_lbl = False

# Quadrant pairs that may appear together in one image; any other mix is contradictory,
# e.g. t23 and t14 existing at the same time.
compatible_quadrants = ({'t1', 't4'}, {'t2', 't3'})

for path in filelist:
    with open(os.path.join(labelImg_path, path), 'r') as f:
        ann_points = f.readlines()

    list_lbl = []
    # The first line of each label file is the 'YOLO_OBB' header.
    for row in ann_points[1:]:
        row = row.split()
        if not row:
            continue  # tolerate blank lines

        # Report an unusable class index instead of crashing, or silently wrapping
        # around for a negative index.
        try:
            lbl_idx = int(row[0])
        except ValueError:
            lbl_idx = -1
        if not 0 <= lbl_idx < len(labels):
            print(f'WARNING! {path} contains unknown class index {row[0]}')
            is_invalid_lbl = True
            continue

        lbl = labels[lbl_idx]
        # check the validity of labels
        if lbl not in valid_lbls:
            print(f'WARNING! {path} contains invalid label {lbl}')
            is_invalid_lbl = True
        else:
            list_lbl.append(lbl)

    # a tooth may be labelled only once per image
    for k, v in collections.Counter(list_lbl).items():
        if v > 1:
            is_invalid_lbl = True
            print(f'WARNING! {path} contains duplicated label {k}')

    # this checks contradicting labels across quadrants
    quad_set = {s[0:2] for s in list_lbl}
    if len(quad_set) > 1 and quad_set not in compatible_quadrants:
        quads = ''.join(sorted(quad_set))
        print(f'WARNING! {path} contains invalid labels in contradicting quadrants {quads}')
        is_invalid_lbl = True


if is_invalid_lbl:
    print('Invalid labels found. Please rectify before continuing.')
        

In [None]:
#https://mmrotate.readthedocs.io/en/latest/intro.html
#https://mmrotate.readthedocs.io/en/latest/tutorials/customize_dataset.html

# YOLO_OBB = Label Index, x, y, W, H, Theta
# DOTA = xA,yA, xB,yB, xC,yC, xD,yD, Category, Difficulty 

# Tooth labels collected per split during the conversion.
samplelabels = dict(train=[], val=[])

# Fixed seed for the random module.
random.seed(721)


# Angular sorting of coordinates, after
# https://pavcreations.com/clockwise-and-counterclockwise-sorting-of-coordinates/
def sort_coordinates(list_of_xy_coords, is_clockwise):
    """
    Order 2-D points by their angle around the centroid of the points.

    The angle of a point is arctan2(x - cx, y - cy). Points are returned in
    ascending angle order, or in descending order when `is_clockwise` is true.
    `list_of_xy_coords` is an (N, 2) array; a reordered array is returned.
    """
    xs, ys = list_of_xy_coords.T
    cx, cy = list_of_xy_coords.mean(0)

    sort_key = np.arctan2(xs - cx, ys - cy)
    if is_clockwise:
        sort_key = -1 * sort_key

    return list_of_xy_coords[np.argsort(sort_key)]
 
# 70 % of the label files go to the training split, the rest to validation.
trainlist = random.sample(filelist, round(0.7*len(filelist)))
# Set copy for O(1) membership tests; trainlist itself stays the sampled list.
train_files = set(trainlist)


def _copy_image_from_subfolders(imagename, subfolders, dest_dir):
    """Copy `imagename` from the first of `subfolders` that contains it into `dest_dir`.

    Returns True when the image was found and copied, False otherwise.
    """
    for subfolder in subfolders:
        src = os.path.join(subfolder, imagename)
        if os.path.exists(src):
            shutil.copyfile(src, os.path.join(dest_dir, imagename))
            return True
    return False


if not is_invalid_lbl:

    # Images are looked up one level below image_folder_path; list those folders once.
    image_subfolders = [os.path.join(image_folder_path, p) for p in os.listdir(image_folder_path)]
    image_subfolders = [p for p in image_subfolders if not os.path.isfile(p)]

    # Sign pattern of the four box corners relative to the centre (A, B, C, D).
    corner_signs = ([-0.5, -0.5], [-0.5, 0.5], [0.5, 0.5], [0.5, -0.5])

    for path in filelist:

        is_train = path in train_files
        split = 'train' if is_train else 'val'

        rows_dota_32cls = []
        rows_dota_4cls = []
        rows_dota_16cls = []

        with open(os.path.join(labelImg_path, path), 'r') as f:
            ann_points = f.readlines()

        # map points to different classes combinations (first line is the header)
        for row in ann_points[1:]:
            row = row.split()
            if not row:
                continue  # tolerate blank lines

            lbl = labels[int(row[0])]

            # An invalid label cannot be mapped to the reduced class sets: skip the row
            # rather than failing with a KeyError right after the warning.
            if lbl not in valid_lbls:
                print(f'WARNING! {path} contains invalid label {lbl}')
                continue

            lbl_4cls = class_map_4classes[lbl]
            lbl_16cls = class_map_16classes[lbl]

            samplelabels[split].append(lbl)

            # Input row: class index, centre x, centre y, width, height, angle in degrees.
            center = np.array([float(row[1]), float(row[2])])
            w_h = np.array([float(row[3]), float(row[4])])
            theta = np.radians(float(row[5]))

            rotate_mat = np.array([[np.cos(theta), np.sin(theta)],
                                   [-1*np.sin(theta), np.cos(theta)]])

            # Rotate the four half-extent offsets and round to integer pixel corners.
            row_c = np.array([
                np.around(center + np.matmul(rotate_mat, w_h*np.array(signs))).astype(int)
                for signs in corner_signs
            ])

            # sort the coordinates in clockwise manner
            row_c = sort_coordinates(row_c, is_clockwise=True)

            # start the sequence at the corner closest to the origin
            shift = np.argmin(np.sum(row_c*row_c, axis=1))
            row_c = np.roll(row_c, -1*shift, axis=0).flatten()

            # DOTA row: xA yA xB yB xC yC xD yD Category Difficulty (difficulty set to 0)
            coords = [str(r) for r in row_c]
            rows_dota_32cls.append(' '.join([*coords, str(lbl), str(0)]) + '\n')
            rows_dota_4cls.append(' '.join([*coords, str(lbl_4cls), str(0)]) + '\n')
            rows_dota_16cls.append(' '.join([*coords, str(lbl_16cls), str(0)]) + '\n')

        # only write converted format if at least one row was converted
        if rows_dota_32cls:

            # splitext copes with names that contain no dot or more than one dot.
            prefix, _ = os.path.splitext(path)
            imagename = prefix + '.png'

            if is_train:
                img_dir = train_img_path
                outputs = ((train_label_32cls_path, rows_dota_32cls),
                           (train_label_4cls_path, rows_dota_4cls),
                           (train_label_16cls_path, rows_dota_16cls))
            else:
                img_dir = val_img_path
                outputs = ((val_label_32cls_path, rows_dota_32cls),
                           (val_label_4cls_path, rows_dota_4cls),
                           (val_label_16cls_path, rows_dota_16cls))

            # 'w' rather than 'a': each label file is produced from exactly one source
            # file, so appending would duplicate every row whenever the cell is re-run.
            for out_dir, rows in outputs:
                with open(os.path.join(out_dir, path), 'w') as f:
                    f.writelines(rows)

            # copy the image file to the image folder of the same split
            if not _copy_image_from_subfolders(imagename, image_subfolders, img_dir):
                print(f'WARNING! no image {imagename} found for {path}')

    # Per-label annotation counts for each split, sorted by label name.
    sorted_dict_train = dict(sorted(collections.Counter(samplelabels['train']).items()))
    sorted_dict_val = dict(sorted(collections.Counter(samplelabels['val']).items()))

    # Total count per valid label (0 for labels that never occur).
    label_count = {lbl: sorted_dict_train.get(lbl, 0) + sorted_dict_val.get(lbl, 0)
                   for lbl in valid_lbls}



In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# One quadrant name per tooth label: 8 consecutive labels for each of the 4 quadrants.
quadrant_names = ('Top Left', 'Top Right', 'Bottom Right', 'Bottom Left')
xx = [name for name in quadrant_names for _ in range(0, 8)]

data = {
    'Quadrant name': xx,
    'Tooth number': label_count.keys(),
    'count': label_count.values(),
}

df = pd.DataFrame(data)

# Bar chart of the annotation count per tooth label, coloured by quadrant.
ax = sns.barplot(data=df, x='Tooth number', y='count', hue='Quadrant name', dodge=False)
plt.title("Annotation Frequency of Tooth Numbers")
plt.xticks(rotation=90)
plt.ylabel("Frequency")
plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', borderaxespad=0)

### 3. Prepare Dataset for Stage 1 Left/Right Quadrant Classifier

In [None]:
# https://github.com/bentrevett/pytorch-image-classification/blob/master/5_resnet.ipynb

# DOTA = xA,yA, xB,yB, xC,yC, xD,yD, Category, Difficulty 

import os
import math 
import numpy as np
import random
import collections
import shutil
import torchvision
import torchvision.transforms as transforms
from PIL import Image


def flip_merge(img):
    """
    Return a 224x224 image showing `img` next to its left-right mirror image.

    A portrait image (width < height) is rotated by 90 degrees first. The image
    is pasted on the left half and its mirror image on the right half of a
    canvas twice as wide, which is then resized to 224x224.
    """
    if img.size[0] < img.size[1]:
        img = img.transpose(Image.ROTATE_90)
    width, height = img.size

    canvas = Image.new(img.mode, (2*width, height), (255,))
    canvas.paste(img, (0, 0))
    canvas.paste(img.transpose(Image.FLIP_LEFT_RIGHT), (width, 0))

    return canvas.resize((224, 224))


# Fixed seed for the random module.
random.seed(721)

res = []

_labels_root = './4 Private ZOON Anonymised/Bitewing_proc/labels_trial9XX'
_stage2_root = _labels_root + '/Stage2_yolo_obb'
_stage1_root = _labels_root + '/Stage1_dataset_LR_classifier'

labelfile = _labels_root + '/classes.txt'

# Training split: label and image input folders, LEFT/RIGHT output folders.
train_label_path = _stage2_root + '/train/labelTxt_32Cls'
train_image_folder_path = _stage2_root + '/train/images'
train_class_left_dataset_path = _stage1_root + '/train/LEFT'
train_class_right_dataset_path = _stage1_root + '/train/RIGHT'

# Test split: read from the 'val' folders, written to the 'test' folders.
test_label_path = _stage2_root + '/val/labelTxt_32Cls'
test_image_folder_path = _stage2_root + '/val/images'
test_class_left_dataset_path = _stage1_root + '/test/LEFT'
test_class_right_dataset_path = _stage1_root + '/test/RIGHT'

# Parallel lists, one entry per split (train first, then test).
label_paths = [train_label_path, test_label_path]
image_paths = [train_image_folder_path, test_image_folder_path]
left_paths = [train_class_left_dataset_path, test_class_left_dataset_path]
right_paths = [train_class_right_dataset_path, test_class_right_dataset_path]

# Number of images saved to each class folder.
total_right_cnt = 0
total_left_cnt = 0

# Labels counted towards the RIGHT class (quadrants 1 and 4) and the LEFT class
# (quadrants 2 and 3).
right_lbls = {f't{quadrant}{position}' for quadrant in (1, 4) for position in range(1, 9)}
left_lbls = {f't{quadrant}{position}' for quadrant in (2, 3) for position in range(1, 9)}

# Class names, one per line (read once; not needed by the loop below).
with open(labelfile, 'r') as f:
    labels = [s.strip() for s in f.readlines()]

# convert image to 3-channel grayscale for simple classification using Resnet
transform = transforms.Grayscale(num_output_channels=3)

# Transpose operation for each number of quarter turns; 0 leaves the image unchanged.
turn_ops = {1: Image.ROTATE_90, 2: Image.ROTATE_180, 3: Image.ROTATE_270}

for lbl_path, img_path, cls_l_path, cls_r_path in zip(label_paths, image_paths, left_paths, right_paths):

    os.makedirs(cls_l_path, exist_ok=True)
    os.makedirs(cls_r_path, exist_ok=True)

    filelist = [p for p in os.listdir(lbl_path)
                if os.path.isfile(os.path.join(lbl_path, p))]

    for path in filelist:

        right_lbl_cnt = 0
        left_lbl_cnt = 0
        with open(os.path.join(lbl_path, path), 'r') as f:
            # DOTA row: xA yA xB yB xC yC xD yD Category Difficulty.
            # Every row is read: the label files written by the conversion step have no
            # header line, so skipping the first line would drop an annotation. Lines
            # with fewer than nine fields (blank or header lines) are ignored.
            for row in f:
                row = row.split()
                if len(row) < 9:
                    continue
                lbl = row[8]
                if lbl in right_lbls:
                    right_lbl_cnt += 1
                elif lbl in left_lbls:
                    left_lbl_cnt += 1

        # splitext copes with names that contain no dot or more than one dot.
        prefix, _ = os.path.splitext(path)
        imagename = prefix + '.png'
        src_image = os.path.join(img_path, imagename)
        if not os.path.exists(src_image):
            print(f'WARNING! {src_image} not found. Skipped.')
            continue

        # The context manager closes the image file once the result has been saved.
        with Image.open(src_image) as src:
            img = transform(src)

            # randomly rotate the image by 0, 1, 2 or 3 quarter turns
            quarter_turns = random.sample([0, 1, 2, 3], 1)[0]
            if quarter_turns in turn_ops:
                img = img.transpose(turn_ops[quarter_turns])

            # An image with equally many left and right labels is saved to neither folder.
            if right_lbl_cnt > left_lbl_cnt:
                # Save the image file to the RIGHT class folder
                img.save(os.path.join(cls_r_path, imagename), quality=95)
                total_right_cnt += 1

            elif left_lbl_cnt > right_lbl_cnt:
                # Save the image file to the LEFT class folder
                img.save(os.path.join(cls_l_path, imagename), quality=95)
                total_left_cnt += 1
    
                
         