In [None]:
import matplotlib.pyplot as plt
# Open a tall figure and read one sample image from disk, printing the shape of the loaded array.
plt.figure(figsize=(10,20))
img = plt.imread("./books/chudo_derevo_redmi/IMG_20190715_112912.labeled.jpg")
print(img.shape)

import cv2
import json

from collections import defaultdict
#from . 
import letters

# Reverse lookup built from every dictionary in letters.letter_dicts:
# value of those dictionaries (a character) -> set of all keys (labels) that map to it.
# A character may collect more than one label when several dictionaries disagree.
reverce_dict = defaultdict(set)
for d in letters.letter_dicts.values():
    # print(d)
    for lbl123, char in d.items():
        reverce_dict[char].add(lbl123)
        
# print(reverce_dict)

# Alternative spellings found in manual annotations. human_label_to_int replaces a
# (lower-cased) label by the value given here before looking it up in reverce_dict.
labeling_synonyms = {
    "xx": "XX",
    "хх": "XX",  # russian х on the left
    "cc": "CC",
    "сс": "CC",  # russian с on the left
    "<<": "«",
    ">>": "»",
    "((": "()",
    "))": "()",
    "№": "н",
    "&&": "§",
}

# Weight of each dot: v[k] is the value added to an int label for dot number k+1
# (see label123_to_int, which indexes it with int(ch)-1).
v = [1, 2, 4, 8, 16, 32]

def validate_int(int_label):
    '''
    Check that int_label is an int lying in [0..63].
    Fails with AssertionError otherwise (the range failure carries the offending value).
    '''
    assert isinstance(int_label, int)
    assert 0 <= int_label < 64, "Ошибочная метка: " + str(int_label)

def label123_to_int(label123):
    '''
    Convert a label in "123" format (a string of dot numbers, e.g. '246') to int_label.

    Each character must be a dot number between 1 and len(v); the matching weights
    from the module-level list v are summed.
    :param label123: string of dot numbers
    :return: int label, checked with validate_int
    :raises ValueError: if label123 is not iterable or contains anything but dot numbers 1..len(v)
    :raises AssertionError: from validate_int when the sum leaves [0..63]
        (only possible with repeated dot numbers, e.g. '66')
    '''
    try:
        dots = [int(ch) for ch in label123]
        # Range-check explicitly: for '0' the index int(ch)-1 is -1, which Python
        # resolves to the LAST weight instead of failing, so '0' would pass as dot 6.
        if any(dot < 1 or dot > len(v) for dot in dots):
            raise ValueError(label123)
        r = sum(v[dot - 1] for dot in dots)
    except (TypeError, ValueError):
        # str() keeps the message buildable even when label123 is not a string
        raise ValueError("incorrect label in 123 format: " + str(label123))
    validate_int(r)
    return r

def human_label_to_int(label):
    '''
    Convert label from manual annotations to int_label.

    The label is lower-cased, then handled in one of two ways:
      * '~<dots>' or '~<dots>~' : the dot numbers between the tildes go straight
        to label123_to_int;
      * anything else : replaced through labeling_synonyms when listed there and
        looked up in reverce_dict to obtain its dot-number label.
    :param label: annotation text
    :return: int label as returned by label123_to_int
    :raises ValueError: if the label is empty or not found in reverce_dict,
        or if its dot numbers are invalid
    '''
    label = label.lower()
    # startswith/endswith instead of label[0] / label123[-1]: an empty label or a
    # lone '~' must not die with IndexError
    if label.startswith('~'):
        label123 = label[1:]
        if label123.endswith('~'):
            label123 = label123[:-1]
    else:
        label = labeling_synonyms.get(label, label)
        ch_list = reverce_dict.get(label, None)
        if not ch_list:
            raise ValueError("unrecognized label: " + label)
        # A character with several candidate labels is tolerated on purpose.
        # Take the smallest one: a set has no stable order, so list(ch_list)[0]
        # could yield a different label from one run to the next.
        label123 = min(ch_list)
    return label123_to_int(label123)

def limiting_scaler(source, dest):
    '''
    Builds a converter of coordinates from the source scale to the dest scale, clamped to [0..dest)
    :param source: source scale
    :param dest: dest scale
    :return: function f(x) for linear conversion [0..source)->[0..dest) so that
        f(0) = 0, f(source-1) = (source-1)/source*dest, f(x<0)=0, f(x>=source) = (source-1)/source*dest
    '''
    def scale(x):
        # clamp into [0, source-1] first, truncate to int, then rescale
        clamped = min(max(0, x), source-1)
        return int(clamped) * dest/source
    return scale

def read_LabelMe_annotation(label_filename, get_points):
    '''
    Reads LabelMe (see https://github.com/IlyaOvodov/labelme labelling tool) annotation JSON file.
    :param label_filename: path to LabelMe annotation JSON file (decoded as cp1251)
    :param get_points: must be falsy; points mode is not implemented for LabelMe annotations
    :return: tuple (boxes, labels) with one entry per item of the file's "shapes" list:
        boxes  - [left, top, right, bottom], the extent of the shape's points, in the
                 same units as stored in the file (no rescaling is applied);
        labels - int label produced by human_label_to_int from the shape's "label"
    :raises NotImplementedError: if get_points is truthy
    '''
    if get_points:
        raise NotImplementedError("read_annotation get_point mode not implemented for LabelMe annotation")
    with open(label_filename, 'r', encoding='cp1251') as opened_json:
        loaded = json.load(opened_json)

    # single pass over the shapes keeps every box aligned with its label
    boxes, labels = [], []
    for shape in loaded["shapes"]:
        xvals = [coords[0] for coords in shape["points"]]
        yvals = [coords[1] for coords in shape["points"]]
        boxes.append([min(xvals), min(yvals), max(xvals), max(yvals)])
        labels.append(human_label_to_int(shape["label"]))

    return boxes, labels

# Smoke test: parse the annotation of one sample image and dump the boxes and labels read from it.
boxes, labels = read_LabelMe_annotation("./books/chudo_derevo_redmi/IMG_20190715_112912.labeled.json", False) 
print(boxes, labels)

# with open("./books/chudo_derevo_redmi/IMG_20190715_112912.labeled.json", 'r') as f:
#     jdata = json.load(f)
#     pts = jdata['shapes'][0]['points']
#     lbl = jdata['shapes'][0]['label']
#     print("PTS:", pts)
#     print("LBL:", lbl)
# import numpy as np
# import cv2

# # Get the image size and compute the image centre.
# # (h, w) = img.shape[:2]
# # (cX, cY) = (w // 2, h // 2)
 
# # Rotate the image about its centre.
# # M = cv2.getRotationMatrix2D((cX, cY), 0.4, 1.0)
# # img = cv2.warpAffine(img, M, (w, h))

# for (x1, y1, x2, y2, lbl) in rects:
#     # print(x1, y1, x2, y2, lbl)
#     img = cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 1)
#     img = cv2.putText(img, str(lbl) , (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)
# # # img = cv2.rectangle(img, (441, 284), (477, 343), (0,0,255), 1)
# # # img = cv2.rectangle(img, (493, 284), (527, 343), (0,0,255), 1)

# # for c in cs:
# #     img = cv2.rectangle(img, (c.left, c.top), (c.right, c.bottom), (255,0,0), 1)
# #     # img = cv2.rectangle(img, (500, 291), (521,337), (255, 0, 0), 1)

# # img = cv2.rectangle(img, (285, 85), (313, 128), (0,0,255), 1)

# plt.imshow(img)

In [None]:
import collections

from collections import defaultdict
# from . import letters

def validate_int(int_label):
    '''
    Ensure int_label is an int inside [0..63].
    An AssertionError is raised when it is not.
    '''
    assert isinstance(int_label, int)
    assert not (int_label < 0 or int_label >= 64), "Ошибочная метка: " + str(int_label)

def label010_to_int(label010):
    '''
    Convert a label in label010 format to int_label.

    Only the first six characters are read; a '1' at position i (0-based)
    contributes 2**i, any other character contributes nothing.
    '''
    r = 0
    for i in range(6):
        if label010[i] == '1':
            r += 1 << i
    validate_int(r)
    return r

# Record describing one annotated symbol, as built by read_txt.
CellInfo = collections.namedtuple(
    'CellInfo',
    'row col '                # row and column in a symbol grid
    'left top right bottom '  # symbol corner coordinates in pixels
    'label')                  # symbol label either like '246' or '010101' format

def read_txt(file_txt, binary_label = True):
    """
    Loads Braille annotation from DSBI annotation txt file
    :param file_txt: filename of txt file
    :param binary_label: return symbol label in binary format, like '010101' (if True),
        or human readable like '246' (if False)
    :return: tuple (
        angle: value from 1st line of annotation file,
        h_lines: list of horizontal lines Y-coordinates (3rd line of the file),
        v_lines: list of vertical lines X-coordinates (2nd line of the file),
        cells: symbols as list of CellInfo
    )
    None, None, None, None for empty annotation (fewer than 3 lines)
    :raises AssertionError: on a malformed line (wrong number of values, or a dot flag
        other than '0'/'1' when binary_label is False)
    """
    # NOTE(review): eval() executes whatever expression the file contains. It is kept
    # so numeric values keep their original int/float types, but this function must
    # only be pointed at trusted annotation files.
    with open(file_txt, 'r') as f:
        l = f.readlines()
    if len(l) < 3:
        return None, None, None, None
    angle = eval(l[0])
    # split() without an argument tolerates repeated/trailing blanks and the newline
    v_lines = list(map(eval, l[1].split()))
    assert len(v_lines)%2 == 0, (file_txt, len(v_lines))
    h_lines = list(map(eval, l[2].split()))
    assert len(h_lines)%3 == 0, (file_txt, len(h_lines))
    cells = []
    for cell_ln in l[3:]:
        # Whitespace splitting instead of cell_ln[:-1]: the old slice dropped the last
        # dot flag whenever the final line had no trailing newline.
        cell_nums = cell_ln.split()
        if not cell_nums:
            continue  # blank line, e.g. at the end of the file
        assert len(cell_nums) == 8, (file_txt, cell_ln)
        row = eval(cell_nums[0])
        col = eval(cell_nums[1])
        if binary_label:
            label = ''.join(cell_nums[2:])
        else:
            label = ''
            for i, c in enumerate(cell_nums[2:]):
                if c == '1':
                    label += str(i+1)
                else:
                    assert c == '0', (file_txt, cell_ln, i, c)
        # row/col are 1-based; each column owns 2 vertical lines (left, right) and
        # each row owns 3 horizontal lines, of which the first and third bound the cell
        left = v_lines[(col-1)*2]
        right = v_lines[(col-1)*2+1]
        top = h_lines[(row-1)*3]
        bottom = h_lines[(row-1)*3+2]
        cells.append(CellInfo(row=row, col=col,
                              left=left, top=top, right=right, bottom=bottom,
                              label=label))
    return angle, h_lines, v_lines, cells


def read_DSBI_annotation(label_filename, width, height, rect_margin, get_points):
    """
    Loads symbol boxes and int labels from a DSBI annotation txt file.
    :param label_filename: filename of txt file
    :param width: divisor applied to horizontal coordinates (image width)
    :param height: divisor applied to vertical coordinates (image height)
    :param rect_margin: every box is grown on all four sides by rect_margin * (cell width)
    :param get_points: accepted but not used by this implementation
    :return: tuple (boxes, labels).
        boxes: one [left, top, right, bottom] per cell whose label is not '000000',
            enlarged by the margin and divided by width / height.
        labels: label010_to_int of the same cells, in the same order.
        For an empty annotation (read_txt yields no cell list) the result is
        ([[0, 0, width, height]], [0]).
    """
    _, _, _, cells = read_txt(label_filename, binary_label=True)
    if cells is None:
        return [[0,0, width, height]], [0]

    boxes = []
    labels = []
    for c in cells:
        if c.label == '000000':
            continue
        # the same margin (based on the cell WIDTH) is used horizontally and vertically
        margin = rect_margin * (c.right - c.left)
        boxes.append([(c.left - margin) / width,
                      (c.top - margin) / height,
                      (c.right + margin) / width,
                      (c.bottom + margin) / height])
        labels.append(label010_to_int(c.label))
    return boxes, labels

In [None]:
# make_lst / make_lst_dsbi append to the list they are given, so both lists are
# reset here; re-running the cell therefore does not accumulate duplicates.
train_lst = []
val_lst = []

def make_lst(mode, data_lst, data_dir):
    """
    Append the image/annotation pairs named in a split file to data_lst.

    :param mode: 'val' reads data_dir + 'val.txt'; any other value reads data_dir + 'train.txt'
    :param data_lst: list to extend (modified in place)
    :param data_dir: directory prefix; it is joined by plain string concatenation,
        so it has to end with a path separator
    :return: data_lst, with one {'img': ..., 'json': ...} dict added per non-blank line.
        Backslashes in a line are turned into '/', and the 'json' path is the image
        path with '.jpg' replaced by '.json'.
    """
    list_name = 'val.txt' if mode == 'val' else 'train.txt'

    with open(data_dir + list_name, 'r') as f:
        for line in f:
            rel_path = line.strip().replace('\\', '/')
            if not rel_path:
                # a blank line would otherwise become an entry pointing at data_dir itself
                continue
            data_lst.append(
                {'img': data_dir + rel_path,
                 'json': data_dir + rel_path.replace('.jpg', '.json')}
            )
    return data_lst

def make_lst_dsbi(mode, data_lst, data_dir):
    """
    Append the DSBI image/annotation pairs named in a split file to data_lst.

    :param mode: 'val' reads data_dir + 'test.txt'; any other value reads data_dir + 'train.txt'
    :param data_lst: list to extend (modified in place)
    :param data_dir: directory prefix; it is joined by plain string concatenation,
        so it has to end with a path separator
    :return: data_lst, with one {'img': ..., 'txt': ...} dict added per non-blank line.
        Both paths live under data_dir + 'data/'; '.jpg' in the listed name is replaced
        by '+recto.jpg' for the image and by '+recto.txt' for the annotation.
    """
    list_name = 'test.txt' if mode == 'val' else 'train.txt'

    with open(data_dir + list_name, 'r') as f:
        for line in f:
            rel_path = line.strip().replace('\\', '/')
            if not rel_path:
                # a blank line would otherwise become an entry pointing at the data folder itself
                continue
            data_lst.append(
                {'img': data_dir + 'data/' + rel_path.replace('.jpg', '+recto.jpg'),
                 'txt': data_dir + 'data/' + rel_path.replace('.jpg', '+recto.txt')}
            )
    return data_lst


# Training list: DSBI train split plus the train splits of the three local folders.
train_lst = make_lst_dsbi('train', train_lst, '/opt/ml/DSBI/')
train_lst = make_lst('train', train_lst, './books/')
train_lst = make_lst('train', train_lst, './handwritten/')
train_lst = make_lst('train', train_lst, './not_braille/')
# NOTE(review): mode 'val' makes make_lst_dsbi read test.txt, so the DSBI test split
# is appended to the TRAINING list here (it is left out of val_lst below) - confirm intended.
train_lst = make_lst_dsbi('val', train_lst, '/opt/ml/DSBI/')

# Validation list: only the val splits of ./books/ and ./handwritten/.
# val_lst = make_lst_dsbi('val', val_lst, '/opt/ml/DSBI/')
val_lst = make_lst('val', val_lst, './books/')
val_lst = make_lst('val', val_lst, './handwritten/')

print(len(train_lst), len(val_lst))
# NOTE(review): the image path is taken from entry 0 but the annotation below from entry 1;
# t1_img is not used afterwards.
t1_img = train_lst[0]['img']

# Smoke test of the reader matching the entry's annotation type ('json' -> LabelMe, else DSBI txt).
if 'json' in train_lst[1].keys():
    t1_json = train_lst[1]['json']
    boxes, labels = read_LabelMe_annotation(t1_json, False)
    # print(t1_img)
    print(labels)
    print(boxes)
else:
    t1_txt = train_lst[1]['txt']
    print(t1_txt)
    # width = height = 1: coordinates are divided by 1, i.e. left unscaled
    boxes, labels = read_DSBI_annotation(t1_txt, 1, 1, 0.5, False)
    # print(t1_img)
    print(labels)
    print(boxes)

# x = [i for i in range(65)]    
# print(x)

# pip install fiftyone
# from tqdm import tqdm
# import fiftyone as fo


# dataset = fo.Dataset()
# # dataset.default_classes = [str(i) for i in range(65)]
# dataset.save()
# # print(dataset)
# samples = []

# from PIL import Image
# # for idx, trn in tqdm(enumerate(train_lst)):
# #     # print(idx, trn)
# #     # boxes, labels = read_LabelMe_annotation(trn['json'], False)

# #     if 'json' in trn.keys():
# #         boxes, labels = read_LabelMe_annotation(trn['json'], False)
# #     else:
# #         boxes, labels = read_DSBI_annotation(trn['txt'], 1, 1, 0.5, False)      
    
# #     # sample = fo.Sample(filepath=trn['img'])
# #     sample = fo.Sample(filepath=trn['img'], tags=["train"])
# #     image = Image.open(sample.filepath)
# #     image = np.array(image)
# #     h, w, c = image.shape  
# #     # print(c, h, w)
    
# #     detections = []
    
# #     for idx, (x1, y1, x2, y2) in enumerate(boxes):
# #         # print("box:", (x1, y1, x2, y2), "label:", labels[idx])
# #         rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]
# #         detections.append(fo.Detection(label=str(labels[idx]), bounding_box=rel_box))    
# #         sample["ground_truth"] = fo.Detections(detections=detections)
# #         # break

# #     samples.append(sample)
# #     # print("samples:", samples)
# #     # break

# for idx, trn in tqdm(enumerate(val_lst)):
#     # print(idx, trn)
#     # boxes, labels = read_LabelMe_annotation(trn['json'], False)

#     if 'json' in trn.keys():
#         boxes, labels = read_LabelMe_annotation(trn['json'], False)
#     else:
#         boxes, labels = read_DSBI_annotation(trn['txt'], 1, 1, 0.5, False)      
    
#     sample = fo.Sample(filepath=trn['img'], tags=["val"])
#     image = Image.open(sample.filepath)
#     image = np.array(image)
#     h, w, c = image.shape  
#     # print(c, h, w)
    
#     detections = []
    
#     for idx, (x1, y1, x2, y2) in enumerate(boxes):
#         # print("box:", (x1, y1, x2, y2), "label:", labels[idx])
#         rel_box = [x1 / w, y1 / h, (x2 - x1) / w, (y2 - y1) / h]
#         detections.append(fo.Detection(label=str(labels[idx]), bounding_box=rel_box))    
#         sample["ground_truth"] = fo.Detections(detections=detections)
#         # break

#     samples.append(sample)
#     # print("samples:", samples)
#     # break    

# print('add_samples...')
# dataset.add_samples(samples)
# dataset.save()  # must save after edits

# # splits = ["train", "val"]
# splits = ["val"]

# print('export...')

# # classes = ["list", "of", "classes"] #[str(i) for i in range(65)]

# for split in splits:
#     split_view = dataset.match_tags(split)
#     # print(dir(split_view))
#     print(split_view.to_json())
    
#     split_view.export(       
#         export_dir = "../DATA_YOLO3/",
#         # dataset_type = fo.types.YOLOv4Dataset,
#         dataset_type = fo.types.YOLOv5Dataset,
#         label_field = "ground_truth",
#         # labels_path = "labels"
#         # classes=classes,
#     )

# # Export the dataset
# # dataset.export(
# #     export_dir="../DATA_COCO/",
# #     dataset_type=fo.types.COCODetectionDataset,
# #     label_field="ground_truth",
# # )

# print('finished')

# print(fo.__version__)

In [None]:
#https://github.com/IlyaOvodov/AngelinaReader/blob/master/data_utils/data.py
# Looks like a box (detection) model is what is needed here - no segmentation.
import torch
from torch.utils.data import Dataset, DataLoader
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# from albumentations import bbox_utils

# Show the installed albumentations version.
print(A.__version__)

# x = bbox_utils.convert_bboxes_to_albumentations
# def x(bboxes, source_format, rows, cols, check_validity=False):
#     bbox_utils.convert_bboxes_to_albumentations(bboxes, source_format, rows, cols, False)
    
# def check_bbox(bbox, maxsize=510.):
#     """Check if bbox boundaries are in range 0, 1 and minimums are lesser then maximums"""
#    #my added block 
#     bbox=list(bbox)
#     for i in range(4):
#         print(bbox[i])
#         if (bbox[i]<0.) :
#             bbox[i]=0.
#         elif (bbox[i]>maxsize) :
#             bbox[i]=maxsize
#     return tuple(bbox)

class BrailleDataset(Dataset):
    """
    Detection dataset over a list of image/annotation file pairs.

    Every file_list entry is a dict holding 'img' plus either 'json' (parsed with
    read_LabelMe_annotation) or 'txt' (parsed with read_DSBI_annotation).
    An item is (image, target): image is a float32 tensor in channel-first order with
    values divided by 255, target is
    {"boxes": float32 tensor (N, 4) as [x1, y1, x2, y2], "labels": int64 tensor (N,)}.
    """

    # Side of the first random crop; smaller images are returned without augmentation.
    MIN_AUG_SIZE = 512

    def __init__(self, file_list, mode, transform = None):
        """
        :param file_list: list of dicts as described on the class
        :param mode: 'train' enables the built-in random augmentation; any other value
            returns images and boxes as read from disk
        :param transform: stored on the instance only.
            NOTE(review): it is not applied anywhere; the built-in pipeline is always used.
        """
        super().__init__()
        self.mode = mode
        self.file_list = file_list
        self.transform = transform
        # augmentation pipeline, created on first use and then reused for every item
        self._train_aug = None

    def _build_train_aug(self):
        """Built-in training augmentation: random crops, brightness/contrast and a small rotation."""
        return A.Compose([
            A.RandomCrop(width=self.MIN_AUG_SIZE, height=self.MIN_AUG_SIZE, p=1.0),
            A.RandomCrop(width=256, height=256, p=0.5),
            A.RandomBrightnessContrast(p=0.5),
            A.Rotate(limit=(-8,8), p=0.5),
        # min_visibility=1: only boxes that stay entirely visible are kept
        ], bbox_params=A.BboxParams(format='pascal_voc', min_visibility=1))

    def __getitem__(self, index: int):
        """
        Load, optionally augment, and return the sample at position index.
        :raises FileNotFoundError: if the image file cannot be read
        """
        item = self.file_list[index]

        image = cv2.imread(item['img'])
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            raise FileNotFoundError("cannot read image: " + str(item['img']))
        # dtype is given by name so the class does not rely on numpy being imported
        # by some other notebook cell
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype('float32')
        image /= 255.0

        if 'json' in item:
            boxes, labels = read_LabelMe_annotation(item['json'], False)
        else:
            # width = height = 1: coordinates are divided by 1, i.e. left unscaled
            boxes, labels = read_DSBI_annotation(item['txt'], 1, 1, 0.5, False)

        if self.mode == 'train':
            img_h, img_w = image.shape[:2]
            if img_w >= self.MIN_AUG_SIZE and img_h >= self.MIN_AUG_SIZE:
                if self._train_aug is None:
                    self._train_aug = self._build_train_aug()
                # The label travels through the transform as a 5th box element.
                # Build NEW lists: appending to the reader's boxes in place left
                # 5-element boxes behind whenever the augmentation was skipped.
                comboxes = [list(box) + [label] for box, label in zip(boxes, labels)]
                transformed = self._train_aug(image=image, bboxes=comboxes)
                image = transformed['image']
                boxes = [target[:4] for target in transformed['bboxes']]
                labels = [target[4] for target in transformed['bboxes']]

        image = torch.tensor(image).permute(2,0,1)

        return image, {
            # reshape keeps the (N, 4) layout even when no box survived (N == 0)
            "boxes": torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4),
            "labels": torch.as_tensor(labels, dtype=torch.int64),
        }

    def __len__(self) -> int:
        """Number of entries in file_list."""
        return len(self.file_list)
        


In [None]:
def collate_fn(batch):
    """Transpose a batch: a sequence of per-sample tuples becomes a tuple of per-field tuples."""
    columns = zip(*batch)
    return tuple(columns)

# Preview: fetch the first training sample and draw its boxes and labels on the image.
train_dataset = BrailleDataset(file_list=train_lst, mode='train', transform=None)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=False, num_workers=0, collate_fn=collate_fn) #False)

# NOTE(review): BrailleDataset.__getitem__ (defined in an earlier cell) also refers to np,
# so this import has to run before the first sample is fetched below.
import numpy as np

print(len(train_loader))
# collate_fn yields tuples, hence the [0] indexing to reach the single sample of the batch
image, targets = next(iter(train_loader))
# print(targets[0]['boxes'], targets[0]['labels'])
# print(image[0].shape)

# plt.imshow(image[0].permute(1,2,0).cpu())
# plt.show()

# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
# image /= 255.0

# back from channel-first tensor to a channel-last numpy array for OpenCV / matplotlib
img = image[0].permute(1,2,0).cpu()
img = np.array(img)
# print(img.shape)

for idx, b in enumerate(targets[0]['boxes']):
    box = b.cpu().detach().numpy()
    x1 = box[0]
    y1 = box[1]
    x2 = box[2]
    y2 = box[3]
    lbl = targets[0]['labels'][idx].cpu().detach().numpy()
    # print(x1, y1, x2, y2, lbl, scr)
    # outline the box and write its label at the top-left corner
    img = cv2.rectangle(img, (int(x1), int(y1)), (int(x2), int(y2)), (0,0,255), 1)
    img = cv2.putText(img, str(lbl) , (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)

plt.figure(figsize=(10,20))
plt.imshow(img)

In [None]:
# FOR YOLO DATASETS

def make_yolo_dataset(prefix4name, base_img_dir, base_txt_dir, loader, filters=None):
    """
    Export the samples produced by loader as YOLO-style image / label-file pairs.

    Sample number `no` is written to
        {base_img_dir}/{prefix4name}img{no}.jpeg  and  {base_txt_dir}/{prefix4name}img{no}.txt
    with one "label cx cy w h" line per box (centre and size divided by the image
    width / height). Only the first element of each batch is used.

    :param prefix4name: prefix of the generated file names
    :param base_img_dir: output directory for images (created if missing)
    :param base_txt_dir: output directory for label files (created if missing)
    :param loader: iterable of (images, targets) batches; targets[0] needs 'boxes'
        ([x1, y1, x2, y2] rows) and 'labels' tensors
    :param filters: optional collection of int labels; when given, a sample is exported
        only if at least one of its labels is in it. None (default) exports everything.
    :return: None; the number of exported samples is printed
    """
    import os  # local import: no cell before this one imports os

    os.makedirs(base_img_dir, exist_ok=True)
    os.makedirs(base_txt_dir, exist_ok=True)

    cnt = 0
    for no, (images, targets) in enumerate(loader):
        # channel-first tensor -> channel-last numpy image
        img = images[0].permute(1, 2, 0).detach().cpu().numpy()
        height, width = img.shape[:2]

        lst = []
        lbl_lst = []
        for idx, b in enumerate(targets[0]['boxes']):
            x1, y1, x2, y2 = b.cpu().detach().numpy()[:4]

            w = x2-x1
            h = y2-y1
            cx = x1 + w/2
            cy = y1 + h/2

            lbl = targets[0]['labels'][idx].cpu().detach().numpy()
            lst.append(f"{lbl} {cx/width} {cy/height} {w/width} {h/height}")
            lbl_lst.append(int(lbl))

        # with a filter, skip samples that contain none of the requested labels
        if filters is not None and not any(lbl in filters for lbl in lbl_lst):
            continue

        with open(f"{base_txt_dir}/{prefix4name}img{no}.txt", "w") as file:
            file.writelines(row + '\n' for row in lst)

        plt.imsave(f"{base_img_dir}/{prefix4name}img{no}.jpeg", img)
        cnt += 1

    print(f"{cnt} files saved ", base_img_dir, base_txt_dir)

# Loader used for the YOLO export: training mode (random augmentation inside
# BrailleDataset) with shuffling, one sample per batch.
train_dataset = BrailleDataset(file_list=train_lst, mode='train', transform=None)
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True, num_workers=0, collate_fn=collate_fn) #False)

# Output folders for images and label files.
base_img_dir = '/opt/ml/DATA_YOLON2/images/train'
base_txt_dir = '/opt/ml/DATA_YOLON2/labels/train'

# Labels a sample must contain (at least one of them) to be exported when filtering is on.
# filters = [24]
filters = [16, 48, 56]
# make_yolo_dataset('test_org_', base_img_dir, base_txt_dir, train_loader, filters=None)
# make_yolo_dataset('test_fil1_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('1N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('2N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('3N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('4N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('5N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('6N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('7N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('8N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('9N', base_img_dir, base_txt_dir, train_loader)

In [None]:
# make_yolo_dataset('test_fil2_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil3_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil4_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil5_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil6_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil7_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil8_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil9_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil10_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil11_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil12_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil13_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil20_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil21_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil22_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil23_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil30_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil31_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil32_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil33_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil40_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil41_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil42_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil43_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil50_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil51_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil52_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil53_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil60_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil61_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil62_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil63_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil70_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil71_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil72_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil73_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil80_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil81_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil82_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil83_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('test_fil90_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil91_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil92_', base_img_dir, base_txt_dir, train_loader, filters=filters)
# make_yolo_dataset('test_fil93_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# Nine filtered export passes (prefixes test_filcof11_ .. test_filcof19_) over the
# shuffled training loader; each pass writes its own set of files.
for run_no in range(11, 20):
    make_yolo_dataset(f'test_filcof{run_no}_', base_img_dir, base_txt_dir, train_loader, filters=filters)

# make_yolo_dataset('10', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('11N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('12N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('13N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('14N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('15N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('16N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('17N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('18N', base_img_dir, base_txt_dir, train_loader)
# make_yolo_dataset('19N', base_img_dir, base_txt_dir, train_loader)

In [None]:
#check YOLO Dataset
import matplotlib.pyplot as plt
import os
import pandas as pd

# Directory holding the YOLO label files to audit, and the matching image directory.
path = '/opt/ml/DATA_YOLON3/labels/train'
path_img = '/opt/ml/DATA_YOLON3/images/train'
# path = '/opt/ml/DATA_NEW/train/labels'
# path_img = '/opt/ml/DATA_NEW/valid/images'
filenames = os.listdir(path)

# Collect the class id (first whitespace-separated token) of every annotation line.
lst = []
for filename in filenames:
    label_file = os.path.join(path, filename)
    # Skips Jupyter's '.ipynb_checkpoints' directory and any other non-file entry.
    if not os.path.isfile(label_file):
        continue
    with open(label_file, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line: nothing to count (int('') would raise).
                continue
            lst.append(int(fields[0]))

x = pd.Series(lst)
ls = x.value_counts()
# print(ls)

# Per-class annotation counts for class ids 0..64; classes never seen stay at 0.
# Deliberately not named `dict`: that would shadow the builtin for the whole kernel session.
class_counts = {k: 0 for k in range(65)}

for class_id in lst:
    # A KeyError here means a label file contains a class id outside 0..64.
    class_counts[class_id] += 1

# print(class_counts)
plt.plot(list(class_counts.values()))

# Report classes with fewer than 1000 annotations; ids 0 and 64 are printed
# but left out of `keys`.
keys = []
for key, val in class_counts.items():
    if val < 1000:
        print(key, val)
        if key not in [0, 64]:
            keys.append(key)

print(keys)
# x.hist()
# import matplotlib.pyplot as plt
# # plt.figure(figsize=(15,10))
# plt.hist(lst, bins=65)

In [None]:
# Sanity check: number of entries in the validation file list (val_lst is not defined in this cell).
print(len(val_lst))

In [None]:
# Build a validation loader that yields one sample at a time, in file-list order.
# val_dataset = BrailleDataset(file_list=train_lst, mode='train', transform=None)
val_dataset = BrailleDataset(file_list=val_lst, mode='val', transform=None)
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=0, collate_fn=collate_fn) #False)

# Output directories handed to make_yolo_dataset for images and label text files.
# NOTE(review): these point at DATA_YOLON2 while the dataset check cell reads DATA_YOLON3 — confirm which is intended.
base_img_dir = '/opt/ml/DATA_YOLON2/images/val2'
base_txt_dir = '/opt/ml/DATA_YOLON2/labels/val2'

# Export the validation set with the 'test_' prefix and no class filter.
# make_yolo_dataset('', base_img_dir, base_txt_dir, val_loader)
make_yolo_dataset('test_', base_img_dir, base_txt_dir, val_loader, filters=None)
# filters=[4, 12, 20, 28, 32, 35, 39, 41, 44, 57, 59, 61]
# make_yolo_dataset('test_', base_img_dir, base_txt_dir, val_loader, filters=filters)

In [None]:
import torch
import torchvision
# from torchvision.models.detection import FasterRCNN
# from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
# from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

# Number of output classes handed to the detector.
# NOTE(review): the previous comment said "10 + background", which does not add up to 65;
# presumably 64 Braille cell patterns + background — confirm against the dataset labels.
num_classes = 65

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# model = torchvision.models.detection.keypointrcnn_resnet50_fpn(pretrained=True)
# in_features = model.roi_heads.box_predictor.cls_score.in_features
# model.roi_heads.box_predictor = torchvision.models.detection.faster_rcnn.FastRCNNPredictor(in_features, num_classes)


print(torchvision.__version__)

# RetinaNet with a pretrained ResNet-50 FPN backbone and an untrained detection head.
# The class count comes from `num_classes` above so it is defined in exactly one place.
model = torchvision.models.detection.retinanet_resnet50_fpn(num_classes=num_classes, pretrained=False, pretrained_backbone=True)
# model = torchvision.models.detection.retinanet_resnet50_fpn_v2(num_classes = 65, pretrained=False, pretrained_backbone = True)

#version update 0.8.1 -> 0.14
#pip install --upgrade torchvision==0.14
#retinanet_resnet50_fpn_v2
#fcos_resnet50_fpn 

model.to(device)

In [None]:
class Averager:
    """Running arithmetic mean of every value passed to ``send`` since the last reset."""

    def __init__(self):
        # Sum of the values received and how many were received (both kept as floats).
        self.current_total, self.iterations = 0.0, 0.0

    def send(self, value):
        """Add one observation to the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of the observations so far; the integer 0 when nothing was sent."""
        if self.iterations != 0:
            return 1.0 * self.current_total / self.iterations
        return 0

    def reset(self):
        """Discard everything accumulated so far."""
        self.current_total, self.iterations = 0.0, 0.0

# def collate_fn(batch):
#     return tuple(zip(*batch))

#https://github.com/IlyaOvodov/pytorch-retinanet/blob/af75970bac9faec8fedb37a21868bd308b5e9488/train.py
# import loss
# FocalLoss comes from the project-local `loss` module (presumably adapted from the
# repository linked above — confirm). It is configured for 65 classes.
from loss import FocalLoss
# In the cells visible here, `criterion` is only referenced from commented-out code.
criterion = FocalLoss(num_classes=65)

# Shape smoke test: put one 3-element tensor in each list and stack every list
# into a batch with a leading dimension of 1.
t = torch.tensor([1, 2, 3])
tt = [t]
# tt.append(t)

l = torch.tensor([1, 2, 3])
ll = [l]
# ll.append(l)

# "Predictions" and "targets" are stacked from the same lists, so they are equal.
loc_preds, loc_targets = torch.stack(tt), torch.stack(tt)
cls_preds, cls_targets = torch.stack(ll), torch.stack(ll)

print(loc_targets.shape, loc_preds.shape)
print(cls_targets.shape, cls_preds.shape)

# loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
# print(loss)
        # loss.backward()
        # optimizer.step()

In [None]:
from tqdm import tqdm
import os
import numpy as np
import wandb

def _move_batch_to_device(images, targets, device):
    """Move one collated batch to ``device`` and make every target usable by the model.

    :param images: iterable of image tensors; each is cast to float
    :param targets: iterable of dicts of tensors with at least 'boxes' and 'labels'
    :param device: torch device the tensors are moved to
    :return: (list of image tensors, list of target dicts) on ``device``
    """
    # Move the images to the device so the model can run on it.
    images = [image.float().to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    for target in targets:
        if len(target['boxes']) == 0:
            # A sample without annotations gets one dummy box at the origin with label 0.
            # NOTE(review): presumably needed because the detector's loss cannot handle
            # targets without boxes in the installed torchvision — confirm.
            target['boxes'] = torch.tensor([[0., 0., 1., 1.]]).float().to(device)
            target['labels'] = torch.tensor([0]).to(device)
    return images, targets


def train_fn(num_epochs, train_data_loader, val_data_loader, optimizer, model, device, lr, batch_size):
    """Train ``model`` for ``num_epochs`` epochs and log train/validation loss to wandb.

    Every epoch runs one optimisation pass over ``train_data_loader`` followed by a
    gradient-free pass over ``val_data_loader``. The mean of the summed loss terms
    of each pass is printed and logged.

    :param num_epochs: number of epochs to run
    :param train_data_loader: iterable yielding (images, targets) training batches
    :param val_data_loader: iterable yielding (images, targets) validation batches
    :param optimizer: optimizer stepped once per training batch
    :param model: detection model that returns a dict of losses for (images, targets)
    :param device: torch device batches are moved to
    :param lr: learning rate; only recorded in the wandb run config
    :param batch_size: batch size; only recorded in the wandb run config
    :return: None
    """
    # Hyperparameters go through `config=` so they are attached to the run;
    # assigning to `wandb.config` would only rebind the module attribute.
    wandb.init(
        project="braille",
        entity="zergswim_proj",
        config={
            "learning_rate": lr,
            "epochs": num_epochs,
            "batch_size": batch_size,
        },
    )

    # Only used by the commented-out checkpointing code that follows this function.
    best_loss = 1000
    loss_hist = Averager()
    val_hist = Averager()
    for epoch in range(num_epochs):
        loss_hist.reset()

        model.train()
        for images, targets in tqdm(train_data_loader):
            images, targets = _move_batch_to_device(images, targets, device)

            # The model returns a dict of loss terms; optimise their sum.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            loss_hist.send(losses.item())

            # backward
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()

        print(f"Train Epoch #{epoch+1} loss: {loss_hist.value}")

        val_hist.reset()
        with torch.no_grad():
            # model.eval() is intentionally not called: torchvision detection models
            # return predictions instead of the loss dict in eval mode.
            for images, targets in tqdm(val_data_loader):
                # Same preparation as for training, including the empty-target padding.
                images, targets = _move_batch_to_device(images, targets, device)

                loss_dict = model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                val_hist.send(losses.item())

        print(f"Val. Epoch #{epoch+1} loss: {val_hist.value}")

        wandb.log({"Epoch": epoch,
                   "Loss": loss_hist.value,
                   "Val. Loss": val_hist.value,
                  })
            
#                 loc_preds, loc_targets, cls_preds, cls_targets = [], [], [], []
                
#                 # print(targets[0]['boxes'])
                
#                 for idx, tar in enumerate(targets):
#                     tar_box = tar['boxes']
#                     tar_lbl = tar['labels'] #torch.nn.functional.one_hot(tar['labels'], num_classes=65)
#                     loc_targets.append(tar_box)
#                     cls_targets.append(tar_lbl)

#                     if len(outputs)-1 > idx:
#                         out_box = outputs[idx]['boxes']
#                         out_lbl = outputs[idx]['labels'] #torch.nn.functional.one_hot(outputs[idx]['labels'], num_classes=65)
#                         loc_preds.append(out_box)
#                         cls_preds.append(out_lbl)
#                     else:
#                         zero_boxes = torch.zeros_like(tar_box)
#                         zero_labels = torch.zeros_like(tar_lbl)
#                         loc_preds.append(zero_boxes)
#                         cls_preds.append(zero_labels)

#                 print(len(loc_targets), len(loc_preds), len(cls_targets), len(cls_preds))
                
#                 for l in loc_preds:
#                     print(l.shape)
                        
#                 loc_preds = torch.stack(loc_preds)
#                 loc_targets = torch.stack(loc_targets)
#                 cls_preds = torch.stack(cls_preds)
#                 cls_targets = torch.stack(cls_targets)
                
#                 print(loc_targets.shape, loc_preds.shape)
#                 print(cls_targets.shape, cls_preds.shape)

#                 loss = criterion(loc_preds, loc_targets, cls_preds, cls_targets)
#                 print(loss)
                
#                 # loss = torchvision.ops.sigmoid_focal_loss(output, targets)
#                 # print(loss)
#                 val_hist.update(loss.detach().item(), batch_size)
        
#             print(f"Val. Epoch #{epoch+1} loss: {val_hist.value}")

#         if val_hist.value < best_loss:
#             save_path = f'./save/retina_v2_e{epoch+1}.pth'
#             save_dir = os.path.dirname(save_path)
#             if not os.path.exists(save_dir):
#                 os.makedirs(save_dir)
            
#             torch.save(model.state_dict(), save_path)
#             best_loss = val_hist.value
#             print('file saved', val_hist.value, '(best:', best_loss,')')  

# Hyperparameters for this fine-tuning run.
num_epochs = 100
batch_size = 16
lr = 0.00001
# Training batches are reshuffled every epoch; validation order is fixed.
train_dataset = BrailleDataset(file_list=train_lst, mode='train', transform=None)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2, collate_fn=collate_fn) #False)

val_dataset = BrailleDataset(file_list=val_lst, mode='val', transform=None)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2, collate_fn=collate_fn) #False)

# Optimise only the parameters that require gradients.
params = [p for p in model.parameters() if p.requires_grad]

# optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.AdamW(params, lr=lr)

# Start from previously saved weights. The loaded object is passed straight to
# load_state_dict, so the file is expected to contain a state dict.
checkpoint = torch.load('./save/retina_v1_base.pth', map_location=device)
# state_dict = checkpoint.state_dict()
model.load_state_dict(checkpoint)
print("model loaded")

train_fn(num_epochs, train_loader, val_loader, optimizer, model, device, lr, batch_size=batch_size)

In [None]:
# save_path = f'./save/retina_v1_base.pth'
# torch.save(model.state_dict(), save_path)
# print('file saved')  

In [None]:
# model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
# # For training
# images, boxes = torch.rand(4, 3, 600, 1200), torch.rand(4, 11, 4)
# boxes[:, :, 2:4] = boxes[:, :, 0:2] + boxes[:, :, 2:4]
# labels = torch.randint(1, 91, (4, 11))
# images = list(image for image in images)
# targets = []
# for i in range(len(images)):
#     d = {}
#     d['boxes'] = boxes[i]
#     d['labels'] = labels[i]
#     targets.append(d)
    
# # print(type(targets), targets[0]['boxes'], targets[0]['labels'])#, targets['boxes'])
# # print(images[0].shape, targets)
# output = model(images, targets)
# print(output)
# # >>> # For inference
# # # >>> model.eval()
# # # >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
# # # >>> predictions = model(x)
# # # >>>
# # # >>> # optionally, if you want to export the model to ONNX:
# # # >>> torch.onnx.export(model, x, "faster_rcnn.onnx", opset_version = 11)

In [None]:

def show_result(image, limit = 0.5):
    '''
    Run the global ``model`` on one image and plot the detections scoring above ``limit``.
    Uses the notebook globals ``model`` and ``device``; switches ``model`` to eval mode.
    :param image: H x W x C array with values in 0..255 (it is divided by 255);
        assumes 3 colour channels — TODO confirm for every caller
    :param limit: a detection is drawn only if its score is strictly greater than this
    :return: None; the annotated image is shown with matplotlib
    '''
    # NOTE(review): callers in this notebook load images with plt.imread / PIL, which give
    # RGB, so this conversion swaps the R and B channels. Kept unchanged because the
    # checkpoint may have been trained with the same preprocessing — confirm against
    # the dataset class.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
    image /= 255.0
    # HWC -> CHW, plus a leading batch dimension of 1.
    batch = torch.tensor(image).permute(2, 0, 1).unsqueeze(dim=0).float().to(device)

    model.eval()
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(batch)

    # Back to HWC for drawing. squeeze(0) removes only the batch dimension, and OpenCV
    # drawing functions need a C-contiguous array, which a permuted tensor is not.
    img = np.ascontiguousarray(batch.squeeze(0).permute(1, 2, 0).cpu().numpy())

    detections = output[0]
    for box, lbl, scr in zip(detections['boxes'], detections['labels'], detections['scores']):
        if scr.item() > limit:
            x1, y1, x2, y2 = (int(coord) for coord in box.tolist())
            img = cv2.rectangle(img, (x1, y1), (x2, y2), (0,0,255), 1)
            img = cv2.putText(img, str(lbl.item()), (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0,0,255), 1)

    plt.figure(figsize=(10,20))
    plt.imshow(img)
    
# Run the detector on one labeled page photo and draw detections scoring above 0.5.
image = plt.imread("./books/chudo_derevo_redmi/IMG_20190715_112936.labeled.jpg")
# image = plt.imread("./books/chudo_derevo_redmi/IMG_20190715_112912.labeled.jpg")
print(type(image))
show_result(image, limit=0.5)

In [None]:
# image = plt.imread("/opt/ml/DSBI/data/The Second Volume of Ninth Grade Chinese Book 1/SVNGCB1+9.jpg")
# Run the detector on one page from the DSBI data directory with a stricter score threshold of 0.7.
image = plt.imread("/opt/ml/DSBI/data/Fundamentals of Massage/FM+1.jpg")
# image = plt.imread("Korean_Braille.jpg")
# image = plt.imread("test.jpg")
# image = cv2.resize(image, (512, 512))

show_result(image, limit=0.7)

In [None]:
# Run the detector on a local image file, looked up relative to the working directory.
image = plt.imread("Korean_Braille.jpg")
# image = plt.imread("test.jpg")
# image = cv2.resize(image, (512, 512))

show_result(image, limit=0.7)

In [None]:
img_path = 'https://test.narangdesign.com/mail/kbuwel/202011/images/news_01_img1.jpg'
from PIL import Image
import requests
from io import BytesIO
# Bound the wait and fail on HTTP errors instead of handing an error page to PIL.
response = requests.get(img_path, timeout=30)
response.raise_for_status()
# Force 3-channel RGB: grayscale or palette images would otherwise become 2-D arrays,
# which show_result cannot handle.
raw_img = Image.open(BytesIO(response.content)).convert('RGB')
# plt.imshow(raw_img)

image = np.array(raw_img)
# Resize to a fixed 1024 x 1024 input; the aspect ratio is not preserved.
image = cv2.resize(image, (1024, 1024))

show_result(image, limit=0.7)

In [None]:
# val_dataset = BrailleDataset(file_list=val_lst, mode='val', transform=None)
# val_data_loader = DataLoader(val_dataset, batch_size=1, shuffle=False, num_workers=1, collate_fn=collate_fn) #False)

# model.eval()
# # val_hist = Averager()
# with torch.no_grad():
#     # val_hist.reset()            
#     for images, targets in tqdm(val_data_loader):

#         images = list(image.float().to(device) for image in images)
#         targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
#         loss_dict = model(images, targets)
        
#         print(loss_dict)
#         break



# #         losses = sum(loss for loss in loss_dict.values())
# #         loss_value = losses.item()

# #         val_hist.send(loss_value)

#         # backward
#         # optimizer.zero_grad()
#         # losses.backward()
#         # optimizer.step()

#     # print(f"Val. Epoch #{epoch+1} loss: {val_hist.value}")  

In [None]:
# ucd = '\u2801'
# ucd2 = '\u2803'
# print(ucd, ord(ucd))
# print(ucd2, ord(ucd2))

# Print (decimal label, Braille character) pairs for cell values 1..63, ten per row.
# 10240 == 0x2800, the first code point of the Unicode Braille Patterns block.
bind = []
for i in range(1, 64):
    ucd = chr(10240+i)
    # Width-2 decimal label. Not named `str`: that would shadow the builtin for the
    # rest of the kernel session and break every later str(...) call.
    label = '{0:2}'.format(i)
    bind.append((label, ucd))
    if i%10==0:
        print(bind)
        bind = []
# Remaining pairs (61..63) that did not fill a complete row.
print(bind)

In [None]:
# Text of the binary literal for decimal 2801, encoded to UTF-8 bytes and decoded back.
# NOTE(review): 2800 here is decimal, not 0x2800, so this is unrelated to U+2801 — confirm intent.
c = format(2800 + 1, '#b').encode('utf-8')
print(c)
print(c.decode('utf-8'))

In [None]:
# Format the number 60 in binary, octal, decimal and hexadecimal using the '#'
# alternate form. The Korean labels in the string read "base-2", "base-8",
# "base-10" and "base-16".
s = "2진수: {0:#b}, 8진수: {0:#o}, 10진수: {0:#d}, 16진수: {0:#x}".format(60)
print(s)

In [None]:
# unichr('1')

In [None]:
from PIL import ImageFont, ImageDraw, Image
import cv2
import numpy as np

# Create black mask using Numpy and convert from BGR (OpenCV) to RGB (PIL)
# image = cv2.imread('1.png') # If you were using an actual image
# 500 x 500 black RGB canvas, wrapped as a PIL image so text can be drawn on it.
image = np.zeros((500, 500, 3), dtype=np.uint8)
# image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(image)

# Draw non-ascii text onto image
# The font is loaded from an absolute system path at size 20; the cell fails if that file is missing.
font = ImageFont.truetype("/usr/share/fonts/truetype/dejavu/DejaVuSerif-Bold.ttf", 20)
draw = ImageDraw.Draw(pil_image)
# chr(10240+63) is U+283F, the Braille pattern with all six dots 1-6 raised.
draw.text((100, 10), chr(10240+63), font=font)
plt.imshow(pil_image)

In [None]:
!fc-list 

In [None]:
!nvidia-smi