In [None]:
# Mount Google Drive inside the Colab VM at /content/gdrive; the paths used by
# the following cells (/content/gdrive/MyDrive/...) live under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

$$\lim_{a\to\frac{\pi}{4}}\frac{\frac{d}{da}\left(\sin{a}+-6\sec{a}\right)}{\frac{d}{da}\left(a+-4\frac{\pi}{4}\right)}$$

In [None]:
%cd gdrive/MyDrive/faster

In [None]:
import pandas as pd
import numpy as np
import cv2
import os
import re
import ast
import random
import datetime
import pickle
import time
import errno
import math
import sys

from PIL import Image

import matplotlib.pyplot as pp
import albumentations as A
import transforms as T
from collections import defaultdict, deque
from albumentations.pytorch.transforms import ToTensor
from google.colab.patches import cv2_imshow

import torch
import torchvision
import torch.distributed as dist

from torchvision.transforms import functional as F
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
import torchvision.models.detection.mask_rcnn
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from tqdm import tqdm

from engine import train_one_epoch, evaluate
import utils

In [None]:
# Work from the project folder so relative file names below resolve there.
os.chdir('/content/gdrive/MyDrive/faster')

# Delete the result file left behind by a previous run, if there is one.
# Only a missing file is an expected condition; any other failure (for example
# a permission problem) is allowed to surface instead of being swallowed.
try:
    os.remove('result.csv')
except FileNotFoundError:
    pass

In [None]:
# Root folder of the OCR dataset on Drive. The trailing slash is significant:
# the annotation CSV path is built by plain string concatenation
# (f'{DIR_INPUT}batch_1/...').
DIR_INPUT = '/content/gdrive/MyDrive/ocr-data/ocr-data/'

In [None]:
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU,
# and print the choice so it is visible in the notebook output.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
# Load the batch-1 annotation table. The cells below read these columns from
# it: filename, x, y, w, h, visible_latex_chars and visible_char_map.
train_df = pd.read_csv(f'{DIR_INPUT}batch_1/JSON/csv_1.csv')

In [None]:
def make_float(df):
    """Cast the bounding-box columns ``x``, ``y``, ``w`` and ``h`` to float64.

    The frame is modified in place and the same object is returned, so the
    call can be used either as a statement or in an assignment.

    ``np.float64`` is used instead of the ``np.float`` alias, which was
    deprecated in NumPy 1.20 and removed in 1.24; the resulting dtype is the
    same.

    Args:
        df: DataFrame containing the columns ``x``, ``y``, ``w`` and ``h``.

    Returns:
        The same DataFrame, with the four columns converted.

    Raises:
        KeyError: if one of the four columns is missing.
    """
    for column in ('x', 'y', 'w', 'h'):
        df[column] = df[column].astype(np.float64)
    return df

In [None]:
# Convert the box columns (x, y, w, h) of the annotation table to floats.
train_df = make_float(train_df)

In [None]:
# Build two lookup tables between LaTeX tokens and their integer ids:
#   char_to_map: token -> id
#   map_to_char: id    -> token
# Both annotation columns hold Python-literal lists stored as strings, so each
# cell is parsed with ast.literal_eval.

# Unique values of each column on their own; kept as module-level names.
visible_latex_chars = train_df['visible_latex_chars'].unique()
visible_char_map = train_df['visible_char_map'].unique()

char_to_map = {}
map_to_char = {}

# De-duplicate the two columns *together*. Calling .unique() on each column
# separately and pairing the results by position is only correct when both
# columns happen to produce their unique values in matching order; taking the
# pairs from the same rows guarantees that every token list is matched with
# its own id list.
unique_pairs = train_df[['visible_latex_chars', 'visible_char_map']].drop_duplicates()

for chars_literal, ids_literal in zip(unique_pairs['visible_latex_chars'],
                                      unique_pairs['visible_char_map']):
    each_latex_chars = ast.literal_eval(chars_literal)
    each_latex_map = ast.literal_eval(ids_literal)
    # Index by position so a too-short id list still raises IndexError
    # instead of being silently truncated.
    for position, latex_char in enumerate(each_latex_chars):
        char_id = each_latex_map[position]
        char_to_map[latex_char] = char_id
        map_to_char[char_id] = latex_char

In [None]:
class ExpressionDataset(Dataset):
    """Detection dataset: one expression image plus the boxes of its symbols.

    Items are grouped by the ``filename`` column of the annotation frame.
    ``dataset[i]`` returns ``(image, target)`` where ``target`` is a dict with

    * ``boxes``    - float32 tensor, one row per annotation, as
      ``[x1, y1, x2, y2]`` (converted from the frame's ``x, y, w, h``),
    * ``labels``   - int64 tensor parsed from the ``visible_char_map`` cell,
    * ``image_id`` - tensor holding the item index,
    * ``area``     - float32 tensor with the area of each box,
    * ``iscrowd``  - int64 zeros, one per annotation row.

    Args:
        dataframe: annotation table with the columns ``filename``, ``x``,
            ``y``, ``w``, ``h`` and ``visible_char_map``.
        image_dir: dataset root; images are read from
            ``<image_dir>/batch_1/background_images/<filename>``.
        transforms: optional callable invoked as ``transforms(image, target)``
            and expected to return the transformed pair.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.df = dataframe
        # One dataset item per distinct image file.
        self.filenames = dataframe['filename'].unique()
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Load image ``index`` and build its detection target.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        filename = self.filenames[index]
        records = self.df[self.df['filename'] == filename]
        # The id list is stored as a string literal; the first unique value
        # among this image's rows is used.
        maps = ast.literal_eval(records['visible_char_map'].unique()[0])

        image_path = f'{self.image_dir}/batch_1/background_images/{filename}'
        # NOTE(review): cv2.COLOR_BGR2GRAY is a cvtColor conversion code, not
        # an imread flag, so the file is not necessarily decoded as grayscale.
        # Left unchanged because a different flag could change the channel
        # layout the model is fed - confirm the intended mode
        # (cv2.IMREAD_GRAYSCALE or cv2.IMREAD_COLOR).
        image = cv2.imread(image_path, cv2.COLOR_BGR2GRAY)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Could not read image: {image_path}')

        # Scale by 1/255 and round to the nearest integer, which turns an
        # 8-bit image into a 0/1 float32 image.
        image = np.around(image.astype(np.float32) / 255.0)

        # Work on an explicit copy so the in-place edits below can never touch
        # (or be blocked by) the DataFrame's own storage.
        boxes = records[['x', 'y', 'w', 'h']].to_numpy(copy=True)
        # (x, y, w, h) -> (x1, y1, x2, y2)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        boxes = torch.as_tensor(boxes, dtype=torch.float32)

        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        labels = torch.tensor(maps, dtype=torch.int64)
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)

        target = {
            'boxes': boxes,
            'labels': labels,
            'image_id': torch.tensor([index]),
            'area': area,
            'iscrowd': iscrowd,
        }

        if self.transforms is not None:
            image, target = self.transforms(image, target)

        return image, target

    def __len__(self) -> int:
        """Number of distinct image files in the annotation table."""
        return len(self.filenames)

In [None]:
def get_transform(train):
    """Return the composed transform applied to each (image, target) pair.

    The pipeline currently consists of ``T.ToTensor()`` only, so ``train`` has
    no effect on the result.
    """
    # Augmentation is switched off. To flip training images horizontally with
    # 50% probability, add T.RandomHorizontalFlip(0.5) to the list when
    # `train` is true.
    pipeline = [T.ToTensor()]
    return T.Compose(pipeline)

In [None]:
# Number of images held out for evaluation; everything else is used to train.
NUM_TEST_IMAGES = 2000

# Two dataset objects over the same annotations, so the train and test sides
# can be given different transforms.
dataset = ExpressionDataset(train_df, DIR_INPUT, get_transform(train=True))
dataset_test = ExpressionDataset(train_df, DIR_INPUT, get_transform(train=False))

# With NUM_TEST_IMAGES or fewer images the slice indices[:-NUM_TEST_IMAGES]
# is empty, i.e. there would be nothing to train on. Fail with a clear
# message instead of continuing with an empty training split.
if len(dataset) <= NUM_TEST_IMAGES:
    raise ValueError(
        f'Dataset has {len(dataset)} images; need more than {NUM_TEST_IMAGES} '
        'to keep a non-empty training split.'
    )

# Random, non-overlapping split: the last NUM_TEST_IMAGES shuffled indices go
# to the test subset. The permutation is not seeded, so the split changes from
# run to run.
indices = torch.randperm(len(dataset)).tolist()
dataset = torch.utils.data.Subset(dataset, indices[:-NUM_TEST_IMAGES])
dataset_test = torch.utils.data.Subset(dataset_test, indices[-NUM_TEST_IMAGES:])

# Detection targets differ in size per image, hence the custom collate_fn.
data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=1, shuffle=True, num_workers=4,
        collate_fn=utils.collate_fn)

data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, shuffle=False, num_workers=4,
        collate_fn=utils.collate_fn)

In [None]:
# Load torchvision's Faster R-CNN (ResNet-50 + FPN) with pre-trained weights
# (pre-trained on COCO); its box predictor is replaced in the next cell.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)

In [None]:
num_classes = 81  # number of outputs of the new classification head
# NOTE(review): the previous comment here read "1 class (wheat) + background",
# which does not match the value 81. Presumably this is 80 symbol classes plus
# the background class - confirm against the ids found in visible_char_map.

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

In [None]:
# Put the model on the selected device before handing its parameters to the
# optimizer.
model.to(device)

# Optimise only the parameters that still require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)

# Multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

num_epochs = 5

In [None]:
for epoch in range(num_epochs):
    # Train for one epoch, logging every 10 iterations.
    train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq=10)
    # Update the learning rate.
    lr_scheduler.step()
    # Evaluate on the test dataset.
    evaluate(model, data_loader_test, device=device)

print("That's it!")

In [None]:
# Save only the model's state_dict (not the model object) to a file in the
# current working directory.
torch.save(model.state_dict(), 'fasterrcnn_resnet50_fpn.pth')

In [None]:
# Quick qualitative check: run the model on the first test batch, print the
# predicted symbols and show the input image.
images, targets = next(iter(data_loader_test))
images = list(image.to(device) for image in images)
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

# Channels-last numpy copy of the first image, as expected by imshow.
sample = images[0].permute(1, 2, 0).cpu().numpy()

# Inference only: switch to eval mode and skip gradient tracking so no
# autograd graph is kept alive for the forward pass.
model.eval()
with torch.no_grad():
    outputs = model(images)

cpu_device = torch.device("cpu")
outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

# Translate each predicted label id back to its LaTeX token, in the order the
# model returned the detections (no score threshold is applied here).
empty_string = ''.join(map_to_char[x] for x in outputs[0]['labels'].tolist())

print(empty_string)

fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.imshow(sample)

In [None]:
def format_prediction_string(boxes, scores):
    """Serialise detections as one space-separated string.

    Each (score, box) pair contributes ``"<score> <b0> <b1> <b2> <b3>"`` with
    the score printed to four decimals and the first four box entries printed
    as they are. Pairs are formed positionally; surplus entries of the longer
    argument are ignored. Empty input gives an empty string.
    """
    return " ".join(
        f"{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for score, box in zip(scores, boxes)
    )

In [None]:
# Run the model over the whole test loader and collect, per image, the
# detections whose score reaches `detection_threshold`.
results = []
detection_threshold = 0.6

# Inference only: eval mode, and no gradient tracking so memory is not spent
# on autograd graphs for every test image.
model.eval()
with torch.no_grad():
    # The loader yields (images, targets); each target is the dict built by
    # the dataset, and its 'image_id' entry holds the dataset index.
    for images, targets in data_loader_test:

        images = list(image.to(device) for image in images)
        outputs = model(images)

        for i, output in enumerate(outputs):

            boxes = output['boxes'].detach().cpu().numpy()
            scores = output['scores'].detach().cpu().numpy()

            # Compute the mask once and apply it to both arrays.
            keep = scores >= detection_threshold
            boxes = boxes[keep].astype(np.int32)
            scores = scores[keep]

            # (x1, y1, x2, y2) -> (x, y, w, h)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]

            results.append({
                # Store the integer id, not the whole target dict.
                'image_id': targets[i]['image_id'].item(),
                'PredictionString': format_prediction_string(boxes, scores),
            })

In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image
import numpy as np

# Draw the detections that pass `detection_threshold` on the first image of
# the most recently processed batch (`images` / `outputs`).
im = sample = images[0].permute(1,2,0).cpu().numpy().astype(np.float64)

boxes = outputs[0]['boxes'].detach().cpu().numpy()
scores = outputs[0]['scores'].detach().cpu().numpy()

boxes = boxes[scores >= detection_threshold].astype(np.int32)

# Create figure and axes
fig, ax = plt.subplots(1)

# Display the image
ax.imshow(im)

# One red outline per kept box; boxes are (x1, y1, x2, y2), Rectangle wants
# the top-left corner plus width and height. Every patch is added inside the
# loop, so nothing is added twice and an empty `boxes` array is handled.
for box in boxes:
    rect = patches.Rectangle(
        (box[0], box[1]), box[2] - box[0], box[3] - box[1],
        linewidth=1, edgecolor='r', facecolor='none')
    ax.add_patch(rect)

plt.show()