## 이미지 자르기 

In [None]:
import json
from glob import glob
import os
import matplotlib.pyplot as plt
from tqdm import tqdm_notebook
from PIL import Image
import numpy as np 

#json
def crop_frm_json(jsonpath, imgpath, dst) :
    """Crop every ``@@@``-labelled shape out of the images described by JSON files.

    For each annotation file matched by ``jsonpath`` the image with the same
    base name is looked up in ``imgpath`` (``.jpg`` first, then ``.tif``).
    Each entry of ``dat['shapes']`` whose label starts with ``@@@`` is cropped
    to the bounding box of its points and saved as ``<stem>-<index>.jpg`` in
    ``dst``; a ``<stem>-<index>.txt`` file holding
    ``<absolute txt path>\\t<label without the @@@ prefix>`` is written next
    to it.  Crops whose ``.jpg`` already exists are skipped entirely.

    Args:
        jsonpath: Glob pattern for the annotation files; must end in ``.json``.
        imgpath: Directory containing the source images.
        dst: Output directory (created if missing).

    Returns:
        List of annotation file paths for which no image was found or a crop
        could not be saved.  A file appears once per failed crop.
    """
    assert jsonpath.endswith('.json')
    os.makedirs(dst, exist_ok=True)
    file_err = []

    for file in tqdm_notebook(glob(jsonpath)):
        with open(file, encoding='utf-8-sig') as f:
            dat = json.load(f)

        # Swap only the extension; str.replace('json', ...) would also rewrite
        # any other "json" substring inside the file name.
        stem = os.path.splitext(os.path.basename(file))[0]
        jpgfile = os.path.join(imgpath, stem + '.jpg')
        tiffile = os.path.join(imgpath, stem + '.tif')

        if os.path.isfile(jpgfile):
            img = plt.imread(jpgfile) / 255.
        elif os.path.isfile(tiffile):
            img = np.asarray(Image.open(tiffile).convert('RGB')) / 255.
        else:
            # Neither image exists: report it and move on instead of crashing
            # inside Image.open.
            print(file)
            file_err.append(file)
            continue

        # Output files are named after the part of the base name before the
        # first dot.
        out_stem = os.path.basename(file).split('.')[0]

        for i, d in enumerate(dat['shapes']):
            # Only crop the shapes whose label is marked with '@@@'.
            if not d['label'].startswith('@@@'):
                continue
            text = d['label'][3:]

            points = d['points']
            if not points:
                continue
            # The second value of each point indexes image axis 0 and the
            # first value indexes axis 1.
            axis0 = [p[1] for p in points]
            axis1 = [p[0] for p in points]

            x_min = max(0, int(min(axis0)))
            x_max = min(img.shape[0], int(max(axis0)))
            y_min = max(0, int(min(axis1)))
            y_max = min(img.shape[1], int(max(axis1)))

            # Slicing the first two axes works for both 2-D and 3-D arrays.
            crop_img = img[x_min:x_max, y_min:y_max]

            crop_path = os.path.join(dst, '{}-{}.jpg'.format(out_stem, i))
            if os.path.isfile(crop_path):
                continue
            try:
                plt.imsave(crop_path, crop_img)
            except Exception:
                # e.g. an empty crop; remember the annotation file and go on.
                file_err.append(file)
                continue

            new_filename = os.path.join(os.path.abspath(dst), '{}-{}.txt'.format(out_stem, i))
            if os.path.isfile(new_filename):
                continue
            # NOTE(review): written with the platform default encoding, as
            # before - confirm which encoding downstream readers expect.
            with open(new_filename, 'w') as out:
                out.write('\t'.join([new_filename, text]))

    return file_err

In [None]:
# txt 
def crop_frm_txt(txtpath, imgpath, dst) :
    """Crop regions listed in tab-separated annotation files out of their images.

    For each annotation file matched by ``txtpath`` the image with the same
    base name is looked up in ``imgpath`` (``.jpg`` first, then ``.tif``).
    Every line with at least seven tab-separated fields yields one crop:
    fields 0 and 4 bound image axis 0, fields 1 and 3 bound axis 1, and the
    last field is the text (a leading ``@@@`` is stripped).  The crop is saved
    as ``<stem>-<line index>.jpg`` in ``dst`` together with a
    ``<stem>-<line index>.txt`` file holding
    ``<absolute txt path>\\t<text>``.  Crops whose ``.jpg`` already exists are
    skipped entirely.

    Args:
        txtpath: Glob pattern for the annotation files; must end in ``.txt``.
        imgpath: Directory containing the source images.
        dst: Output directory (created if missing).

    Returns:
        List of annotation file paths for which no image was found, a line
        had non-integer coordinates, or a crop could not be saved.  A file
        appears once per failure.
    """
    assert txtpath.endswith('.txt')
    os.makedirs(dst, exist_ok=True)
    file_err = []

    for file in tqdm_notebook(glob(txtpath)):
        # Swap only the extension; str.replace('txt', ...) would also rewrite
        # any other "txt" substring inside the file name.
        stem = os.path.splitext(os.path.basename(file))[0]
        jpgfile = os.path.join(imgpath, stem + '.jpg')
        tiffile = os.path.join(imgpath, stem + '.tif')

        if os.path.isfile(jpgfile):
            img = plt.imread(jpgfile) / 255.
        elif os.path.isfile(tiffile):
            img = np.asarray(Image.open(tiffile).convert('RGB')) / 255.
        else:
            # Neither image exists: report it and move on instead of crashing
            # inside Image.open.
            print(file)
            file_err.append(file)
            continue

        with open(file, encoding='utf-8-sig') as f:
            data = f.read().split('\n')

        # Output files are named after the part of the base name before the
        # first dot.
        out_stem = os.path.basename(file).split('.')[0]

        for i, d in enumerate(data):
            fields = d.split('\t')
            coord, text = fields[:7], fields[-1]
            if len(coord) < 7:
                continue
            if text.startswith('@@@'):
                text = text[3:]

            try:
                # Clamp at 0 so negative values cannot wrap around to the far
                # edge of the array; numpy already clamps the upper bound.
                x_min = max(0, int(coord[0]))
                x_max = max(0, int(coord[4]))
                y_min = max(0, int(coord[1]))
                y_max = max(0, int(coord[3]))
            except ValueError:
                # Non-integer coordinate field: record it and keep going.
                file_err.append(file)
                continue

            # Slicing the first two axes works for both 2-D and 3-D arrays.
            crop_img = img[x_min:x_max, y_min:y_max]

            crop_path = os.path.join(dst, '{}-{}.jpg'.format(out_stem, i))
            if os.path.isfile(crop_path):
                continue
            try:
                plt.imsave(crop_path, crop_img)
            except Exception:
                # e.g. an empty crop; remember the annotation file and go on.
                file_err.append(file)
                continue

            new_filename = os.path.join(os.path.abspath(dst), '{}-{}.txt'.format(out_stem, i))
            if os.path.isfile(new_filename):
                continue
            # NOTE(review): written with the platform default encoding, as
            # before - confirm which encoding downstream readers expect.
            with open(new_filename, 'w') as out:
                out.write('\t'.join([new_filename, text]))

    return file_err


## 특수문자

In [1]:
# 특수문자 해당 파일 찾기
import re

def find_file_w_spc(files) :
    """Return the files whose text contains a parenthesised number like ``(3)``.

    Each file is expected to hold ``<filename>\\t<text>``.  Everything after
    the first tab is searched for ``(`` + digits + ``)``.  Files without a tab
    have no text field and are never reported.

    Args:
        files: Iterable of paths to annotation ``.txt`` files.

    Returns:
        List of the paths that matched, in input order.
    """
    # Raw string: '\(' is not a valid escape in a normal string literal.
    special_chr = re.compile(r'\([0-9]+\)')
    file_spc = []
    for file in files:
        with open(file) as f:
            _, sep, text = f.read().partition('\t')
        if sep and special_chr.search(text):
            file_spc.append(file)

    return file_spc


In [None]:
# 특수문자로 annotation 변경 후 저장
def rewrite_txt_spc(files) :
    """Replace ``(1)`` .. ``(20)`` in annotation files with circled numbers.

    Each file is expected to hold ``<filename>\\t<text>``.  Only the text
    (everything after the first tab, further tabs included) is modified; the
    file is then rewritten in place as UTF-8, whether or not anything changed.

    Args:
        files: Iterable of paths to annotation ``.txt`` files.

    Raises:
        ValueError: If a file contains no tab separator.
    """
    spc_dict = {'('+str(i+1)+')':spc for i, spc in enumerate('①②③④⑤⑥⑦⑧⑨⑩⑪⑫⑬⑭⑮⑯⑰⑱⑲⑳') }

    for file in files:
        # NOTE(review): read with the platform default encoding but written
        # back as UTF-8 - confirm the encoding the input files really use.
        with open(file) as f:
            content = f.read()

        # Split on the first tab only so a tab inside the text does not break
        # the two-field unpacking.
        filename, sep, txt = content.partition('\t')
        if not sep:
            raise ValueError('no tab separator in {!r}'.format(file))

        result = txt
        for k, v in spc_dict.items():
            result = result.replace(k, v)

        with open(file, 'w', encoding='utf-8') as f:
            f.write('\t'.join([filename, result]))
        