In [7]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import time
import io
import cv2
import textwrap
import matplotlib.patches as patches
import math
import os
from pathlib import Path
from datetime import datetime
import json
import glob

# Seed NumPy's legacy global RNG so that any np.random draws are repeatable.
seed = 42
np.random.seed(seed)

In [8]:
# Input locations read by the helper functions below.
img_dir_path = Path("output") / "images"
lbl_dir_path = Path("output") / "labels"
jsn_dir_path = Path("output")

# Run tag: today's date as the integer MMDD minus 1001, zero-padded to width 4.
# NOTE(review): the tag depends on the day this cell is executed, so the
# prefixes below only match files that carry the same tag -- confirm intended.
today = format(int(datetime.now().strftime("%m%d")) - 1001, "04")
imgfn_prefix = "image_" + today + "_"
jsnfn_prefix = "label_" + today

# Half-open index window [startno, endno) of files that main() processes.
startno = 0
endno = 1e8
# NOTE(review): wiki_maxnum is not referenced by the visible cells.
wiki_maxnum = 100

# Number of images to convert; indices below trainnum go to the train split.
datanum = 150
trainnum = int(0.7 * datanum)
output_dir_path = Path("dataset_yolo")

In [9]:
def print_img(img, cvf=False):
    """Show an image in a new 10x10-inch matplotlib figure.

    Args:
        img: Image array handed to ``plt.imshow``.
        cvf: When true, the image is converted from BGR to RGB channel order
            with OpenCV before it is displayed.
    """
    to_show = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) if cvf else img
    plt.figure(figsize=(10, 10))
    plt.imshow(to_show)

In [10]:
def checkImg(file_path):
    """Placeholder for an image check; not implemented yet.

    Accepts a file path, does no work, and returns None.
    """
    return None

def trans(pts1, height=480):
    """Mirror points vertically inside a frame of the given height.

    Each point ``(x, y, ...)`` becomes ``(x, height - y)``; only the first two
    components of a point are used.

    Args:
        pts1: Iterable of indexable points (at least two components each).
        height: Frame height used for the flip. Defaults to 480, the value
            that was previously hard-coded.

    Returns:
        A ``numpy.float32`` array holding one ``[x, height - y]`` row per point.
    """
    return np.float32([[p[0], height - p[1]] for p in pts1])

def get_filenames(img_dir_path, prefix=None):
    """List the PNG files in a directory whose names start with a prefix.

    The result is sorted: ``glob.glob`` returns matches in arbitrary order,
    and callers that index into the list (e.g. to split train/validation by
    position) need a stable, reproducible ordering.

    Args:
        img_dir_path: Directory to search (``str`` or ``Path``).
        prefix: File-name prefix to match. Defaults to the module-level
            ``imgfn_prefix``.

    Returns:
        Sorted list of matching paths as strings.
    """
    if prefix is None:
        prefix = imgfn_prefix
    pattern = str(Path(img_dir_path) / f"{prefix}*.png")
    return sorted(glob.glob(pattern))

def read_json(json_file_path):
    """Load and return the parsed contents of a JSON file.

    The file is decoded as UTF-8 explicitly so that the result does not depend
    on the platform's locale encoding.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` (dict, list, ...).
    """
    with open(json_file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def calculate_distance(point1, point2):
    """Return the Euclidean distance between two 2-D points.

    Only the first two components of each point are used. ``math.hypot`` is
    used instead of squaring by hand so that very large coordinates do not
    overflow in the intermediate squares.

    Args:
        point1: Indexable ``(x, y)`` pair.
        point2: Indexable ``(x, y)`` pair.

    Returns:
        The distance as a float.
    """
    return math.hypot(point2[0] - point1[0], point2[1] - point1[1])

def morf(img):
    """Apply a morphological close followed by an open to an image.

    Both passes use a 3x3 elliptical structuring element and two iterations.

    Args:
        img: Image array accepted by ``cv2.morphologyEx``.

    Returns:
        The image after the close and open passes.
    """
    ellipse_3x3 = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    closed = cv2.morphologyEx(img, cv2.MORPH_CLOSE, ellipse_3x3, iterations=2)
    return cv2.morphologyEx(closed, cv2.MORPH_OPEN, ellipse_3x3, iterations=2)

def preprocessing_image(img):
    """Convert a BGR image into a morphologically cleaned edge map.

    Pipeline: grayscale conversion, bilateral filtering (d=9, sigmaColor=75,
    sigmaSpace=75), Canny edge detection with the L2 gradient norm, then the
    close/open clean-up done by ``morf``.

    Args:
        img: Image array in BGR channel order.

    Returns:
        The edge image returned by ``morf``.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.bilateralFilter(gray, 9, 75, 75)
    # NOTE(review): the first Canny threshold (20) is larger than the second
    # (10); the argument order is kept as-is -- confirm this is intended.
    edges = cv2.Canny(smoothed, 20, 10, L2gradient=True)
    return morf(edges)

def read_labels(imfn, label_dir=None, width=640, height=480):
    """Read the label file of an image and return its boxes in pixels.

    The label file has the image's base name with a ``.txt`` extension. Each
    non-blank line is whitespace-separated: a leading field that is ignored
    here, followed by four fractions ``cx cy w h`` that are scaled by the
    frame size.

    Lines are tokenised with ``str.split()`` rather than by slicing off a
    fixed number of trailing characters, so the last value is not truncated
    when a line ends in a bare newline (or has no newline at all), and blank
    lines are skipped instead of raising.

    Args:
        imfn: Image path; only its base name (without extension) is used.
        label_dir: Directory holding the label files. Defaults to the
            module-level ``lbl_dir_path``.
        width: Frame width used to scale x values (default 640).
        height: Frame height used to scale y values (default 480).

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer lists, one per label line,
        where ``(x1, y1)`` is ``(cx - w/2, cy + h/2)`` and ``(x2, y2)`` is
        ``(cx + w/2, cy - h/2)``.

    Raises:
        ValueError: If a non-blank line has fewer than five fields.
    """
    if label_dir is None:
        label_dir = lbl_dir_path
    fn = Path(label_dir, Path(imfn).stem + ".txt")
    print("fn", fn)
    scale = (width, height, width, height)
    res = []
    with open(fn, 'r') as label_file:
        for line in label_file:
            fields = line.split()
            if not fields:
                continue  # blank line
            if len(fields) < 5:
                raise ValueError(f"malformed label line in {fn}: {line!r}")
            # Truncate to whole pixels first, then derive the two corners.
            cx, cy, bw, bh = (int(float(v) * s) for v, s in zip(fields[1:5], scale))
            corners = [cx - bw / 2, cy + bh / 2, cx + bw / 2, cy - bh / 2]
            res.append([int(c) for c in corners])

    return res

def add_rect(img, fn):
    """Draw the labelled boxes of an image onto it and return the image.

    Boxes come from ``read_labels(fn)``; each is drawn in place with
    ``cv2.rectangle`` using the colour (255, 0, 255).

    Args:
        img: Image array to draw on (modified in place).
        fn: Image file name used to look up the label file.

    Returns:
        The same image object that was passed in.
    """
    for x1, y1, x2, y2 in read_labels(fn):
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 0, 255))
    return img

def main():
    """Preprocess the source images and write them into a train/val layout.

    Reads up to ``datanum`` files returned by ``get_filenames(img_dir_path)``,
    keeps those whose index lies in ``[startno, endno)``, resizes each to
    640x480, runs ``preprocessing_image`` on it and writes the result under
    ``output_dir_path/train/images`` (index < ``trainnum``) or
    ``output_dir_path/val/images`` (otherwise), keeping the file name.

    Unreadable inputs and failed writes are reported and skipped rather than
    aborting the whole run.
    """
    print(cv2.__version__)
    filenames = get_filenames(img_dir_path)
    width, height = 640, 480

    # cv2.imwrite does not create missing directories (it just returns False),
    # so make sure both split directories exist up front.
    train_dir = output_dir_path / "train" / "images"
    val_dir = output_dir_path / "val" / "images"
    for split_dir in (train_dir, val_dir):
        split_dir.mkdir(parents=True, exist_ok=True)

    if len(filenames) < datanum:
        print(f"warning: expected {datanum} images, found {len(filenames)}")

    # Slicing bounds the loop by the files that actually exist.
    for i, filename in enumerate(filenames[:datanum]):
        if not (startno <= i < endno):
            continue

        img = cv2.imread(filename)
        if img is None:
            # imread signals failure by returning None instead of raising.
            print("skip (could not read)", filename)
            continue
        img = cv2.resize(img, (width, height))
        ppimg = preprocessing_image(img)

        target_dir = train_dir if i < trainnum else val_dir
        pt = target_dir / Path(filename).name

        print("pt", pt)
        # Pass a plain string: not every OpenCV build accepts Path objects.
        if not cv2.imwrite(str(pt), ppimg):
            print("write failed", pt)

# Run the conversion when this cell executes.
main()


4.10.0
pt dataset_yolo\train\images\image_0016_0000000000.png
pt dataset_yolo\train\images\image_0016_0000000001.png
pt dataset_yolo\train\images\image_0016_0000000002.png
pt dataset_yolo\train\images\image_0016_0000000003.png
pt dataset_yolo\train\images\image_0016_0000000004.png
pt dataset_yolo\train\images\image_0016_0000000005.png
pt dataset_yolo\train\images\image_0016_0000000006.png
pt dataset_yolo\train\images\image_0016_0000000007.png
pt dataset_yolo\train\images\image_0016_0000000008.png
pt dataset_yolo\train\images\image_0016_0000000009.png
pt dataset_yolo\train\images\image_0016_0000000010.png
pt dataset_yolo\train\images\image_0016_0000000011.png
pt dataset_yolo\train\images\image_0016_0000000012.png
pt dataset_yolo\train\images\image_0016_0000000013.png
pt dataset_yolo\train\images\image_0016_0000000014.png
pt dataset_yolo\train\images\image_0016_0000000015.png
pt dataset_yolo\train\images\image_0016_0000000016.png
pt dataset_yolo\train\images\image_0016_0000000017.png
pt 