In [1]:
import cv2
import numpy as np
import pandas as pd
import os
import shutil
import random
import torch
import requests
from bs4 import BeautifulSoup
from PIL import Image

git clone https://github.com/ultralytics/yolov5.git

## 데이터 스크래핑

In [47]:
# Search term for the Google image search.
query = 'pink rose'

# Browser-like User-Agent sent with the search request.
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

url = f'https://www.google.com/search?q={query}&tbm=isch'

# A timeout keeps the cell from hanging forever on a stalled connection.
req = requests.get(headers=header, url=url, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page in the next cell.
req.raise_for_status()

In [48]:
# Parse the search result page and download every thumbnail that exposes a
# `data-src` URL into ./0_scrapping_img as rose_NNNN.jpg.
soup = BeautifulSoup(req.text, 'html.parser')

# Continue numbering after the highest index already on disk. os.listdir()
# returns names in arbitrary order, so its last entry is not a reliable
# "latest file"; names without a 4-digit index before the extension are ignored.
existing_indices = [
    int(name[-8:-4])
    for name in os.listdir('./0_scrapping_img')
    if name[-8:-4].isdigit()
]
i = max(existing_indices) + 1 if existing_indices else 0

for img in soup.find_all(attrs={'jsname' : 'Q4LuWd'}):
    img_url = img.get('data-src')
    if not img_url:
        continue
    try:
        response = requests.get(img_url, timeout=10)
        # Do not store an HTTP error page under a .jpg name.
        response.raise_for_status()
        fn = str(i).rjust(4,'0')
        with open(f'./0_scrapping_img/rose_{fn}.jpg', 'wb') as f:
            f.write(response.content)
    except (requests.RequestException, OSError) as exc:
        # Best effort: report the failed download and move on to the next image.
        print(f'error: {img_url}: {exc}')
        continue
    i += 1

### scrapping_img 파일 idx 정리

In [49]:
# Renumber the files in ./0_scrapping_img to a gap-free 0000, 0001, ... sequence,
# keeping each file's own prefix (everything before the last 8 characters).
# Sorted so the new numbering follows the existing name order.
f_list = sorted(os.listdir('./0_scrapping_img/'))

# Phase 1: move every file to a unique temporary name. Renaming straight to the
# final name could land on a file that has not been processed yet and
# overwrite it (or raise, depending on the platform).
staged = []
for i, f in enumerate(f_list):
    num = str(i).rjust(4, '0')
    tmp_name = f'__reindex_tmp_{i}_{f}'
    os.rename(f'./0_scrapping_img/{f}', f'./0_scrapping_img/{tmp_name}')
    staged.append((tmp_name, f'{f[:-8]}{num}.jpg'))

# Phase 2: all final names are free now, so assign them.
for tmp_name, final_name in staged:
    os.rename(f'./0_scrapping_img/{tmp_name}', f'./0_scrapping_img/{final_name}')

## hsv로 바운딩박스 생성

### 색깔 정의

In [15]:
# HSV color table: inclusive [lower, upper] bounds per color, used by
# cv2.inRange. Values follow OpenCV's 8-bit HSV convention (H 0-179,
# S and V 0-255).

# Red
# Red range (1): low end of the hue axis
lower_red1 = np.array([0, 100, 100])
upper_red1 = np.array([10, 255, 255])

# Red range (2): high end of the hue axis (red wraps around)
lower_red2 = np.array([170, 100, 100])
upper_red2 = np.array([180, 255, 255])

# Yellow
lower_yellow = np.array([15, 100, 100])
upper_yellow = np.array([35, 255, 255])

# Purple (identifier spelling kept as-is because other cells refer to it)
lower_pupple = np.array([130, 100, 100])
upper_pupple = np.array([160, 255, 255])

# Blue
# Previously an exact copy of the purple range (130-160), so 'blue' could
# never match blue pixels; pure blue sits at H = 120 in OpenCV's scale.
lower_blue = np.array([100, 100, 100])
upper_blue = np.array([130, 255, 255])

# Orange
lower_orange = np.array([10, 100, 100])
upper_orange = np.array([20, 255, 255])

# White / light pink: any hue, low saturation, high value
lower_ypink = np.array([0, 0, 175])
upper_ypink = np.array([180, 120, 255])

# Pink
lower_pink = np.array([150, 50, 50])
upper_pink = np.array([170, 255, 255])

def get_mask(hsv, color):
    """Return the cv2.inRange mask of `hsv` for a named color.

    Args:
        hsv: Image passed straight to cv2.inRange (the callers in this
            notebook pass the result of a BGR->HSV conversion).
        color: One of 'red', 'pupple', 'yellow', 'blue', 'orange', 'pink',
            'ypink'.

    Returns:
        The mask produced by cv2.inRange for the color's module-level
        lower/upper bounds. For 'red' the masks of both red ranges are
        combined with cv2.add.

    Raises:
        Exception: If `color` is not one of the names listed above.
    """
    if color == 'red':
        # Red is defined by two separate hue ranges; merge their masks.
        low_band = cv2.inRange(hsv, lower_red1, upper_red1)
        high_band = cv2.inRange(hsv, lower_red2, upper_red2)
        return cv2.add(low_band, high_band)

    if color == 'pupple':
        return cv2.inRange(hsv, lower_pupple, upper_pupple)

    if color == 'yellow':
        return cv2.inRange(hsv, lower_yellow, upper_yellow)

    if color == 'blue':
        return cv2.inRange(hsv, lower_blue, upper_blue)

    if color == 'orange':
        return cv2.inRange(hsv, lower_orange, upper_orange)

    if color == 'pink':
        return cv2.inRange(hsv, lower_pink, upper_pink)

    if color == 'ypink':
        return cv2.inRange(hsv, lower_ypink, upper_ypink)

    # Unknown color name.
    raise Exception('색깔을 정확히 입력해주렴')

### 분류할 색상 정의

In [79]:
# Color name handed to get_mask() by the bounding cell that follows; it must be
# one of the names get_mask() accepts.
color = 'pink'

### 색상 바운딩

In [80]:
# For each scraped image, find the largest region matching `color`, draw its
# bounding box on the image and save the annotated copy to ./1_bounding_img.

# os.listdir() returns names in arbitrary order; sort so the resume offset
# below always refers to the same files.
img_list = sorted(os.listdir(f'./0_scrapping_img'))

# cv2.imwrite does not create missing directories.
os.makedirs('./1_bounding_img', exist_ok=True)

# NOTE(review): 815 looks like a manual "resume from here" offset - confirm
# before re-running on a different image set.
for f_name in img_list[815:]:
    # Load the image; cv2.imread returns None when the file cannot be decoded.
    img = cv2.imread(f'./0_scrapping_img/{f_name}')
    if img is None:
        print(f'skip (unreadable image): {f_name}')
        continue

    # Convert to HSV and build the color mask.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    mask = get_mask(hsv, color)

    # Blur the mask before contour detection.
    mask = cv2.GaussianBlur(mask, (5, 5), 0)

    # Detect the outer contours of the masked regions.
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        # Without this guard the box of the previous image would be reused
        # (or the names would be undefined on the first image).
        print(f'skip (no {color} region found): {f_name}')
        continue

    # Keep the bounding box with the largest width + height.
    rx, ry, rw, rh = max(
        (cv2.boundingRect(cnt) for cnt in contours),
        key=lambda box: box[2] + box[3],
    )

    img = cv2.rectangle(img, (rx, ry), (rx + rw, ry + rh), (0, 255, 0), 4)

    cv2.imwrite(f'./1_bounding_img/{f_name}', img)

    # # 결과 이미지 출력
    # cv2.imshow('result', img)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

'r'

### 0과 1을 대조하여, 1에 있는 0의 원본파일을 2로 복사

In [81]:
# Copy the untouched original of every image still present in ./1_bounding_img
# into ./2_learning_img, then empty ./1_bounding_img.

# shutil.copy() treats a non-existent destination as a file name, so without
# the directory every copy would overwrite a single file called 2_learning_img.
os.makedirs('./2_learning_img', exist_ok=True)

bounding_processing_img = os.listdir('./1_bounding_img')
# Set for constant-time membership checks in the loop below.
kept_names = set(bounding_processing_img)

scrapping_img = os.listdir('./0_scrapping_img')

for spi in scrapping_img:
    if spi in kept_names:
        shutil.copy(f'./0_scrapping_img/{spi}', './2_learning_img')

# Remove the annotated previews that were listed above.
for bpi in bounding_processing_img:
    os.remove(f'./1_bounding_img/{bpi}')

### 2의 idx 리셋

In [14]:
# Renumber everything in ./2_learning_img to rose_0000.jpg, rose_0001.jpg, ...
# Sorted so the new numbering follows the existing name order.
learning_files = sorted(os.listdir('./2_learning_img/'))

# Phase 1: park every file under a unique temporary name. Renaming directly
# could target the name of a file that has not been processed yet and
# overwrite it (or raise, depending on the platform).
staged = []
cnt = 0
for f in learning_files:
    num = str(cnt).rjust(4, '0')
    tmp_name = f'__reindex_tmp_{cnt}_{f}'
    os.rename(f'./2_learning_img/{f}', f'./2_learning_img/{tmp_name}')
    staged.append((tmp_name, f'rose_{num}.jpg'))
    cnt += 1

# Phase 2: all final names are free now, so assign them.
for tmp_name, final_name in staged:
    os.rename(f'./2_learning_img/{tmp_name}', f'./2_learning_img/{final_name}')

## 데이터 증강

### 증강 함수 정의

In [4]:
def adjust_hue(image, hue_shift=None):
    """Return a copy of a BGR image with its hue channel rotated.

    Args:
        image: BGR image to convert with cv2.COLOR_BGR2HSV.
            Assumes an 8-bit image such as the result of cv2.imread.
        hue_shift: Integer added to the hue channel before it is wrapped
            modulo 180. When None, a random integer in [-90, 90] is drawn.

    Returns:
        The hue-shifted image converted back to BGR.
    """
    if hue_shift is None:
        hue_shift = random.randint(-90, 90)
    # Normalise to 0..179 so the sum below always fits in int16.
    shift = int(hue_shift) % 180

    hsv_image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Do the addition in a wider signed type. In the channel's own uint8 dtype
    # the sum wraps modulo 256 before the "% 180" is applied (and negative
    # shifts are rejected by newer NumPy), which gives the wrong hue.
    hue = hsv_image[:, :, 0].astype(np.int16)
    hsv_image[:, :, 0] = ((hue + shift) % 180).astype(hsv_image.dtype)

    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR)

# image = cv2.imread('image.jpg')
# hue_adjusted_image = adjust_hue(image)
# cv2.imwrite('hue_adjusted_image.jpg', hue_adjusted_image)

In [20]:
# Build the one-class YOLO dataset in ./3_one_time_data/{train,test}:
# for every image in ./2_learning_img, detect the largest region of its
# assigned color, write the image plus a normalized "class cx cy w h" label,
# and add a hue-shifted copy (suffix _c) that shares the same label.

# The color assignment below is by position, so the order must be stable;
# os.listdir() returns names in arbitrary order.
image_list = sorted(os.listdir(f'./2_learning_img'))

# (exclusive upper index, color) pairs, checked in order.
# NOTE(review): the original chain tested "374 <= cnt < 466" before
# "456 <= cnt < 547", so indices 456-465 resolved to 'yellow'; that effective
# behavior is kept here - confirm whether the boundary should be 456 instead.
# Indices from 595 upward keep the last color ('pink'), as before.
color_by_index = [
    (7, 'yellow'),
    (26, 'ypink'),
    (27, 'yellow'),
    (374, 'red'),
    (466, 'yellow'),
    (547, 'ypink'),
    (553, 'orange'),
    (595, 'pink'),
]

color = ''

# Two files out of every full ten are held out as the test split.
test_set = set(random.sample(image_list, len(image_list)//10 *2))

# cv2.imwrite does not create missing directories.
for split_name in ('train', 'test'):
    os.makedirs(f'./3_one_time_data/{split_name}/images', exist_ok=True)
    os.makedirs(f'./3_one_time_data/{split_name}/labels', exist_ok=True)

for cnt, f_name in enumerate(image_list):
    # cv2.imread returns None when the file cannot be decoded.
    img = cv2.imread(f'./2_learning_img/{f_name}')
    if img is None:
        print(f'skip (unreadable image): {f_name}')
        continue

    # Convert the image to the HSV color space.
    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

    color = next(
        (name for upper, name in color_by_index if cnt < upper),
        color_by_index[-1][1],
    )

    mask = get_mask(hsv, color)

    # Blur the mask before contour detection.
    mask = cv2.GaussianBlur(mask, (5, 5), 0)

    # Detect the outer contours of the masked regions.
    contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    if not contours:
        # A zero-size box is not a usable training label; leave the image out.
        print(f'skip (no {color} region found): {f_name}')
        continue

    # Keep the bounding box with the largest width + height.
    rx, ry, rw, rh = max(
        (cv2.boundingRect(cnto) for cnto in contours),
        key=lambda box: box[2] + box[3],
    )

    # Normalize the box by the image size (shape is rows, cols, channels).
    img_h, img_w = img.shape[:2]
    x_center = round((rx+(rw/2)) / img_w, 4)
    y_center = round((ry+(rh/2)) / img_h, 4)
    if x_center > 1 or y_center > 1:
        print('아직 문제가...')

    W = round(rw / img_w, 4)
    H = round(rh / img_h, 4)
    if W > 1 or H > 1:
        print('아직 문제가 있다...')

    CLS = 'test' if f_name in test_set else 'train'
    stem = os.path.splitext(f_name)[0]

    # Hue-shifted augmentation; the box geometry is unchanged.
    adjust_col_img = adjust_hue(img)
    cv2.imwrite(f'./3_one_time_data/{CLS}/images/{stem}_c.jpg', adjust_col_img)

    cv2.imwrite(f'./3_one_time_data/{CLS}/images/{f_name}', img)
    label_path = f'./3_one_time_data/{CLS}/labels/{stem}.txt'
    with open(label_path, 'w') as f:
        f.write(f'0 {x_center} {y_center} {W} {H}')

    # The augmented copy reuses the same label.
    shutil.copy(label_path, f'./3_one_time_data/{CLS}/labels/{stem}_c.txt')

In [None]:
# NOTE(review): this cell defines neither `img` nor `f_name`; it relies on
# whatever values an earlier cell left in the kernel - confirm which image is
# intended before running.
# Writes a hue-shifted copy of `img` into ./2_learning_img with a `_c` suffix.
adjust_col_img = adjust_hue(img)
cv2.imwrite(f'./2_learning_img/{f_name[:-4]}_c.jpg', adjust_col_img)

## HSV 바운딩 1 클래스 학습 시작

In [12]:
# Move images whose fifth-from-last character is a digit (e.g. rose_0001.jpg,
# but not rose_0001_c.jpg) from both dataset splits back to ./2_learning_img.
for split in ['train', 'test']:
    images_dir = f'./3_one_time_data/{split}/images'

    for name in os.listdir(images_dir):
        # Skip files without a digit right before the 4-character extension.
        if not name[-5].isdigit():
            continue
        shutil.move(f'{images_dir}/{name}', f'./2_learning_img/{name}')

python train.py --img 640 --batch 16 --data one_data.yaml --cfg one_yolov5m.yaml --weights yolov5m.pt --name <run_name>

## 세트 분할

In [None]:

# Split ./2_learning_img into the train/test image folders: two files out of
# every full ten are picked at random for test, the rest go to train.
image_list = os.listdir('./2_learning_img')

holdout_count = len(image_list)//10 *2
test_set = dict.fromkeys(random.sample(image_list, holdout_count), True)

for f_name in image_list:
    # Pick the destination folder for this file.
    split = 'test' if test_set.get(f_name) else 'train'
    path = f'./3_one_time_data/{split}/images/'

    shutil.move(f'./2_learning_img/{f_name}', path+f_name)