In [1]:
import cv2
import numpy as np
import os
import math
from matplotlib import pyplot as plt



# def random_rotation(img, angle_range):
#     """
#     Функция для случайного поворота изображения на заданный угол.
#     """
#     rows, cols, _ = img.shape
#     angle = np.random.uniform(-angle_range, angle_range)
#     M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
#     return cv2.warpAffine(img, M, (cols, rows))

# def random_brightness(img, brightness_range):
#     """
#     Функция для случайного изменения яркости изображения.
#     """
#     hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
#     brightness = np.random.randint(-brightness_range, brightness_range)
#     h, s, v = cv2.split(hsv)
#     v = np.where(v <= 255 - brightness, v + brightness, 255)
#     final_hsv = cv2.merge((h, s, v))
#     return cv2.cvtColor(final_hsv, cv2.COLOR_HSV2BGR)

# def pad_img(image):
#     return cv2.copyMakeBorder(image, image.shape[0] // 2, image.shape[0] // 2, image.shape[1] // 2, image.shape[1] // 2, cv2.BORDER_CONSTANT)

# def random_affine_transform(img, shear_range, translation_range):
#     """
#     Функция для случайного аффинного преобразования.
#     """
#     rows, cols, _ = img.shape

#     # Случайное смещение
#     tr_x = np.random.uniform(-translation_range, translation_range) * cols
#     tr_y = np.random.uniform(-translation_range, translation_range) * rows

#     # Искажение по осям x и y
#     pts1 = np.float32([[0, 0], [cols, 0], [0, rows]])
#     pt1 = 0 + shear_range * np.random.uniform(-1, 1)
#     pt2 = 0 + shear_range * np.random.uniform(-1, 1)
#     pts2 = np.float32([[pt1, 0], [cols + pt2, 0], [0, rows]])

#     # Матрица аффинного преобразования
#     M = cv2.getAffineTransform(pts1, pts2)
#     return cv2.warpAffine(img, M, (cols, rows))

# def augment_image(doc_img, background_img, shear_range, translation_range, angle_range, scale_range, brightness_range):
#     """
#     Функция для аугментации изображения.
#     """
#     # Накладываем документ на фон
#     foreground = cv2.subtract(cv2.add(doc_img, background_img), cv2.bitwise_and(doc_img, background_img))
#     mask = cv2.cvtColor(foreground, cv2.COLOR_BGR2GRAY)
#     mask = cv2.threshold(mask, 30, 255, cv2.THRESH_BINARY)[1]
#     mask = cv2.merge((mask, mask, mask))
#     doc_with_background = cv2.bitwise_or(foreground, cv2.bitwise_and(background_img, cv2.bitwise_not(mask)))


# # plt.imshow(random_affine_transform(doc_img, 200, 100))
# padded = pad_img(doc_img)
# transformed = random_affine_transform(padded, 200, 100)
# plt.imshow(transformed)




In [115]:
from scipy import ndimage

def preprocess_doc_image(image: cv2.Mat):
    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred_image = cv2.GaussianBlur(gray_image, (31, 31), 0)

    paper_texture_image = cv2.absdiff(gray_image, blurred_image)
    paper_texture_mask = cv2.cvtColor(paper_texture_image, cv2.COLOR_GRAY2BGR)

    output_image = cv2.addWeighted(image, 0.8, paper_texture_mask, 0.2, 0)

    return output_image

def put_image_on_background(img: cv2.Mat, back):
    x,y = 0,0 
    if (back.shape[0] < back.shape[1]): 
        back = cv2.rotate(back, cv2.ROTATE_90_CLOCKWISE)

    scale = min(back.shape[1] / 2 / img.shape[1], back.shape[0] / 2 / img.shape[0])
    x_start = np.random.randint(0, back.shape[0] // 3)
    y_start = np.random.randint(0, back.shape[1] // 3)
    new_img = cv2.resize(img, list(map(int, (scale * img.shape[1], scale * img.shape[0]))))
    back[x_start:x_start+new_img.shape[0],y_start:y_start+new_img.shape[1]] = new_img

    return back

def project_image(img: cv2.Mat):
    points_src = np.float32([[0, 0], [img.shape[1], 0], [0, img.shape[0]], [img.shape[1], img.shape[0]]])

    points_dst = np.float32([[np.random.randint(0, img.shape[1] // 10),np.random.randint(0, img.shape[0] // 10)],
                         [np.random.randint(img.shape[1] // 10 * 9,img.shape[1] - 1),np.random.randint(0, img.shape[0] // 10)],
                         [np.random.randint(0, img.shape[1] // 10),np.random.randint(img.shape[0] // 10 * 9, img.shape[0] - 1)],
                         [np.random.randint(img.shape[1] // 10 * 9,img.shape[1] - 1),np.random.randint(img.shape[0] // 10 * 9, img.shape[0] - 1)]])
    M = cv2.getPerspectiveTransform(points_src, points_dst)

    perspective_image = cv2.warpPerspective(img, M, (img.shape[1], img.shape[0]))

    points_dst = points_dst.astype(np.int32)
    top_x = max(points_dst[0][0], points_dst[2][0])
    bottom_x = min(points_dst[1][0], points_dst[3][0])
    top_y = max(points_dst[0][1], points_dst[1][1])
    bottom_y = min(points_dst[2][1], points_dst[3][1])
    return perspective_image[top_y:bottom_y:,top_x:bottom_x]


def crop_image(image, angle):
    h, w = image.shape[:2]
    tan_a = abs(np.tan(angle * np.pi / 180))
    b = int(tan_a / (1 - tan_a ** 2) * (h - w * tan_a))
    d = int(tan_a / (1 - tan_a ** 2) * (w - h * tan_a))
    return image[d:h - d, b:w - b]

def rotate_bound(image, angle):
    result = ndimage.rotate(image, angle)
    return crop_image(result, angle)

def add_light(img):
    x, y = np.random.randint(0, img.shape[1]), np.random.randint(0, img.shape[0])
    radius = 3000

    mask = np.zeros_like(img[:,:,0]).astype(np.float32)
    for i in range(mask.shape[0]):
        for j in range(mask.shape[1]):
            dist = np.sqrt((i-y)**2 + (j-x)**2)
            if dist < radius:
                mask[i,j] = (1 - (dist/radius) ** 2)

    result = cv2.merge([mask]*3) * img

    return result.astype(np.uint8)

def process(doc_img, back_img):
    preprocessed = preprocess_doc_image(doc_img)
    random_angle_1 = np.random.randint(-10, 10)
    random_angle_2 = np.random.randint(-10, 10)
    rotated_back = rotate_bound(back_img, random_angle_1)
    combined = put_image_on_background(preprocessed, rotated_back)

    projected = project_image(combined)
    rotated = rotate_bound(projected, random_angle_2)
    result = add_light(rotated)

    return result



In [118]:
for i in range(10):
    doc_img = cv2.imread(os.path.join("data", f"{i + 1}.png"))
    print(f"doc={i + 1}")
    for j in range(10):
        print(f"\tback={j + 1}")
        back_img = cv2.imread(os.path.join("data", "background", f"{j + 1}.jpg"))
        result = process(doc_img, back_img)
        result.save(f"result/doc_{i + 1}_back{j + 1}.png")

doc=1
	back=1
