In [1]:
%matplotlib inline
import pandas as pd
import torch, copy, json, requests, os, time, cv2, glob, random, shutil
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim import lr_scheduler
from io import BytesIO
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import datasets, models, transforms
from sklearn.neighbors import NearestNeighbors

In [2]:
# Root folder for all image data; per-class sub-folders are created below it.
base_path = './data/images/'

# Read the annotation export. The context manager guarantees the file handle
# is closed, and the explicit encoding avoids platform-dependent defaults
# (JSON is UTF-8 by specification).
with open('./data/Locofy_data.json', encoding='utf-8') as f:
    data = json.load(f)

df = pd.DataFrame.from_dict(data)
df.head()

Unnamed: 0,id,data,annotations
0,65179,{'image': 'https://locofy-ai-task-production.s...,"[{'original_width': 1382, 'original_height': 4..."
1,66738,{'image': 'https://locofy-ai-task-production.s...,"[{'original_width': 1440, 'original_height': 6..."
2,65282,{'image': 'https://locofy-ai-task-production.s...,"[{'original_width': 1440, 'original_height': 5..."
3,66760,{'image': 'https://locofy-ai-task-production.s...,"[{'original_width': 1920, 'original_height': 1..."
4,65876,{'image': 'https://locofy-ai-task-production.s...,"[{'original_width': 1194, 'original_height': 7..."


In [3]:
# Create one folder per class plus 'raw' for the downloaded screenshots.
# makedirs(exist_ok=True) also creates base_path itself when it is missing
# and is safe to re-run, unlike a bare os.mkdir.
for cat in ['grid', 'popup', 'progress_bar', 'none', 'raw']:
    create_path = base_path + cat
    os.makedirs(create_path, exist_ok=True)

In [4]:
# Download every screenshot once into <base_path>/raw/<id>.png.
# Ids whose download or decode fails are collected in `failed_batch` so the
# corresponding rows can be dropped afterwards.
failed_batch = []
for img_id, image_info in tqdm(zip(df['id'], df['data']), total=df.shape[0]):
    target_path = base_path + f'raw/{img_id}.png'
    if os.path.isfile(target_path):
        continue  # already downloaded on a previous run
    try:
        # A timeout stops one stalled connection from hanging the whole loop;
        # raise_for_status turns HTTP error pages into explicit failures.
        res = requests.get(image_info['image'], timeout=30)
        res.raise_for_status()
        img = Image.open(BytesIO(res.content))
        img.save(target_path)
    except Exception as err:
        # Best-effort download: record the failure and keep going, but do not
        # swallow KeyboardInterrupt/SystemExit the way a bare `except:` does.
        failed_batch.append(img_id)
        tqdm.write(f'failed to fetch image {img_id}: {err!r}')

100%|██████████████████████████████████████████████████████████████████████████████| 1955/1955 [00:00<00:00, 28549.77it/s]


In [5]:
# Keep only the rows whose image was downloaded successfully.
download_failed = df['id'].isin(failed_batch)
df = df.loc[~download_failed].copy()

In [6]:
# One row per annotation: the list-valued 'annotations' column is flattened,
# so an image with several annotations is repeated once per annotation.
df = df.explode(column='annotations')

In [7]:
def extract_info(input_dict):
    """Convert one annotation record into absolute pixel coordinates.

    Parameters
    ----------
    input_dict : dict or NaN
        Annotation with 'original_width', 'original_height' and a 'value'
        dict holding 'x', 'y', 'width', 'height' (read as percentages, i.e.
        divided by 100) and 'label'. A missing value (NaN/None) is accepted.

    Returns
    -------
    tuple
        (x, y, width, height, label, img_width, img_height) with the box in
        pixels; a tuple of seven NaN when the annotation is missing.
    """
    if pd.isna(input_dict):
        return (np.nan,) * 7

    img_w = input_dict['original_width']
    img_h = input_dict['original_height']
    box = input_dict['value']

    # Percentages -> fractions of the image size.
    frac_x = box['x'] / 100
    frac_y = box['y'] / 100
    frac_w = box['width'] / 100
    frac_h = box['height'] / 100
    label = box['label']

    return (frac_x * img_w, frac_y * img_h, frac_w * img_w, frac_h * img_h, label, img_w, img_h)

In [8]:
# Expand each annotation into pixel-space box columns (NaN for images that
# have no annotation).
bbox_columns = ['x', 'y', 'width', 'height', 'label', 'img_width', 'img_height']
bbox_tuples = df['annotations'].map(extract_info)
df[bbox_columns] = bbox_tuples.apply(pd.Series)

In [9]:
# Pull the image URL out of the nested 'data' dict into its own column.
df['img_path'] = [record['image'] for record in df['data']]

In [10]:
# explode() repeats the original index labels; renumber rows 0..n-1.
df = df.reset_index(drop=True)

In [11]:
# Class balance, with NaN counted too (rows that carry no annotation).
label_counts = df['label'].value_counts(dropna=False)
label_counts

label
NaN             1274
grid             737
progress bar      99
slider            29
pop-up            21
google maps        2
Name: count, dtype: int64

In [12]:
# Inspect the flattened frame (one row per annotation; rows with a NaN
# annotation belong to images that have none).
df

Unnamed: 0,id,data,annotations,x,y,width,height,label,img_width,img_height,img_path
0,65179,{'image': 'https://locofy-ai-task-production.s...,"{'original_width': 1382, 'original_height': 41...",195.000000,1704.000000,977.000000,623.000000,grid,1382.0,4189.0,https://locofy-ai-task-production.s3.ap-southe...
1,66738,{'image': 'https://locofy-ai-task-production.s...,"{'original_width': 1440, 'original_height': 67...",55.923832,1184.000000,1325.266588,698.177150,grid,1440.0,6712.0,https://locofy-ai-task-production.s3.ap-southe...
2,66738,{'image': 'https://locofy-ai-task-production.s...,"{'original_width': 1440, 'original_height': 67...",62.677191,2752.721437,1316.221013,425.454396,grid,1440.0,6712.0,https://locofy-ai-task-production.s3.ap-southe...
3,66738,{'image': 'https://locofy-ai-task-production.s...,"{'original_width': 1440, 'original_height': 67...",60.655346,5497.915278,1328.352083,593.610181,grid,1440.0,6712.0,https://locofy-ai-task-production.s3.ap-southe...
4,65282,{'image': 'https://locofy-ai-task-production.s...,"{'original_width': 1440, 'original_height': 55...",728.000000,2193.994550,620.000000,525.005450,grid,1440.0,5501.0,https://locofy-ai-task-production.s3.ap-southe...
...,...,...,...,...,...,...,...,...,...,...,...
2157,75413,{'image': 'https://locofy-ai-task-production.s...,,,,,,,,,https://locofy-ai-task-production.s3.ap-southe...
2158,75638,{'image': 'https://locofy-ai-task-production.s...,,,,,,,,,https://locofy-ai-task-production.s3.ap-southe...
2159,76495,{'image': 'https://locofy-ai-task-production.s...,,,,,,,,,https://locofy-ai-task-production.s3.ap-southe...
2160,75247,{'image': 'https://locofy-ai-task-production.s...,,,,,,,,,https://locofy-ai-task-production.s3.ap-southe...


### Create None Class
1. Apply inverse-binary adaptive thresholding, then group neighbouring contours with a nearest-neighbour search to find suitable regions of interest.
2. Crop enough "none" images from those regions to compare against the slider, progress-bar and pop-up classes.

In [13]:
# Images without any annotation: the source pool for the "none" class.
has_no_annotation = df['annotations'].isna()
df_na_annotations = df.loc[has_no_annotation].copy()

In [None]:
# Visual comparison of three inverse-binary thresholding strategies (Otsu,
# adaptive Gaussian, fixed 127) on a sample of un-annotated screenshots.
for img_id in df_na_annotations['id'][-100:-20].tolist():
    raw = cv2.imread(base_path + f'raw/{img_id}.png')
    if raw is None:
        # cv2.imread returns None instead of raising when a file is missing
        # or unreadable; skip it rather than crash inside cvtColor.
        print(f'skipping {img_id}: raw image could not be read')
        continue

    img = cv2.cvtColor(raw, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(img, (5, 5), 0)
    adaptive_thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, blockSize=11, C=5)
    # With THRESH_OTSU the threshold argument (0) is ignored and the computed
    # threshold is returned as the first value.
    otsu_T, threshInv = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)
    fixed_T, binary_image = cv2.threshold(blurred, 127, 255, cv2.THRESH_BINARY_INV)
    print('binary inverse', otsu_T, fixed_T)

    fig, ax = plt.subplots(1, 4, figsize=(15, 5))
    panels = [
        (img, 'grayscale'),
        (threshInv, 'Otsu (inverse)'),
        (adaptive_thresh, 'adaptive Gaussian (inverse)'),
        (binary_image, 'fixed 127 (inverse)'),
    ]
    for axis, (panel, title) in zip(ax, panels):
        axis.imshow(panel, cmap='gray', vmin=0, vmax=255)
        axis.set_title(title)
    plt.tight_layout()
    plt.show()
    plt.close(fig)  # release the figure; 80 figures are created in this loop

In [15]:
# Renumber the un-annotated subset 0..n-1.
df_na_annotations = df_na_annotations.reset_index(drop=True)

In [20]:
from src import *

In [32]:
# Build "none"-class crops from un-annotated screenshots:
#   1. adaptive inverse threshold -> external contours -> bounding boxes
#   2. while there are too many boxes, merge each box with its nearest
#      neighbour and de-duplicate with non_max_suppression
#   3. save the largest boxes as image crops
MAX_BOXES = 100            # stop merging once at most this many boxes remain
MAX_MERGE_ROUNDS = 3       # upper bound on merge passes per image
DISTANCE_THRESHOLD = 500   # max centre distance (pixels) for merging two boxes
CROPS_PER_IMAGE = 5        # number of largest boxes written per image

for img_id in tqdm(df_na_annotations['id'].tolist()[200:300]):
    img = cv2.imread(base_path + f'raw/{img_id}.png')
    if img is None:
        # cv2.imread returns None for missing/unreadable files.
        tqdm.write(f'skipping {img_id}: raw image could not be read')
        continue

    grey = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(grey, (3, 3), 0)
    adaptive_thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, blockSize=11, C=5)
    contours, _ = cv2.findContours(adaptive_thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    bounding_boxes = [cv2.boundingRect(contour) for contour in contours]

    merge_rounds = 0
    while len(bounding_boxes) > MAX_BOXES and merge_rounds < MAX_MERGE_ROUNDS:
        box_centers = np.array([(x + w // 2, y + h // 2) for (x, y, w, h) in bounding_boxes])

        # Only kneighbors() is queried, so `radius` has no effect here; it is
        # kept to leave the estimator configuration unchanged.
        nbrs = NearestNeighbors(n_neighbors=10, radius=200).fit(box_centers)
        distances, indices = nbrs.kneighbors(box_centers)

        merged_boxes = []
        for i, (x, y, w, h) in enumerate(bounding_boxes):
            # Column 0 is normally the box itself (distance 0); column 1 is
            # its nearest other box.
            if distances[i][1] < DISTANCE_THRESHOLD:
                x2, y2, w2, h2 = bounding_boxes[indices[i][1]]
                # Union of the two rectangles.
                new_x = min(x, x2)
                new_y = min(y, y2)
                new_w = max(x + w, x2 + w2) - new_x
                new_h = max(y + h, y2 + h2) - new_y
                merged_boxes.append((new_x, new_y, new_w, new_h))
            else:
                merged_boxes.append((x, y, w, h))

        # NOTE(review): non_max_suppression presumably comes from
        # `from src import *`; 0.1 looks like an overlap threshold - confirm.
        bounding_boxes = non_max_suppression(merged_boxes, 0.1)
        merge_rounds += 1

    # Largest area first.
    final_boxes = sorted(bounding_boxes, key=lambda box: box[2] * box[3], reverse=True)
    for count, (x, y, w, h) in enumerate(final_boxes[:CROPS_PER_IMAGE]):
        cv2.imwrite(base_path + f'none/largest_cut_v2_{img_id}_{count}.png', img[y:y + h, x:x + w])

100%|███████████████████████████████████████████████████████████████████████████████████| 100/100 [00:24<00:00,  4.11it/s]


### popup class 
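1. Random rotation is applied at training time inside `transforms.Compose`.
2. Consider increasing the number of images by offline augmentation (channel swap, contrast, hue shift and random cutout), as done below.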

In [34]:
# All pop-up crops currently on disk (glob returns them in arbitrary order).
popup_pattern = './data/images/popup/*.png'
popup_ls = glob.glob(popup_pattern)

In [103]:
import cv2
import numpy as np
import random

def adjust_hue(image, delta=0.1):
    """Rotate the hue of a BGR image.

    The image is converted to HSV, the hue channel is shifted by
    ``delta * 180`` and wrapped modulo 180, then converted back to BGR.

    Parameters
    ----------
    image : BGR image accepted by ``cv2.cvtColor``.
    delta : shift as a fraction of the 180-step hue range used here.

    Returns
    -------
    A new uint8 BGR image; the input is not modified.
    """
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float32)
    hue_shift = delta * 180
    hsv[..., 0] = (hsv[..., 0] + hue_shift) % 180
    hsv_u8 = np.clip(hsv, 0, 255).astype(np.uint8)
    return cv2.cvtColor(hsv_u8, cv2.COLOR_HSV2BGR)

def random_rgb_swap(image):
    """Return a copy of ``image`` with its colour channels randomly permuted.

    The permutation is drawn with ``random.shuffle``, so it is reproducible
    via ``random.seed``. For a 3-channel image the draw is the same one the
    previous ``cv2.split``/``cv2.merge`` implementation made.

    Unlike a fixed 3-way unpack, this also accepts other layouts:
    * 2-D (grayscale) input is returned as an unchanged copy;
    * channels beyond the first three (e.g. alpha) keep their position.

    Parameters
    ----------
    image : array-like of shape (H, W) or (H, W, C).

    Returns
    -------
    numpy.ndarray
        New C-contiguous array with the same shape and dtype as the input.
    """
    image = np.asarray(image)
    if image.ndim < 3:
        return image.copy()

    n_channels = image.shape[2]
    n_color = min(3, n_channels)
    order = list(range(n_color))
    random.shuffle(order)  # Shuffle the colour channels randomly
    order.extend(range(n_color, n_channels))  # alpha/extra channels stay put

    # Fancy indexing copies; make it contiguous so OpenCV accepts it downstream.
    return np.ascontiguousarray(image[:, :, order])
    
def adjust_contrast(image, factor=1.5):
    """Scale pixel intensities by ``factor`` and saturate to the uint8 range.

    The multiplication is done in float64. Multiplying a uint8 array directly
    by an integer factor (e.g. ``factor=2`` or ``factor=10``) stays in uint8
    and wraps modulo 256 *before* ``np.clip`` runs, which yields noise rather
    than saturated highlights. Float factors behave as before.

    Parameters
    ----------
    image : array-like of pixel values (typically uint8).
    factor : int or float multiplier applied to every pixel.

    Returns
    -------
    numpy.ndarray
        New uint8 array of the same shape; the input is not modified.
    """
    scaled = np.asarray(image).astype(np.float64) * factor
    adjusted = np.clip(scaled, 0, 255).astype(np.uint8)
    return adjusted

def random_cutout(image, max_fraction=0.3):
    """Zero out one random rectangle covering at most ``max_fraction`` of the image.

    The rectangle area is drawn uniformly from ``1..max_area`` and a height is
    then tried up to 30 times until the implied width fits the image; if no
    attempt fits, a single pixel is cut out.

    Edge cases handled explicitly:
    * if the permitted area is below one pixel (tiny image or very small
      ``max_fraction``) the image is returned untouched instead of raising
      ``ValueError`` from ``random.randint(1, 0)``;
    * the height is never larger than the drawn area, so the width is at
      least 1 and the cutout is never silently empty.

    Parameters
    ----------
    image : numpy.ndarray of shape (H, W) or (H, W, C). **Modified in place.**
    max_fraction : upper bound on the cutout area as a fraction of H * W.

    Returns
    -------
    The same array object that was passed in.
    """
    img_height, img_width = image.shape[:2]

    # Never allow more than the whole image, even for max_fraction > 1.
    max_area = min(int(img_height * img_width * max_fraction), img_height * img_width)
    if max_area < 1:
        return image

    cutout_area = random.randint(1, max_area)

    # Fallback used when none of the attempts produces a width that fits.
    cutout_height, cutout_width = 1, 1
    for _ in range(30):
        candidate_height = random.randint(1, min(img_height, cutout_area))
        candidate_width = cutout_area // candidate_height
        if candidate_width <= img_width:
            cutout_height, cutout_width = candidate_height, candidate_width
            break

    cutout_x = random.randint(0, img_width - cutout_width)
    cutout_y = random.randint(0, img_height - cutout_height)

    image[cutout_y:cutout_y + cutout_height, cutout_x:cutout_x + cutout_width] = 0

    return image

In [109]:
# Peek at one path to see the file-naming pattern of the pop-up crops.
popup_ls[0]

'./data/images/popup/popup_4_49720.png'

In [110]:
# Sanity check: the id is the last '_'-separated token of the path, minus
# everything from its first '.' onwards.
last_token = popup_ls[0].split('_')[-1]
id = last_token.split('.')[0]
id

'49720'

In [113]:
# Offline augmentation of the pop-up crops: three variants per source image.
for path in popup_ls:
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.startswith('data_augment_'):
        # popup_ls is built from '*.png', so a re-run would otherwise
        # augment the outputs of a previous run again.
        continue

    original = cv2.imread(path)
    if original is None:
        # cv2.imread returns None for missing/unreadable files.
        print(f'skipping {path}: image could not be read')
        continue

    for i in range(3):
        # Start every variant from the untouched source. Reusing the previous
        # variant would compound the contrast factor and stack cutouts.
        img = random_rgb_swap(original.copy())
        # NOTE(review): factor=10 is very aggressive - confirm it is intended.
        img = adjust_contrast(img, factor=10)
        img = adjust_hue(img)
        img = random_cutout(img)
        # Use the full source stem: the last '_' token alone is not unique
        # when several crops share it, and later crops would overwrite
        # earlier ones.
        cv2.imwrite(f'./data/images/popup/data_augment_{i}_{stem}.png', img)

### Progress bar class

In [114]:
# All progress-bar crops currently on disk (glob order is arbitrary).
progress_bar_pattern = './data/images/progress_bar/*.png'
progress_bar_ls = glob.glob(progress_bar_pattern)

In [117]:
# Offline augmentation of the progress-bar crops: three variants per source.
for path in progress_bar_ls:
    stem = os.path.splitext(os.path.basename(path))[0]
    if stem.startswith('data_augment_'):
        # progress_bar_ls is built from '*.png'; do not re-augment the
        # outputs of a previous run.
        continue

    original = cv2.imread(path)
    if original is None:
        # cv2.imread returns None for missing/unreadable files.
        print(f'skipping {path}: image could not be read')
        continue

    for i in range(3):
        # Each variant starts from the untouched source so that contrast and
        # hue changes do not compound from one variant to the next.
        img = random_rgb_swap(original.copy())
        img = adjust_contrast(img, factor=2)
        img = adjust_hue(img)
        # The full source stem keeps output names unique per source file.
        cv2.imwrite(f'./data/images/progress_bar/data_augment_{stem}_{i}.png', img)

### Slider bar within None Class

In [118]:
# Files in the none folder whose name starts with 'none' (the slider crops,
# per the section heading); other files in that folder are not matched.
slider_pattern = './data/images/none/none*.png'
slider_ls = glob.glob(slider_pattern)

In [119]:
# Offline augmentation of the 'none*' crops in the none folder: three
# variants per source image. Outputs start with 'data_augment_', so the
# 'none*.png' glob never picks them up on a re-run.
for path in slider_ls:
    stem = os.path.splitext(os.path.basename(path))[0]

    original = cv2.imread(path)
    if original is None:
        # cv2.imread returns None for missing/unreadable files.
        print(f'skipping {path}: image could not be read')
        continue

    for i in range(3):
        # Each variant starts from the untouched source so that contrast and
        # hue changes do not compound from one variant to the next.
        img = random_rgb_swap(original.copy())
        img = adjust_contrast(img, factor=2)
        img = adjust_hue(img)
        # The full source stem keeps output names unique per source file.
        cv2.imwrite(f'./data/images/none/data_augment_{stem}_{i}.png', img)