In [None]:
!nvidia-smi

In [None]:
!pip --version

### **Connect to drive** ###



In [None]:
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
%cd /content/gdrive/MyDrive/yolov5/utils

In [None]:
!pip3 install torch torchvision

### **Extract Dataset**

In [None]:
# !unzip -uq /content/gdrive/MyDrive/abnormal_detection/vinbigdata-chest-xray-abnormalities-detection "train.csv" -d "/content/gdrive/MyDrive/abnormal_detection"

### **Import Library**

In [None]:
!pip install pydicom
!pip install iterative-stratification

In [None]:
import math
import os
import shutil
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import pydicom
import cv2
import PIL

import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.offline as pyo

import argparse
import time
import random
import torch
import csv

from iterstrat.ml_stratifiers import MultilabelStratifiedKFold

from os import path

from tqdm import tqdm
from pydicom.pixel_data_handlers.util import apply_voi_lut

from sklearn.model_selection import train_test_split

In [None]:
# Seed every RNG this notebook draws from so that sampled images and box
# colours are reproducible after a kernel restart.
random_stat = 123
np.random.seed(random_stat)
# The visualisation cells use the stdlib `random` module (random.choice, randint),
# which np.random.seed does not affect.
random.seed(random_stat)

In [None]:
DATA_DIR = '/content/gdrive/MyDrive/abnormal_detection'

In [None]:
train_dcm_dir = os.path.join(DATA_DIR, "train")
test_dcm_dir = os.path.join(DATA_DIR, "test")
testing_dcm_dir = os.path.join(DATA_DIR, "testing_images")
training_dcm_dir = os.path.join(DATA_DIR, "train_image")

img_dir = os.path.join(DATA_DIR, 'images')
label_dir = os.path.join(DATA_DIR, 'labels')
metadata_dir = os.path.join(DATA_DIR, 'metadata')

# Create the output folders; exist_ok keeps the cell safe to re-run and
# makedirs also creates a missing parent folder.
for directory in [img_dir, label_dir, metadata_dir]:
  os.makedirs(directory, exist_ok=True)

#### **Load CSV**

In [None]:
annots = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
annots.head()

In [None]:
annots.class_name.value_counts()

In [None]:
# Per-image original height / width, used later to rescale the bounding boxes.
# Built from DATA_DIR so the location is configured in one place only.
size_data = pd.read_csv(os.path.join(DATA_DIR, "train_meta.csv"))
size_data.columns = ['image_id', 'h', 'w']

# Drop h / w left over from a previous execution of this cell; otherwise a
# re-run of the merge yields suffixed duplicates (h_x, h_y, w_x, w_y).
annots = annots.drop(columns=['h', 'w'], errors='ignore').merge(size_data, on='image_id', how='left')
# Rows with missing coordinates get a dummy box (0, 0)-(1, 1);
# presumably these are the "No finding" rows -- verify against train.csv.
annots[['x_min', 'y_min']] = annots[['x_min', 'y_min']].fillna(0)
annots[['x_max', 'y_max']] = annots[['x_max', 'y_max']].fillna(1)

annots.tail()

In [None]:
data = annots[annots.class_id != 14].reset_index(drop = True)
data.tail(100)

In [None]:
data.class_name.value_counts()

In [None]:
data.class_name.value_counts().plot()

### **Extract all patient data from DICOM**

In [None]:
import pydicom as dicom
import os
import PIL # optional
import pandas as pd
import csv
# Table of DICOM attributes to export; its "Description" column supplies both
# the CSV header and the names looked up on every dataset below.
# The file must be downloaded separately and is read relative to the current
# working directory (changed earlier in the notebook by the %cd cell).
dicom_image_description = pd.read_csv("dicom_image_description.csv")
# Folder holding the DICOM files to scan.
folder_path = "/content/gdrive/MyDrive/Pneumonia_dataset/testdicom"
images_path = os.listdir(folder_path)
# One CSV row per DICOM file is written to 'Patient_Detail.csv' in the working directory.
with open('Patient_Detail.csv', 'w', newline ='') as csvfile:
    fieldnames = list(dicom_image_description["Description"])
    writer = csv.writer(csvfile, delimiter=',')
    writer.writerow(fieldnames)
    for n, image in enumerate(images_path):
        ds = dicom.dcmread(os.path.join(folder_path, image))
        rows = []
        for field in fieldnames:
            # NOTE(review): data_element() is assumed to return None for a field
            # that is absent -- confirm it does not raise KeyError instead.
            if ds.data_element(field) is None:
                rows.append('')
            else:
                # Use the element's string form, without quotes, and keep only
                # the text that follows the first ": " separator.
                x = str(ds.data_element(field)).replace("'", "")
                y = x.find(":")
                x = x[y+2:]
                rows.append(x)
        writer.writerow(rows)

In [None]:
from pydicom.data import get_testdata_files
from pydicom import dcmread

In [None]:
# get_testdata_files() returns a *list* of matching paths, while dcmread()
# expects a single path: take the first match and fail loudly when there is none.
fpaths = get_testdata_files("0a0b773c653cea6653a1e02faf1566a5.dicom")
if not fpaths:
    raise FileNotFoundError("no pydicom test-data file matches 0a0b773c653cea6653a1e02faf1566a5.dicom")
fpath = fpaths[0]
ds = dcmread(fpath)

In [None]:
print(ds)

### **Visualize Dataset**

In [None]:
def Visualize_class(df, feature, title):
  """Show a horizontal bar chart of how many rows carry each value of `feature`.

  df      -- DataFrame containing the column to count.
  feature -- name of the column whose values are counted.
  title   -- chart title, centred near the top of the figure.
  """
  counts = df[feature].value_counts()
  count_table = counts.rename_axis(feature).reset_index(name="num_image")
  # The table is handed to plotly in reversed row order.
  reversed_table = count_table[::-1]
  title_layout = {'text': title, 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}

  fig = px.bar(reversed_table, x='num_image', y=feature, orientation='h', color='num_image')
  fig.update_layout(title=title_layout)
  fig.show()

In [None]:
Visualize_class(annots, feature="class_name", title="Types of abnormal labels")

In [None]:
Visualize_class(annots, feature="rad_id", title="Types of radiologist")

## **1. Data Migration**

### **Generate Images**

In [None]:
# NOTE(review): `new_image_id` is only assigned in a later cell (the loop over
# the test folder), so on a fresh kernel this cell raises NameError.
print(new_image_id)

In [None]:
def save_img_from_dcm(dcm_dir, img_dir, image_id, voi_lut=True, fix_monochrome=True):
  """Convert `<dcm_dir>/<image_id>.dicom` to an 8-bit PNG at `<img_dir>/<image_id>.png`.

  Returns immediately when the PNG already exists.

  dcm_dir        -- folder holding the source .dicom files.
  img_dir        -- folder the PNG is written to.
  image_id       -- file stem shared by the DICOM and the PNG.
  voi_lut        -- apply the VOI LUT stored in the DICOM before scaling.
  fix_monochrome -- invert the pixel data of MONOCHROME1 images.
  """
  img_fp = os.path.join(img_dir, "{}.png".format(image_id))
  if os.path.exists(img_fp):
    return
  dcm_fp = os.path.join(dcm_dir, "{}.dicom".format(image_id))
  # dcmread is the supported reader; read_file is the deprecated alias.
  dicom = pydicom.dcmread(dcm_fp)

  if voi_lut:
    data = apply_voi_lut(dicom.pixel_array, dicom)
  else:
    data = dicom.pixel_array

  if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
    data = np.amax(data) - data

  # Min-max scale to 0..255. A constant image has a peak of 0 after the shift;
  # skip the division then instead of producing NaNs.
  data = data - np.min(data)
  peak = np.max(data)
  if peak > 0:
    data = data / peak
  data = (data * 255).astype(np.uint8)

  cv2.imwrite(img_fp, data)

  
def save_img_from_abnormal(dcm_dir, img_dir, annots):
  """Write a PNG for every distinct `image_id` in `annots` that has none yet.

  dcm_dir -- folder holding the source .dicom files.
  img_dir -- folder the PNGs are written to (and checked in).
  annots  -- DataFrame with an `image_id` column.
  """
  distinct_ids = annots.image_id.unique()
  for image_id in tqdm(distinct_ids):
    png_fp = os.path.join(img_dir, "{}.png".format(image_id))
    if not os.path.exists(png_fp):
      save_img_from_dcm(dcm_dir, img_dir, image_id)


In [None]:
save_img_from_abnormal(train_dcm_dir, img_dir, data)

### **Generate Test Images**

In [None]:
# Collect the id (text before the first dot) of every file in the test folder
# whose last dot-separated part is "dicom".
test_listing = os.listdir("/content/gdrive/MyDrive/abnormal_detection/test")
new_image_id = []
for filename in tqdm(test_listing):
  file = filename.split('.')
  if file[-1] != "dicom":
    continue
  new_image_id.append(file[0])

In [None]:
def save_img_from_testing(dcm_dir, img_dir, annots):
  """Convert each DICOM named in `annots` unless `<img_dir>/<id>.jpg` already exists.

  dcm_dir -- folder holding the source .dicom files.
  img_dir -- folder that is checked and written to.
  annots  -- iterable of image ids (file stems), despite the name.

  NOTE(review): the skip test looks for a .jpg while save_img_from_dcm writes
  and checks a .png -- confirm the mismatch is intended.
  """
  for image_id in tqdm(annots):
    jpg_fp = os.path.join(img_dir, "{}.jpg".format(image_id))
    if not os.path.exists(jpg_fp):
      save_img_from_dcm(dcm_dir, img_dir, image_id)

In [None]:
save_img_from_testing(test_dcm_dir, img_dir, new_image_id)

### **Generating Labels**

In [None]:
from PIL import Image
from sklearn import preprocessing
from bokeh.plotting import figure as bokeh_figure
from bokeh.io import output_notebook, show, output_file
from bokeh.models import ColumnDataSource, HoverTool, Panel
from bokeh.models.widgets import Tabs
import random
from random import randint

In [None]:
def get_bbox_area(row):
  """Return the area of a corner-format box: (x_max - x_min) * (y_max - y_min).

  row -- mapping / Series with the keys x_min, y_min, x_max, y_max.
  """
  box_width = row['x_max'] - row['x_min']
  box_height = row['y_max'] - row['y_min']
  return box_width * box_height

# Encode the radiologist ids as consecutive integers.
le = preprocessing.LabelEncoder()

annots['rad_label'] = le.fit_transform(annots['rad_id'])

# .copy() makes finding_df an independent frame, so adding a column below does
# not raise SettingWithCopyWarning or write to a temporary slice of `annots`.
finding_df = annots[(annots['class_name'] != 'No finding')].copy()
finding_df['bbox_area'] = finding_df.apply(get_bbox_area, axis=1)
finding_df.head()

In [None]:
def dicom2array(path, voi_lut=True, fix_monochrome=True):
    """Read a DICOM file and return its pixels as a uint8 array scaled to 0..255.

    path           -- path of the DICOM file.
    voi_lut        -- apply the VOI LUT stored in the file (turns raw data into
                      a "human-friendly" view when the device provides one).
    fix_monochrome -- invert MONOCHROME1 images, which otherwise look inverted.
    """
    # dcmread is the supported reader; read_file is the deprecated alias.
    dicom = pydicom.dcmread(path)
    if voi_lut:
        data = apply_voi_lut(dicom.pixel_array, dicom)
    else:
        data = dicom.pixel_array
    if fix_monochrome and dicom.PhotometricInterpretation == "MONOCHROME1":
        data = np.amax(data) - data
    # Min-max scale. A constant image has a peak of 0 after the shift; skip the
    # division then instead of producing NaNs.
    data = data - np.min(data)
    peak = np.max(data)
    if peak > 0:
        data = data / peak
    data = (data * 255).astype(np.uint8)
    return data
  
def plot_imgs(imgs, cols=4, size=7, is_rgb=True, title="", cmap='gray', img_size=(500,500)):
  """Show `imgs` in a grid with `cols` columns.

  imgs     -- sequence of image arrays.
  cols     -- number of grid columns.
  size     -- width and height, in figure units, given to each grid cell.
  is_rgb   -- unused; kept so existing calls keep working.
  title    -- figure title.
  cmap     -- colormap passed to imshow.
  img_size -- (width, height) every image is resized to, or None to skip resizing.
  """
  # Ceiling division, at least one row. The former `len(imgs)//cols + 1` added
  # an empty row whenever the image count was an exact multiple of `cols`.
  rows = max(1, -(-len(imgs) // cols))
  fig = plt.figure(figsize=(cols*size, rows*size))
  for i, img in enumerate(imgs):
    if img_size is not None:
      img = cv2.resize(img, img_size)
    ax = fig.add_subplot(rows, cols, i+1)
    ax.imshow(img, cmap=cmap)
  fig.suptitle(title)
  plt.show()

In [None]:
imgs = []
img_ids = finding_df['image_id'].values
class_ids = finding_df['class_id'].unique()

# One random colour per class id.
label2color = {class_id:[randint(0,255) for i in range(3)] for class_id in class_ids}
thickness = 3
scale = 5  # images and boxes are shrunk by this factor before drawing


# Draw the annotated boxes on 8 randomly chosen images.
for i in range(8):
    img_id = random.choice(img_ids)
    img_path = os.path.join(train_dcm_dir, "{}.dicom".format(img_id))
    img = dicom2array(path=img_path)
    img = cv2.resize(img, None, fx=1/scale, fy=1/scale)
    # Replicate the single channel three times so coloured boxes can be drawn.
    img = np.stack([img, img, img], axis=-1)

    image_rows = finding_df.loc[finding_df['image_id'] == img_id]
    boxes = image_rows[['x_min', 'y_min', 'x_max', 'y_max']].values/scale
    # Take the column as a Series so `labels` stays 1-D. The former
    # `[['class_id']].values.squeeze()` collapsed to a 0-d array for images with
    # a single box, which zip() cannot iterate.
    labels = image_rows['class_id'].values

    for label_id, box in zip(labels, boxes):
      color = label2color[label_id]
      img = cv2.rectangle(
          img,
          (int(box[0]), int(box[1])),
          (int(box[2]), int(box[3])),
          color, thickness)

    img = cv2.resize(img, (500,500))
    imgs.append(img)

plot_imgs(imgs, cmap=None)

In [None]:
testing_train = pd.read_csv('/content/testing_train.csv')
testing_train.head()

In [None]:
files = []
for filename in tqdm(os.listdir("/content/gdrive/MyDrive/abnormal_detection/testing_images")):
  file = filename.split(".")
  files.append(file[0])

print(files)

In [None]:
def save_label_from_abnormal(testing_dir, annots):
  """Append one YOLO label line per row of `annots` to `<testing_dir>/<image_id>.txt`.

  testing_dir -- folder containing `<image_id>.jpg`; each label file is written
                 next to its image. (The folder used to be hard-coded and this
                 argument ignored.)
  annots      -- DataFrame read by column position: 0 image_id, 2 class_id,
                 4..7 x_min, y_min, x_max, y_max in pixels.

  Each line is "<class_id> <x_center> <y_center> <width> <height>", normalised
  by the width / height of the JPEG. Files are opened in append mode, so calling
  this twice for the same data duplicates every line.
  """
  for row in tqdm(annots.values):
    image_id = row[0]

    # Only the image size is needed; the context manager closes the file again.
    with PIL.Image.open(os.path.join(testing_dir, "{}.jpg".format(image_id))) as image:
      width, height = image.size
    label_fp = os.path.join(testing_dir, "{}.txt".format(image_id))

    # `==` rather than `is`: identity comparison with an int literal only works
    # by interpreter accident and triggers a SyntaxWarning.
    if row[2] == 14:
      x_min, y_min, x_max, y_max = 0, 0, 1, 1
    else:
      x_min, y_min, x_max, y_max = row[4], row[5], row[6], row[7]

    dw = 1./width
    dh = 1./height
    x = (x_min + x_max)/2.0
    y = (y_min + y_max)/2.0
    w = x_max - x_min
    h = y_max - y_min

    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh

    line = "{} {} {} {} {}\n".format(row[2], x, y, w, h)

    # The with-block closes the file; the former `f.close` (no parentheses)
    # never did.
    with open(label_fp, "a") as f:
      f.write(line)

In [None]:
save_label_from_abnormal(img_dir, annots)

## **2. Data Selection**

### **Weighted Box Fusion**

In [None]:
!pip install ensemble_boxes

In [None]:
from ensemble_boxes import *

In [None]:
# Target (height, width) that bounding boxes are rescaled to by label_resize.
IMG_SIZE = (512, 512)
# Row positions that are dropped from train_abnormal (after its reset_index)
# before the boxes are fused.
# NOTE(review): presumably hand-picked bad annotations -- how the list was
# produced is not recorded in this notebook.
list_remove = [34843, 21125, 647, 18011, 2539, 22373, 12675, 7359, 20642, 5502, 19818, 5832, 28056, 28333, 20758,
               925, 43, 2199, 4610, 21306, 16677, 1768, 17232, 1378, 24949, 30203, 31410, 87, 25318, 92, 31724,
               118, 17687, 12605, 26157, 33875, 7000, 3730, 18776, 13225, 1109, 2161, 33627, 15500, 28633, 28152,
               10114, 10912, 9014,  4427, 25630, 11464, 6419, 22164, 4386, 17557, 15264, 21853, 33142, 32895, 9733,
               33010, 17493, 32128, 28802, 11658, 8841, 29557, 4802, 8591, 778, 9935, 12359, 5210, 7556, 24505, 5664,
               28670, 27820, 19359, 9817, 7800, 32934, 34098, 27931, 16074, 27308, 30645, 31029, 35697, 6199, 27065,
               1771, 14689, 31860, 1975, 29294, 2304, 34018, 23406, 26501, 26011, 2479, 32796, 25836, 3032, 31454,
               32066, 19722, 15997, 6049, 9458, 11005, 23151, 24503, 35411, 18092, 23815, 30742, 33942, 34542, 7655,
               25345, 3750, 17046, 3844, 5958, 4250, 18823, 14898, 22581, 25805, 9651, 33194, 36007, 30160, 24459,
               10838, 16544, 31252, 8053, 28487, 6208, 25244, 8470, 10089, 24813, 14769, 34305, 34047, 23366, 8049,
               13276, 22380, 32797, 32440, 11031, 18304, 33692, 21349, 26333, 34331, 9110, 21092, 34882, 35626, 10203,
               25648, 30754, 29567, 33542, 15146, 26759, 20846, 22493, 33187, 22813, 30219, 14548, 14627, 20494, 28332,
               15930, 31347, 33489, 35005, 34032, 24183, 18643, 18536, 29754, 20380, 29750, 20539, 35791, 27275, 32248]

In [None]:
def label_resize(org_size, img_size, *bbox):
    """Rescale a corner-format box from the original image size to `img_size`.

    org_size -- (height, width) of the original image.
    img_size -- (height, width) of the target image.
    bbox     -- x0, y0, x1, y1 in original-image pixels.

    Returns (x0, y0, x1, y1) in target pixels, each rounded with np.round and
    converted to int.
    """
    def _rescale(value, axis):
        # axis 1 is the width (x coordinates), axis 0 the height (y coordinates).
        return int(np.round(value*img_size[axis]/org_size[axis]))

    x0, y0, x1, y1 = bbox
    return _rescale(x0, 1), _rescale(y0, 0), _rescale(x1, 1), _rescale(y1, 0)

In [None]:
# Keep only rows that are not labelled "No finding".
train_abnormal = annots[annots['class_name'] != 'No finding'].reset_index(drop=True)
# Rescale every box from the image's original (h, w) to IMG_SIZE.
train_abnormal[['x_min_resize', 'y_min_resize', 'x_max_resize', 'y_max_resize']] = train_abnormal.apply(lambda x: label_resize(x[['h','w']].values, IMG_SIZE, *x[['x_min', 'y_min', 'x_max', 'y_max']].values), axis=1, result_type="expand")
# (A former line re-assigned x_min..y_max to themselves; it had no effect and was removed.)
train_abnormal['x_center'] = 0.5*(train_abnormal['x_min_resize'] + train_abnormal['x_max_resize'])
train_abnormal['y_center'] = 0.5*(train_abnormal['y_min_resize'] + train_abnormal['y_max_resize'])
train_abnormal['width'] = train_abnormal['x_max_resize'] - train_abnormal['x_min_resize']
train_abnormal['height'] = train_abnormal['y_max_resize'] - train_abnormal['y_min_resize']
# Same product as the former row-wise apply, computed column-wise.
train_abnormal['area'] = train_abnormal['width'] * train_abnormal['height']
# list_remove holds row positions of this freshly re-indexed frame.
train_abnormal = train_abnormal[~train_abnormal.index.isin(list_remove)].reset_index(drop=True)

train_abnormal.tail()

In [None]:
SIZE = 512
def Preprocess_wbf(df, size=SIZE, iou_thr=0.5, skip_box_thr=0.0001):
    """Fuse the boxes of every image with weighted_boxes_fusion.

    df           -- DataFrame with image_id, class_id, h, w and the
                    x_min_resize / y_min_resize / x_max_resize / y_max_resize
                    columns (pixel coordinates).
    size         -- pixel coordinates are divided by (size - 1) before fusion
                    and multiplied by it afterwards.
    iou_thr      -- forwarded to weighted_boxes_fusion.
    skip_box_thr -- forwarded to weighted_boxes_fusion.

    Returns a new DataFrame with one row per fused box and the columns
    image_id, class_id, h, w, the four *_resize corners, x_center, y_center,
    width, height and area. An empty `df` yields an empty frame with those
    columns.
    """
    box_cols = ['x_min_resize', 'y_min_resize', 'x_max_resize', 'y_max_resize']
    records = []
    for image_id in tqdm(df['image_id'].unique(), leave=False):
        image_df = df[df['image_id']==image_id].reset_index(drop=True)
        h, w = image_df.loc[0, ['h', 'w']].values
        boxes = [[j/(size-1) for j in i] for i in image_df[box_cols].values.tolist()]
        # Every annotation gets the same confidence; only one "model" is fused.
        scores = [1.0]*len(boxes)
        labels = [float(i) for i in image_df['class_id'].values]
        boxes, scores, labels = weighted_boxes_fusion([boxes], [scores], [labels],
                                                      weights=None,
                                                      iou_thr=iou_thr,
                                                      skip_box_thr=skip_box_thr)
        for box, label in zip(boxes, labels.tolist()):
            # Round instead of truncating: x/(size-1)*(size-1) can come back as
            # e.g. 99.999..., which int() alone would turn into 99.
            x0, y0, x1, y1 = (int(round(float(j)*(size-1))) for j in box)
            records.append((image_id, label, h, w, x0, y0, x1, y1))

    new_df = pd.DataFrame(records, columns=['image_id', 'class_id', 'h', 'w'] + box_cols)
    new_df['x_center'] = 0.5*(new_df['x_min_resize'] + new_df['x_max_resize'])
    new_df['y_center'] = 0.5*(new_df['y_min_resize'] + new_df['y_max_resize'])
    new_df['width'] = new_df['x_max_resize'] - new_df['x_min_resize']
    new_df['height'] = new_df['y_max_resize'] - new_df['y_min_resize']
    new_df['area'] = new_df['width'] * new_df['height']
    return new_df

new_train_abnormal = Preprocess_wbf(train_abnormal)
new_train_abnormal.tail()

In [None]:
new_train_abnormal.to_csv('abnormal.csv', index=False)

### **Fold**

In [None]:
def split_df(df):
  """Assign every image to one of 5 validation folds, stratified on its classes.

  df -- DataFrame with `image_id` and `class_id` columns, one row per box.
        It is not modified.

  Returns a frame with one row per image: `image_id`, one column per class id
  holding the number of boxes of that class, and `fold_0` .. `fold_4`, where 1
  marks the images of that fold's validation split.
  """
  kf = MultilabelStratifiedKFold(n_splits=5, shuffle=True, random_state=89)
  # assign() returns a copy; the former `df['id'] = df.index` silently added an
  # `id` column to the caller's frame.
  indexed = df.assign(id=df.index)
  annot_pivot = pd.pivot_table(indexed, index=['image_id'], columns=['class_id'], values='id', fill_value=0, aggfunc='count').reset_index().rename_axis(None, axis=1)
  # Column 0 is image_id; the per-class count columns follow it.
  n_classes = df['class_id'].nunique()
  class_counts = annot_pivot.iloc[:, 1:(1+n_classes)]
  for fold, (train_idx, val_idx) in enumerate(kf.split(annot_pivot, class_counts)):
    annot_pivot[f'fold_{fold}'] = 0
    annot_pivot.loc[val_idx, f'fold_{fold}'] = 1
  return annot_pivot

# Original image sizes (the same train_meta.csv that is also loaded into
# `size_data` earlier in the notebook).
size_df = pd.read_csv('/content/gdrive/MyDrive/abnormal_detection/train_meta.csv')
size_df.columns = ['image_id', 'h', 'w']

# One row per image with its fold flags, then the original h / w attached.
fold_csv = split_df(new_train_abnormal)
fold_csv = fold_csv.merge(size_df, on='image_id', how='left')
fold_csv.head(100)

### **Drop "No finding" and move data to new folder for train images**

In [None]:
new_dataset = "/content/gdrive/MyDrive/abnormal_detection/dataset_baru"

In [None]:
def write_images_list(target_dir, img_dir, series):
  """Copy `<img_dir>/<image_id>.jpg` into `target_dir` for every id in `series`.

  target_dir -- destination folder; created (with parents) when missing.
  img_dir    -- folder holding the source JPEGs.
  series     -- iterable of image ids (file stems).

  Raises FileNotFoundError (from shutil.copyfile) when a source image is missing.
  """
  # copyfile does not create the destination folder itself.
  os.makedirs(target_dir, exist_ok=True)
  for image_id in series:
    file_name = '{}.jpg'.format(image_id)
    shutil.copyfile(os.path.join(img_dir, file_name), os.path.join(target_dir, file_name))

In [None]:
id_series = new_train_abnormal["image_id"].drop_duplicates()
write_images_list(new_dataset, img_dir, id_series)

### **Generate Label**

In [None]:
def save_label_from_abnormal(target_dir, annots, size=512):
  """Append one YOLO label line per row of `annots` to `<target_dir>/<image_id>.txt`.

  target_dir -- folder the label files are written to.
  annots     -- DataFrame with image_id, class_id, x_center, y_center, width
                and height columns, the last four in pixels.
  size       -- side length, in pixels, the coordinates are normalised by.

  Each line is "<class_id> <x_center> <y_center> <width> <height>" with the four
  numbers divided by `size`. Files are opened in append mode, so calling this
  twice for the same data duplicates every line.

  NOTE: an earlier cell defines a function with the same name; once this cell
  runs, this definition replaces it.
  """
  dw = 1./size
  dh = 1./size
  for _, row in tqdm(annots.iterrows(), total=len(annots)):
    image_id = row['image_id']
    label_fp = os.path.join(target_dir, "{}.txt".format(image_id))

    x_center = row['x_center']*dw
    width = row['width']*dw
    y_center = row['y_center']*dh
    height = row['height']*dh

    line = "{} {} {} {} {}\n".format(int(row['class_id']), x_center, y_center, width, height)

    # The with-block closes the file; the former `f.close` (no parentheses)
    # never did.
    with open(label_fp, "a") as f:
      f.write(line)

In [None]:
dataset_path = "/content/gdrive/MyDrive/abnormal_detection/images"
save_label_from_abnormal(dataset_path, new_train_abnormal)

## **3. Data Preprocessing**

### **Resize 640**

In [None]:
def resize_640(directory, scale_percent=40):
  """Shrink every .png in `directory`, in place, to `scale_percent` % of its size.

  directory     -- folder whose PNG files are overwritten.
  scale_percent -- percentage applied to both width and height.

  Images are read as single-channel grayscale. Despite the function name no
  fixed 640-pixel target is applied, and because files are overwritten, running
  this twice shrinks the images twice.
  """
  for filename in tqdm(os.listdir(directory)):
    # splitext keeps stems that contain dots intact ("a.b.png" -> "a.b").
    stem, ext = os.path.splitext(filename)
    if ext != '.png':
      continue

    image_name = os.path.join(directory, filename)
    img = cv2.imread(image_name, 0)
    if img is None:
      # imread signals an unreadable file with None rather than an exception.
      print("skipping unreadable image: {}".format(image_name))
      continue

    width = int(img.shape[1] * scale_percent/100)
    height = int(img.shape[0] * scale_percent/100)

    output = cv2.resize(img, (width, height))

    cv2.imwrite(image_name, output)

In [None]:
resize_640('/content/gdrive/MyDrive/abnormal_detection/training_7/images/val')

## **4. Data Augmentation**

In [None]:
def brightness_augment(img, factor=0.5):
    """Randomly rescale the brightness of an RGB image.

    The image is converted to HSV, its V channel is multiplied by
    `factor + u` (u drawn once from np.random.uniform()), values above 255 are
    capped at 255, and the result is converted back to RGB as uint8.
    """
    hsv = cv2.cvtColor(img, cv2.COLOR_RGB2HSV).astype(np.float64)
    gain = factor + np.random.uniform()
    scaled_value = hsv[:, :, 2] * gain
    hsv[:, :, 2] = np.where(scaled_value > 255, 255, scaled_value)
    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2RGB)


img_id = "ffeffc54594debf3716d6fcd2402a99f"
folder_path = "/content/gdrive/MyDrive/abnormal_detection/testing_images"

img_path = os.path.join(folder_path, "{}.jpg".format(img_id))
bbox_path = os.path.join(folder_path, "{}.txt".format(img_id))

# Read the label lines; the with-block closes the file again.
with open(bbox_path, 'r') as f:
    f1 = f.readlines()
print(f1)

img = cv2.imread(img_path)

(height, width) = img.shape[:2]

quarter_height, quarter_width = height/4, width/4

# Affine translation matrix [[1, 0, tx], [0, 1, ty]]: the x shift goes in the
# first row and the y shift in the second (the two were swapped before).
M = np.float32([[1, 0, quarter_width], [0, 1, quarter_height]])

# Translation
# img = cv2.warpAffine(img, M, (width, height))

# Scaling
# img = cv2.resize(img, (int(width / 2), int(height / 2)), interpolation = cv2.INTER_CUBIC)
# img = cv2.resize(img, None, fx=2, fy=2, interpolation = cv2.INTER_AREA)

# Contrast enhancement: apply CLAHE to the Y channel of YCrCb only, then
# convert back into `img`.
ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
channels = cv2.split(ycrcb)
# Plain histogram equalisation would be: cv2.equalizeHist(channels[0], channels[0])
clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
clahe.apply(channels[0], channels[0])
cv2.merge(channels, ycrcb)
cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img)

# OpenCV stores channels as BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

In [None]:
# NOTE(review): this cell calls yoloFormattocv, which is defined in a later
# cell; on a fresh kernel that cell has to be executed first.
img_path = os.path.join(img_dir, "{}.png".format("0b4c3cb187f7582fa482b8f0fded6bd7"))
txt_path = os.path.join(img_dir, "{}.txt".format("0b4c3cb187f7582fa482b8f0fded6bd7"))

img = cv2.imread(img_path,1)
H, W = img.shape[:2]

# Read the label lines; the with-block closes the file again.
with open(txt_path, 'r') as f:
  f1 = f.readlines()

new_bbox = []
for x in f1:
  bbox = x.strip('\n').split(' ')
  if len(bbox) > 1:
    # yoloFormattocv returns pixel corners: x_min, y_min, x_max, y_max.
    (x_min, y_min, x_max, y_max) = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
    new_bbox.append([bbox[0], x_min, y_min, x_max, y_max])

class_ids = data['class_id'].unique()

# One random colour per class id.
label2color = {class_id:[randint(0,255) for i in range(3)] for class_id in class_ids}
thickness = 15
scale = 5

for box in new_bbox:
  color = label2color[int(box[0])]
  img = cv2.rectangle(
      img,
      (int(box[1]), int(box[2])),
      (int(box[3]), int(box[4])),
      color, thickness)

# (A former cv2.resize to (W, H) was removed: the image already has that size.)
# OpenCV stores channels as BGR while matplotlib expects RGB.
plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
plt.show()

### **Function yoloFormattocv and cvFormattoYolo**

In [None]:
#convert from Yolo_mark to opencv format
def yoloFormattocv(x1, y1, x2, y2, H, W):
    """Turn a normalised YOLO box into integer pixel corners.

    x1, y1 -- box centre as fractions of the image width / height.
    x2, y2 -- box width / height as fractions of the image width / height.
    H, W   -- image height and width in pixels.

    Returns [x_min, y_min, x_max, y_max], each truncated with int().
    """
    half_width = (x2 * W) / 2
    half_height = (y2 * H) / 2
    centre_x = x1 * W
    centre_y = y1 * H
    corners = (
        centre_x - half_width,
        centre_y - half_height,
        centre_x + half_width,
        centre_y + half_height,
    )
    return [int(value) for value in corners]

In [None]:
# Convert from opencv format to yolo format
# H,W is the image height and width
def cvFormattoYolo(corner, H, W):
    """Turn a pixel-corner box into a normalised YOLO tuple.

    corner -- sequence [label, x_min, y_min, x_max, y_max] in pixels.
    H, W   -- image height and width in pixels.

    Returns (label, x_center, y_center, width, height); the four numbers are
    divided by the image width / height and rounded to 6 decimals.
    """
    label = corner[0]
    x_min, y_min, x_max, y_max = corner[1], corner[2], corner[3], corner[4]

    box_width = x_max - x_min
    box_height = y_max - y_min
    centre_x = (x_min + x_max) / 2
    centre_y = (y_min + y_max) / 2

    return (
        label,
        round(centre_x / W, 6),
        round(centre_y / H, 6),
        round(box_width / W, 6),
        round(box_height / H, 6),
    )

### **Rotate by 20 and 340 degrees**

In [None]:
class yoloRotatebbox:
    """Rotate an image about its centre and recompute its YOLO boxes.

    filename   -- image path without extension.
    label_name -- full path of the YOLO .txt label file.
    image_ext  -- image extension including the dot.
    angle      -- rotation angle in degrees.
    """

    def __init__(self, filename, label_name, image_ext, angle):
        assert os.path.isfile(filename + image_ext)
        assert os.path.isfile(label_name)

        self.filename = filename
        self.label_name = label_name
        self.image_ext = image_ext
        self.angle = angle

        # Read image using cv2 (flag 1: 3-channel colour)
        self.image = cv2.imread(self.filename + self.image_ext, 1)

        # 2x2 rotation matrix used for the box corners.
        rotation_angle = self.angle * np.pi / 180
        self.rot_matrix = np.array([[np.cos(rotation_angle), -np.sin(rotation_angle)], [np.sin(rotation_angle), np.cos(rotation_angle)]])

    def rotateYolobbox(self):
        """Return the axis-aligned boxes that enclose the rotated label boxes.

        One entry [label, x_min, y_min, x_max, y_max] (pixels in the rotated
        image) is produced per label line that has more than one field.
        """
        new_height, new_width = self.rotate_image().shape[:2]
        # The with-block closes the label file; it used to be left open.
        with open(self.label_name, 'r') as f:
            f1 = f.readlines()
        new_bbox = []
        H, W = self.image.shape[:2]
        for x in f1:
            bbox = x.strip('\n').split(' ')
            if len(bbox) <= 1:
                continue
            # yoloFormattocv returns pixel corners of the unrotated box.
            x_min, y_min, x_max, y_max = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
            # The four corners, expressed relative to the image centre.
            corners = (
                (x_min - W / 2, -H / 2 + y_min),
                (x_max - W / 2, -H / 2 + y_min),
                (x_min - W / 2, -H / 2 + y_max),
                (x_max - W / 2, -H / 2 + y_max),
            )
            xs, ys = [], []
            for dx, dy in corners:
                # The y sign is flipped before the rotation and flipped back
                # when moving to the centre of the rotated image.
                new_coords = np.matmul(self.rot_matrix, np.array((dx, -dy)))
                xs.append(new_width / 2 + new_coords[0])
                ys.append(new_height / 2 - new_coords[1])

            # The -1 floor on the lower-right corner mirrors the initial value
            # the original running-maximum started from.
            new_bbox.append([bbox[0], min(xs), min(ys), max([-1] + xs), max([-1] + ys)])
        return new_bbox

    def rotate_image(self):
        """
        Rotates an image (angle in degrees) and expands image to avoid cropping
        """
        height, width = self.image.shape[:2]  # image shape has 3 dimensions
        image_center = (width / 2, height / 2)  # getRotationMatrix2D needs coordinates in reverse order (width, height) compared to shape
        rotation_mat = cv2.getRotationMatrix2D(image_center, self.angle, 1.)
        # rotation calculates the cos and sin, taking absolutes of those.
        abs_cos = abs(rotation_mat[0, 0])
        abs_sin = abs(rotation_mat[0, 1])
        # find the new width and height bounds
        bound_w = int(height * abs_sin + width * abs_cos)
        bound_h = int(height * abs_cos + width * abs_sin)
        # subtract old image center (bringing image back to origin) and adding the new image center coordinates
        rotation_mat[0, 2] += bound_w / 2 - image_center[0]
        rotation_mat[1, 2] += bound_h / 2 - image_center[1]
        # rotate image with the new bounds and translated rotation matrix
        rotated_mat = cv2.warpAffine(self.image, rotation_mat, (bound_w, bound_h))
        return rotated_mat

In [None]:
if __name__ == "__main__":
    # NOTE(review): `tr_series` is not defined in any cell shown in this
    # notebook; it must already exist in the kernel (iterable of image ids).
    angels=180
    image_id_translation = tr_series
    img_path = "/content/gdrive/MyDrive/abnormal_detection/training_7/images/train"
    label_path = "/content/gdrive/MyDrive/abnormal_detection/training_7/labels/train"
    index = 0
    image_ext = ".png"

    for image_id in tqdm(tr_series):
        image_name = os.path.join(img_path, image_id)
        label_name = os.path.join(label_path, image_id)
        # yoloRotatebbox asserts that both files exist; skip incomplete pairs
        # instead of aborting the whole loop.
        if not (os.path.exists(image_name+image_ext) and os.path.exists(label_name+'.txt')):
            continue

        im = yoloRotatebbox(image_name, label_name+'.txt', image_ext, angels)
        bbox = im.rotateYolobbox()
        image = im.rotate_image()
        # write the rotated image to disk
        cv2.imwrite(image_name+'_' + str(angels) + '.png', image)
        file_name = label_name+'_' + str(angels) + '.txt'

        # Start from a clean label file on every run.
        if os.path.exists(file_name):
            os.remove(file_name)
        # Reuse the already rotated image's shape; previously the image was
        # warped twice more for every single box.
        rotated_h, rotated_w = image.shape[:2]
        if bbox:
            with open(file_name, 'w') as fout:
                for i in bbox:
                    fout.write(' '.join(map(str, cvFormattoYolo(i, rotated_h, rotated_w))) + '\n')

In [None]:
image_id_translation[0]

### **Translation**

In [None]:
class yoloShiftbbox:
  """Translate an image by a pixel offset and shift its YOLO boxes to match.

  filename   -- image path without extension.
  label_name -- full path of the YOLO .txt label file.
  image_ext  -- image extension including the dot.
  shift      -- (x_distance, y_distance) in pixels, added to the box x / y
                coordinates and used as the translation of the image.
  """

  def __init__(self, filename, label_name, image_ext, shift):
    assert os.path.isfile(filename + image_ext)
    assert os.path.isfile(label_name)

    self.filename = filename
    self.label_name = label_name
    self.image_ext = image_ext

    # Read image using cv2 (flag 1: 3-channel colour)
    self.image = cv2.imread(self.filename + self.image_ext, 1)

    self.shift = shift
    self.shape_of_out_img = self.image.shape

  def shiftYolobbox(self):
    """Return the shifted boxes as [label, x_min, y_min, x_max, y_max] in pixels.

    Boxes are clipped to the image frame, because normalised YOLO coordinates
    must stay within 0..1; boxes that end up completely outside the frame are
    dropped. Label lines with a single field are ignored.
    """
    x_distance = self.shift[0]
    y_distance = self.shift[1]

    # The with-block closes the label file; it used to be left open.
    with open(self.label_name , 'r') as f:
      f1 = f.readlines()
    new_bbox = []
    H, W = self.image.shape[:2]
    for x in f1:
      bbox = x.strip('\n').split(' ')
      if len(bbox) <= 1:
        # Previously the append below also ran for such lines, re-using the
        # coordinates of the preceding box (or raising NameError on the first line).
        continue
      # yoloFormattocv returns pixel corners of the unshifted box.
      x_min, y_min, x_max, y_max = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
      x1 = min(max(x_min + x_distance, 0), W)
      y1 = min(max(y_min + y_distance, 0), H)
      x2 = min(max(x_max + x_distance, 0), W)
      y2 = min(max(y_max + y_distance, 0), H)
      if x2 <= x1 or y2 <= y1:
        # Nothing of the box is left inside the frame.
        continue

      new_bbox.append([bbox[0], x1, y1, x2, y2])

    return new_bbox

  def translation_image(self):
    """Return the image translated by `shift`, on a canvas of the original size."""
    h,w = self.image.shape[:2]
    x_distance = self.shift[0]
    y_distance = self.shift[1]
    # Affine translation matrix [[1, 0, tx], [0, 1, ty]].
    ts_mat = np.float32([[1,0,x_distance], [0,1,y_distance]])

    out_img = cv2.warpAffine(self.image, ts_mat, (w, h))
    return out_img


In [None]:
if __name__ == "__main__":
  # NOTE(review): `tr_series` is not defined in any cell shown in this
  # notebook; it must already exist in the kernel (iterable of image ids).
  image_id_translation = tr_series
  img_path = "/content/gdrive/MyDrive/abnormal_detection/training_7/images/train"
  label_path = "/content/gdrive/MyDrive/abnormal_detection/training_7/labels/train"
  # (x, y) pixel offsets, used in turn for successive processed images.
  shift = [[-500, 500],[300, 400],[-300, 400]]
  i_for_shift = 0
  image_ext = ".png"

  for image_id in tqdm(image_id_translation):
    image_name = os.path.join(img_path, image_id)
    label_name = os.path.join(label_path, image_id)

    # yoloShiftbbox asserts that both files exist; skip incomplete pairs
    # instead of aborting the whole loop.
    if not (os.path.exists(image_name+image_ext) and os.path.exists(label_name+".txt")):
      continue

    im = yoloShiftbbox(image_name, label_name+".txt", image_ext, shift[i_for_shift % len(shift)])
    i_for_shift+=1
    bbox = im.shiftYolobbox()
    image = im.translation_image()
    # write the translated image to disk
    cv2.imwrite(image_name+'_' + "translation" + '.png', image)
    file_name = label_name+'_' + "translation" + '.txt'

    # Start from a clean label file on every run.
    if os.path.exists(file_name):
      os.remove(file_name)
    # Reuse the already translated image's shape; previously the image was
    # warped twice more for every single box.
    out_h, out_w = image.shape[:2]
    if bbox:
      with open(file_name, 'w') as fout:
        for i in bbox:
          fout.write(' '.join(map(str, cvFormattoYolo(i, out_h, out_w))) + '\n')

### **Histogram Equalization & CLAHE (Contrast Limited Adaptive Histogram Equalization)** 

In [None]:
class yoloHistogramEqualization:
  """Histogram-equalise an image; its YOLO boxes are carried over unchanged.

  filename   -- image path without extension.
  label_name -- label path without the '.txt' extension.
  image_ext  -- image extension including the dot.
  """

  def __init__(self, filename, label_name, image_ext):
    assert os.path.isfile(filename + image_ext)
    assert os.path.isfile(label_name + '.txt')

    self.filename = filename
    self.label_name = label_name
    self.image_ext = image_ext

    # Read image using cv2 (flag 1: 3-channel colour)
    self.image = cv2.imread(self.filename + self.image_ext, 1)

  def histogramYolobbox(self):
    """Return the label boxes as [label, x_min, y_min, x_max, y_max] in pixels.

    Label lines with a single field are ignored.
    """
    # The with-block closes the label file; it used to be left open.
    with open(self.label_name + '.txt', 'r') as f:
      f1 = f.readlines()
    new_bbox = []
    H, W = self.image.shape[:2]
    for x in f1:
      bbox = x.strip('\n').split(' ')
      if len(bbox) <= 1:
        # Previously the append below also ran for such lines, re-using the
        # coordinates of the preceding box (or raising NameError on the first line).
        continue
      # Equalisation does not move anything, so the corners are kept as they are.
      x1, y1, x2, y2 = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
      new_bbox.append([bbox[0], x1, y1, x2, y2])

    return new_bbox

  def histogram_image(self):
    """Equalise the Y channel of the image in YCrCb space and return self.image.

    self.image is passed to cvtColor as the destination, so the stored image is
    overwritten with the equalised result.
    """
    ycrcb = cv2.cvtColor(self.image, cv2.COLOR_BGR2YCR_CB)
    channels = cv2.split(ycrcb)
    cv2.equalizeHist(channels[0], channels[0])
    cv2.merge(channels, ycrcb)
    cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, self.image)

    return self.image



In [None]:
if __name__ == "__main__":
  image_id_scale = data['image_id'].drop_duplicates()
  folder_path = "/content/gdrive/MyDrive/abnormal_detection/images_jpg"
  augmentation_path = '/content/gdrive/MyDrive/abnormal_detection/augmentation'
  iteration = 0
  image_ext = ".jpg"

  for image_id in tqdm(image_id_scale):
    iteration+=1

    # Source image and label share the "old_<id>" stem in folder_path.
    image_name = os.path.join(folder_path, "old_"+image_id)
    label_name = os.path.join(folder_path, "old_"+image_id)
    target_name = os.path.join(augmentation_path, image_id)

    # Both files are required (`and`, not `or`): with only one of them present
    # the assertions in yoloHistogramEqualization would abort the loop.
    if not (os.path.exists(image_name+".jpg") and os.path.exists(image_name+".txt")):
        continue

    im = yoloHistogramEqualization(image_name, label_name, image_ext)
    bbox = im.histogramYolobbox()
    image = im.histogram_image()
    # write the equalised image to disk
    cv2.imwrite(target_name+'_' + "he" + '.jpg', image)
    file_name = target_name+'_' + "he" + '.txt'

    # Start from a clean label file on every run.
    if os.path.exists(file_name):
        os.remove(file_name)
    # Open the label file once instead of once per box.
    if bbox:
        with open(file_name, 'w') as fout:
            for i in bbox:
                fout.write(' '.join(map(str, cvFormattoYolo(i, image.shape[0], image.shape[1]))) + '\n')

In [None]:
class yoloClahe:
  """Load an image and apply CLAHE to it; optionally read its YOLO labels.

  Args:
    filename: Image path without extension.
    image_ext: Image extension including the dot (e.g. ".png").
    label_name: Label path without the ".txt" extension.  Defaults to
      ``filename`` (label stored next to the image under the same stem).

  Raises:
    AssertionError: if ``filename + image_ext`` is not an existing file.
  """

  def __init__(self, filename, image_ext, label_name=None):
    assert os.path.isfile(filename + image_ext)

    self.filename = filename
    self.image_ext = image_ext
    # claheYolobbox() reads self.label_name; it was never assigned before,
    # so calling that method raised AttributeError.
    self.label_name = filename if label_name is None else label_name

    # Read image using cv2 (flag 1 = load as 3-channel colour image)
    self.image = cv2.imread(self.filename + self.image_ext, 1)

  def claheYolobbox(self):
    """Read ``<label_name>.txt`` and convert each box with ``yoloFormattocv``.

    Returns:
      A list of ``[class_token, v0, v1, v2, v3]`` entries, where ``v0..v3``
      are the four values returned by ``yoloFormattocv`` for that line.
      Lines with a single token (e.g. blank lines) are ignored.
    """
    new_bbox = []
    H, W = self.image.shape[:2]
    # Context manager guarantees the label file is closed.
    with open(self.label_name + '.txt', 'r') as f:
      for x in f:
        bbox = x.strip('\n').split(' ')
        if len(bbox) > 1:
          (x1, y1, x2, y2) = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
          # Append only for parsed lines; previously a short line re-used
          # the previous box's values (or raised NameError on the first line).
          new_bbox.append([bbox[0], x1, y1, x2, y2])

    return new_bbox

  def clahe_image(self):
    """Apply CLAHE (clipLimit=2.0, 8x8 tiles) to the Y plane and return the image.

    The result of the final colour conversion is written with ``self.image``
    as the destination argument, and ``self.image`` is returned.
    """
    ycrcb = cv2.cvtColor(self.image, cv2.COLOR_BGR2YCR_CB)
    channels = cv2.split(ycrcb)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
    # Source and destination are the same plane: only Y is changed.
    clahe.apply(channels[0], channels[0])
    cv2.merge(channels, ycrcb)
    cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, self.image)

    return self.image

In [None]:
if __name__ == "__main__":
  # Apply CLAHE to every image in folder_path and save it as "<stem>.png"
  # in the same folder (a .png input is therefore overwritten).
  folder_path = '/content/gdrive/MyDrive/abnormal_detection/images'
  images_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/images/train'
  images_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/images/val'

  for image_ids in tqdm(os.listdir(folder_path)):
    # splitext splits on the last dot only, so names containing several dots
    # keep their full stem and names without a dot no longer raise IndexError.
    image_id, image_ext = os.path.splitext(image_ids)

    # Skip label files and entries without an extension (sub-folders,
    # hidden files) that cannot be decoded as images.
    if image_ext == ".txt" or not image_ext:
      continue

    image_name = os.path.join(folder_path, image_id)

    im = yoloClahe(image_name, image_ext)
    image = im.clahe_image()
    # Write the CLAHE-enhanced image to disk.
    cv2.imwrite(image_name + '.png', image)

In [None]:
  # Plot the intensity histogram of one image after CLAHE on its Y plane.
  # NOTE(review): every line of this cell is indented although it is not
  # inside any block — confirm the cell still runs as intended.
  # Read image using cv2 (flag 1 = 3-channel colour)
  filename = "/content/0a16dc6491142ff8c7c36f3b3f4ebd02 (1).png"
  image = cv2.imread(filename, 1)
  ycrcb = cv2.cvtColor(image, cv2.COLOR_BGR2YCR_CB)
  channels = cv2.split(ycrcb)
  clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))
  # The Y plane is passed as both source and destination.
  cl_img = clahe.apply(channels[0], channels[0])

  # Histogram restricted to intensities 100..255, 100 bins.
  plt.hist(cl_img.flat, bins=100, range=(100, 255))

In [None]:
if __name__ == "__main__":
  # Run clahe_image() on "<id>.jpg" for every id in new_image_id and
  # overwrite the file in place.
  # NOTE(review): new_image_id is only built in a later cell ("Generate
  # train/test for training data"), and clahe_image is called here as a plain
  # function while in this part of the notebook it only appears as a method of
  # yoloClahe — confirm both are defined before this cell is run.
  folder_path = '/content/gdrive/MyDrive/abnormal_detection/testing_dataset'

  for image_ids in tqdm(new_image_id):
    path_train = os.path.join(folder_path, "{}.jpg".format(image_ids))
    
    image = clahe_image(path_train)
    # Write the processed image back to the same path.
    cv2.imwrite(path_train, image)
    

### **Zoom Data**

In [None]:
!pip install -U albumentations

In [None]:
import albumentations as A

In [None]:
# For every "old_<id>.png" training image, write a randomly augmented copy
# "random_<id>.png" next to it and copy its label file to "random_<id>.txt".
source_path = "/content/gdrive/MyDrive/abnormal_detection/training_6/images/train"
label_source_path = "/content/gdrive/MyDrive/abnormal_detection/training_6/labels/train"

# The imgaug-backed IAA* wrappers are deprecated in albumentations and absent
# from recent releases; prefer the native transforms and fall back to the old
# names only when the installed version does not provide them.
piecewise_affine_cls = getattr(A, "PiecewiseAffine", None) or A.IAAPiecewiseAffine
sharpen_cls = getattr(A, "Sharpen", None) or A.IAASharpen

# The pipeline does not depend on the image, so build it once.
# NOTE(review): the piecewise-affine transform warps the image geometry while
# the label file is copied unchanged below — confirm this is acceptable.
transform = A.Compose([
  piecewise_affine_cls(p=0.2),
  sharpen_cls(p=0.2),
  A.RandomGamma(gamma_limit=(70, 130), p=0.3),
  A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.75)
])

for img in tqdm(new_image_id):
  img_name = "old_{}.png".format(img)
  label_name = "old_{}.txt".format(img)
  img_random_name = "random_{}.png".format(img)
  label_random_name = "random_{}.txt".format(img)
  img_path = os.path.join(source_path, img_name)
  label_path = os.path.join(label_source_path, label_name)

  img_destination = os.path.join(source_path, img_random_name)
  label_destination = os.path.join(label_source_path, label_random_name)

  image = cv2.imread(img_path)
  # cv2.imread returns None for a missing/unreadable file; skip incomplete
  # image/label pairs instead of failing inside cvtColor or copyfile.
  if image is None or not os.path.exists(label_path):
    continue

  image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  transformed = transform(image=image)
  # cv2.imwrite expects BGR channel order: convert back before saving,
  # otherwise the red and blue channels are swapped on disk.
  transformed_image = cv2.cvtColor(transformed["image"], cv2.COLOR_RGB2BGR)
  cv2.imwrite(img_destination, transformed_image)
  shutil.copyfile(label_path, label_destination)

## **5. Training Dataset**

### **Generate train/test for training data**

In [None]:
# Collect the image ids of all "old_*" files in the training_7 train folder:
# the id is the text after the last underscore, up to the first dot.
new_image_id = []
for filename in tqdm(os.listdir("/content/gdrive/MyDrive/abnormal_detection/training_7/images/train")):
  if filename.partition('_')[0] != "old":
    continue
  last_segment = filename.rsplit('_', 1)[-1]
  new_image_id.append(last_segment.partition('.')[0])
  


In [None]:
# Number of "old_*" image ids collected above.
print(len(new_image_id))

In [None]:
def write_train_list(target_images_dir, target_labels_dir, img_dir, series):
  """Copy the image/label pair of every id in ``series`` into a dataset split.

  Args:
    target_images_dir: Folder receiving ``<image_id>.png`` (created if missing).
    target_labels_dir: Folder receiving ``<image_id>.txt`` (created if missing).
    img_dir: Source folder holding both ``<image_id>.png`` and ``<image_id>.txt``.
    series: Iterable of image ids.

  Ids without a source image are skipped.  An image without a label file is
  copied on its own instead of aborting the whole run half-way through.
  """
  # shutil.copyfile does not create missing parent folders.
  os.makedirs(target_images_dir, exist_ok=True)
  os.makedirs(target_labels_dir, exist_ok=True)

  for image_id in series:
    image_src = os.path.join(img_dir, '{}.png'.format(image_id))
    label_src = os.path.join(img_dir, '{}.txt'.format(image_id))

    if not os.path.exists(image_src):
      continue

    shutil.copyfile(image_src, os.path.join(target_images_dir, '{}.png'.format(image_id)))
    if os.path.exists(label_src):
      shutil.copyfile(label_src, os.path.join(target_labels_dir, '{}.txt'.format(image_id)))

In [None]:
# Destination folders of the "training_6" dataset (train / val split).
images_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/images/train'
labels_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/labels/train'
images_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/images/val'
labels_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_6/labels/val'

# Source folder: write_train_list reads both <id>.png and <id>.txt from it.
path_dataset = '/content/gdrive/MyDrive/abnormal_detection/augmentation'

# Random 50/50 split of the collected image ids, seeded with random_stat.
image_id_series = pd.Series(new_image_id)
tr_series, val_series = train_test_split(image_id_series, test_size=0.5, random_state=random_stat)
print("The # of train set : {}, The # of validation set : {}".format(tr_series.shape[0], val_series.shape[0]))

# NOTE(review): only the training half is copied here; val_series is computed
# but not written to images_test_dir / labels_test_dir in this cell — confirm
# that this is intended.
write_train_list(images_train_dir, labels_train_dir, path_dataset, tr_series)

In [None]:
# Number of entries in the training_6 validation image folder.
files = len(os.listdir("/content/gdrive/MyDrive/abnormal_detection/training_6/images/val"))

print(files)

### **Fold Dataset**

In [None]:
# Image ids whose 'fold_1' value is 0 form the training part; every other row
# of new_train_abnormal goes to the validation frame.
list_image_train = fold_csv.loc[fold_csv['fold_1'] == 0, 'image_id']
in_train_fold = new_train_abnormal['image_id'].isin(list_image_train)
train_df = new_train_abnormal[in_train_fold].reset_index(drop=True)
val_df = new_train_abnormal[~in_train_fold].reset_index(drop=True)

In [None]:
# Distinct image ids of the training fold (displayed as the cell output).
tr_series = train_df.image_id.unique()
tr_series

In [None]:
# Destination folders of the "training_7" dataset (train / val split).
images_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/images/train'
labels_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/labels/train'
images_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/images/val'
labels_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/labels/val'

# Source folder: write_train_list reads both <id>.png and <id>.txt from it.
path_dataset = '/content/gdrive/MyDrive/abnormal_detection/images'

# The split comes from the fold assignment (train_df / val_df), not from a
# random train_test_split.
tr_series = train_df.image_id.unique() 
val_series = val_df.image_id.unique()
print("The # of train set : {}, The # of validation set : {}".format(len(tr_series), len(val_series)))

# Copy the image/label pairs of both splits into the training_7 folders.
write_train_list(images_train_dir, labels_train_dir, path_dataset, tr_series)
write_train_list(images_test_dir, labels_test_dir, path_dataset, val_series)

### **Clone Yolov5**

In [None]:
!git clone https://github.com/ultralytics/yolov5

In [None]:
# NOTE: `Image` is imported again from PIL in a later cell ("Remove File"),
# which rebinds this name; code using `Image` depends on which cell ran last.
from IPython.display import Image
# NOTE(review): requires the working directory to be the yolov5 checkout so
# that its `utils` package is importable — confirm google_utils still exists
# in the cloned revision.
from utils.google_utils import gdrive_download
# Report the torch version and the first CUDA device (or 'CPU' when none).
print('torch %s %s' %(torch.__version__, torch.cuda.get_device_properties(0) if torch.cuda.is_available() else 'CPU'))

### **Remove File**

In [None]:
from PIL import Image

In [None]:
def delete_scale_image(folder_path, ext_path):
  """Delete every file in ``folder_path`` whose last "_"-separated part equals ``ext_path``.

  Args:
    folder_path: Folder to scan (not recursive).
    ext_path: Text expected after the final underscore of the file name,
      e.g. "translation.png".
  """
  for entry in tqdm(os.listdir(folder_path)):
    trailing_part = entry.rsplit("_", 1)[-1]
    if trailing_part == ext_path:
      os.remove(os.path.join(folder_path, entry))

def delete_df(folder_path, label_path, image_ids, imgext, labelext):
  """Delete ``<imgext>_<image_id>.png`` from ``folder_path`` for each id.

  Args:
    folder_path: Folder containing the images.
    label_path: Not used by the current implementation.
    image_ids: Iterable of image ids.
    imgext: Prefix placed before the id in the image file name.
    labelext: Not used by the current implementation.

  Only image files are removed; label files are left untouched.
  """
  for image_id in tqdm(image_ids):
    candidate = os.path.join(folder_path, "{}_{}.png".format(imgext, image_id))
    if not os.path.exists(candidate):
      continue
    os.remove(candidate)

def convert_jpg_to_png(folder_path):
  """Save a PNG copy of every ``.jpg`` file in ``folder_path``.

  The PNG is written next to the source as ``<stem>.png``; the JPEG itself
  is kept.  The extension check is case-insensitive.

  Args:
    folder_path: Folder to scan (not recursive).
  """
  # Local import: the notebook binds the name `Image` twice (IPython.display
  # and PIL), so resolve the PIL module explicitly here.
  from PIL import Image as PILImage

  for filename in tqdm(os.listdir(folder_path)):
    # splitext keeps the full stem of names containing several dots, so
    # "a.b.jpg" becomes "a.b.png" rather than colliding on "a.png".
    stem, ext = os.path.splitext(filename)
    if ext.lower() != ".jpg":
      continue

    filepath = os.path.join(folder_path, filename)
    savepath = os.path.join(folder_path, "{}.png".format(stem))
    # Context manager closes the underlying file handle after saving.
    with PILImage.open(filepath) as im1:
      im1.save(savepath)

In [None]:
# Folders of the "training_7" dataset.
images_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/images/train'
labels_train_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/labels/train'
images_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/images/val'
labels_test_dir = '/content/gdrive/MyDrive/abnormal_detection/training_7/labels/val'

# [20, 90,270, 340, 180]
augpath = '/content/gdrive/MyDrive/abnormal_detection/augmentation'

# WARNING: destructive. Permanently removes every "*_translation.png" image
# and "*_translation.txt" label from the training_7 train folders.
delete_scale_image(images_train_dir, "translation.png")
delete_scale_image(labels_train_dir, "translation.txt")
# Optional clean-up of the augmentation folder (disabled):
# delete_scale_image(augpath, "jpg")
# delete_scale_image(augpath, "he.txt")
# convert_jpg_to_png(augpath)

In [None]:
# Distinct image ids present in `data` (defined in an earlier cell).
image_id = data["image_id"].drop_duplicates()
print(len(image_id))

In [None]:
# Folders of the "training_6" dataset.
img_training = "/content/gdrive/MyDrive/abnormal_detection/training_6/images/train"
img_val = "/content/gdrive/MyDrive/abnormal_detection/training_6/images/val"
label_training = "/content/gdrive/MyDrive/abnormal_detection/training_6/labels/train"
label_val = "/content/gdrive/MyDrive/abnormal_detection/training_6/labels/val"
# File-name prefixes: targets are "old_<image_id>.png".
ext_label = "old"
ext_image = "old"

# WARNING: destructive. Deletes "old_<image_id>.png" from the training image
# folder for every id in image_id; delete_df does not remove label files.
delete_df(img_training, label_training, image_id, ext_image, ext_label)

### **Train Model**

In [None]:
%cd /content/gdrive/MyDrive/yolov5

In [None]:
!pip install -U -r requirements.txt

In [None]:
!pip install wandb

In [None]:
import wandb
wandb.login()

In [None]:
!python train.py --img 640 --batch-size 8 --epochs 100 --data '/content/gdrive/MyDrive/yolov5/data/data.yaml' --cfg '/content/gdrive/MyDrive/yolov5/models/yolov5s.yaml' --weights yolov5s.pt --cache

In [None]:
!python train.py --resume

In [None]:
!python detect.py --weights /content/gdrive/MyDrive/yolov5/runs/train/exp14/weights/best.pt --img 640 --conf-thres 0.005 --source /content/gdrive/MyDrive/abnormal_detection/testing_dataset --iou-thres 0.45 --save-txt --save-conf

### **Predict**

In [None]:
# Test-image metadata; its three columns are renamed to image_id / h / w.
size_df = pd.read_csv("/content/gdrive/MyDrive/abnormal_detection/test_meta.csv")
size_df.columns = ['image_id', 'h', 'w']

# Attach the image size to every row of the sample submission (left join, so
# all submission rows are kept).
sub_df = pd.merge(
    pd.read_csv("/content/gdrive/MyDrive/abnormal_detection/sample_submission.csv"),
    size_df,
    on='image_id',
    how='left',
)
sub_df.head()

In [None]:
#convert from Yolo_mark to opencv format
def yoloFormattocv(x1, y1, x2, y2, H, W):
    """Convert a normalized centre/size box to integer corner coordinates.

    Args:
        x1: Box centre x, as a fraction of the image width.
        y1: Box centre y, as a fraction of the image height.
        x2: Box width, as a fraction of the image width.
        y2: Box height, as a fraction of the image height.
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        ``[x_min, y_min, x_max, y_max]`` with each value truncated by ``int()``.
    """
    half_w = (x2 * W) / 2
    half_h = (y2 * H) / 2
    cx = x1 * W
    cy = y1 * H
    corners = (cx - half_w, cy - half_h, cx + half_w, cy + half_h)
    return [int(value) for value in corners]

In [None]:
def predictYolobbox(filename, H, W):
    """Build a prediction string from a detection label file.

    Each line with more than one token is read as
    ``class x_center y_center width height confidence`` (the box values are
    passed to ``yoloFormattocv``) and contributes
    ``"<class> <confidence> <x1> <y1> <x2> <y2> "`` to the result.

    Args:
        filename: Path of the label file.
        H: Image height in pixels.
        W: Image width in pixels.

    Returns:
        The concatenation of all per-box strings ("" when there are none).
        Every box, including the last, is followed by one space.
    """
    pieces = []
    # Context manager guarantees the label file is closed.
    with open(filename, 'r') as f:
        for x in f:
            bbox = x.strip('\n').split(' ')
            if len(bbox) > 1:
                (x1, y1, x2, y2) = yoloFormattocv(float(bbox[1]), float(bbox[2]), float(bbox[3]), float(bbox[4]), H, W)
                pieces.append("{} {} {} {} {} {} ".format(bbox[0], bbox[5], x1, y1, x2, y2))

    # Join once instead of growing the string box by box.
    return "".join(pieces)

In [None]:
def populationYolobbox(image_id, filename, H, W, conf_thres=0.2):
    """Collect ``[image_id, class, confidence]`` rows from a detection label file.

    Lines are read as ``class x_center y_center width height confidence``,
    the same layout ``predictYolobbox`` uses (confidence is the sixth field).
    Previously the second field (x_center) was thresholded and stored as the
    score.

    Args:
        image_id: Image identifier; anything after the first "." is dropped.
        filename: Path of the label file.
        H: Image height (unused, kept for signature compatibility).
        W: Image width (unused, kept for signature compatibility).
        conf_thres: Only detections with confidence strictly above this value
            are returned.

    Returns:
        A list of ``[image_id_stem, float(class), float(confidence)]``.
        Lines with fewer than six fields (no confidence) are ignored.
    """
    stem = image_id.split(".")[0]
    new_bbox = []
    # Context manager guarantees the label file is closed.
    with open(filename, 'r') as f:
        for x in f:
            bbox = x.split()
            if len(bbox) < 6:
                continue
            confidence = float(bbox[5])
            if confidence > conf_thres:
                new_bbox.append([stem, float(bbox[0]), confidence])

    return new_bbox

In [None]:
%cd /content/gdrive/MyDrive/abnormal_detection

In [None]:
import random
import statistics

In [None]:
# Build one row per detection for every image of the sample submission.
label_path = "/content/gdrive/MyDrive/yolov5/runs/detect/exp14/labels"
submission = []
for _, row in tqdm(sub_df.iterrows()):
  filename = os.path.join(label_path, "{}.txt".format(row['image_id']))
  # Default row (class 14, score 0) used when the image has no label file.
  predictionString = [[row['image_id'], 14, 0]]
  if path.exists(filename):
    # NOTE(review): when the label file exists but no detection passes the
    # filter inside populationYolobbox, the list is empty and the image gets
    # no row at all (the default above is discarded) — confirm intended.
    predictionString = populationYolobbox(row['image_id'], filename, row['h'], row['w'])
  for population in predictionString:
    submission.append(population) 

# One row per detection: image id, class id and the value returned by
# populationYolobbox as third element.
submission_df = pd.DataFrame(submission, columns=['image_id', 'id_class', 'prediction'])


In [None]:
def Visualize_class(df, feature, title):
  """Show a horizontal bar chart of how often each value of ``feature`` occurs.

  Args:
    df: DataFrame containing the column ``feature``.
    feature: Column whose value counts are plotted.
    title: Chart title (centred at the top of the figure).
  """
  counts = df[feature].value_counts().rename_axis(feature).reset_index(name="num_image")
  # value_counts sorts descending; reverse the row order before plotting.
  reversed_counts = counts[::-1]
  title_layout = dict(text=title, y=0.95, x=0.5, xanchor='center', yanchor='top')

  fig = px.bar(reversed_counts, x='num_image', y=feature, orientation='h', color='num_image')
  fig.update_layout(title=title_layout)
  fig.show()

In [None]:
# Bar chart of the number of rows per predicted class id.
Visualize_class(submission_df, feature="id_class", title="Types of abnormal labels")

In [None]:
# Display the per-detection table built above.
submission_df

In [None]:
# Population for the hypothesis test: all values of the 'prediction' column.
column_name = "prediction"
sum_sub = list(submission_df[column_name])
length_population = len(sum_sub)

In [None]:
# Population size.
print(length_population)

In [None]:
# Population sum ("jumlah") and mean ("rata2") of the prediction values.
jumlah_population = math.fsum(sum_sub)
rata2_population = jumlah_population / length_population
print(jumlah_population, rata2_population, sep="\n")

In [None]:
# Draw 500 values without replacement from the population.  A dedicated
# generator seeded with random_stat makes the sample (and therefore the test
# statistic below) reproducible across kernel restarts; the module-level
# `random` generator is never seeded in this notebook.
sample_rng = random.Random(random_stat)
sample_random = sample_rng.sample(sum_sub, 500)
# Sample sum ("jumlah"), size, mean ("rata2") and sample standard deviation.
jumlah_sample = math.fsum(sample_random)
length_sample = len(sample_random)
rata2_sample = jumlah_sample/length_sample
stdev_sample = statistics.stdev(sample_random)
print(jumlah_sample)
print(rata2_sample)
print(stdev_sample)

In [None]:
# Manual computation of the sample standard deviation:
# sqrt( sum((x - mean)^2) / (n - 1) ).
stdev_x = 0
for x in sample_random:
  stdev_x = stdev_x + (x - rata2_sample)**2
print(stdev_x)
# Use the actual sample size instead of a hard-coded 499 so the result stays
# correct if the sample size is changed.
stdev_sample = math.sqrt(stdev_x/(length_sample - 1))
print(stdev_sample)

In [None]:
# t statistic: (sample mean - population mean) / (sample stdev / sqrt(n)).
koef_t = (rata2_sample-rata2_population)/(stdev_sample/math.sqrt(length_sample))
print(koef_t)

In [None]:
# Save the per-detection table to the current working directory
# ("uji hipotesis" = hypothesis test), without the index column.
submission_df.to_csv('uji_hipotesis.csv', index=False)

## **Summary**

In [None]:
# Load custom weights through the local yolov5 checkout's hub entry point.
# NOTE(review): the repo path here is '/content/yolov5', while the training
# cells change directory to /content/gdrive/MyDrive/yolov5 — confirm that a
# checkout exists at this location.
model = torch.hub.load('/content/yolov5', 'custom', path='/content/gdrive/MyDrive/abnormal_detection/hasil_training/5-6-2021_ep43_512_8_yolov5x_preprocessing/train/exp9/weights/best.pt', source='local')

In [None]:
# Run the loaded model on a single image and collect its boxes.
img = cv2.imread('/content/gdrive/MyDrive/yolov5/runs/detect/exp/008bdde2af2462e86fd373a445d0f4cd.jpg')
if img is None:
  # cv2.imread returns None instead of raising when the file cannot be read.
  raise FileNotFoundError("could not read the input image")

# cv2.imread returns BGR; the hub model expects RGB for numpy inputs, so
# reverse the channel axis before inference.
results = model(img[..., ::-1])

results.print()

# Detections of the first (only) image as a DataFrame
# (xmin, ymin, xmax, ymax, confidence, class, name).
bbox = results.pandas().xyxy[0]

In [None]:
# Left edge (xmin) of every detected box.
print(bbox['xmin'])

In [None]:
# Load the training labels of the separate pneumonia dataset.
pneumonia = pd.read_csv("/content/gdrive/MyDrive/Pneumonia_dataset/stage_2_train_labels.csv")

In [None]:
# Preview the first rows of the pneumonia labels.
pneumonia.head()

In [None]:
# Keep only the rows with Target == 1 and drop fully duplicated rows.
testing = pneumonia[pneumonia.Target == 1].drop_duplicates()

In [None]:
# Preview the filtered rows.
testing.head()

In [None]:
# Number of Target == 1 rows after de-duplication.
len(testing)