In [1]:
from PIL import Image
import numpy as np
import cv2 as cv
import pandas as pd
import os
from pathlib import Path
import boto3

# Seed NumPy's global random generator; the np.random.uniform / np.random.choice
# calls used later in this notebook all draw from it.
np.random.seed(0)

In [2]:
# Relative path of the training CSV; it is handed to pd.read_csv in the next cell.
train_df_key = 'CheXpert-v1.0-small/train.csv'

In [3]:
# Load the training table. Its 'Path' column is what the later cells iterate over
# to find each image file.
train_df = pd.read_csv(train_df_key)

In [4]:
def modify_img_with_coin(img):
    """Return a copy of ``img`` with one filled circle (the "coin") drawn on it.

    Parameters
    ----------
    img : PIL image
        Source image. Only ``img.size`` is read and the pixels are copied with
        ``np.array(img)``, so the object passed in is not drawn on.

    Returns
    -------
    PIL image
        New image built with ``Image.fromarray`` from the drawn-on copy.

    Notes
    -----
    The circle has radius 10 and value 255. Its centre is jittered with two
    draws from NumPy's global RNG (x first, then y): x lands within 49-51 %
    of the width and y within 5-25 % of the height.
    """
    radius = 10
    img_w, img_h = img.size

    # Keep the draw order (x, then y) so a given seed yields the same centre.
    x_jitter = np.random.uniform(low=-1, high=1)
    centre_x = int(img_w * (0.5 + x_jitter * 0.01))
    y_jitter = np.random.uniform(low=-1, high=1)
    centre_y = int(img_h * (0.15 + y_jitter * 0.1))

    # Thickness -1 asks OpenCV for a filled circle rather than an outline.
    canvas = cv.circle(np.array(img), (centre_x, centre_y), radius, 255, -1)

    return Image.fromarray(canvas)

In [5]:
def modify_img_with_magnets(img):
    """Return a copy of ``img`` with two touching filled circles ("magnets").

    Parameters
    ----------
    img : PIL image
        Source image. Only ``img.size`` is read and the pixels are copied with
        ``np.array(img)``.

    Returns
    -------
    PIL image
        New image built with ``Image.fromarray`` from the drawn-on copy.

    Notes
    -----
    Both circles have radius 5 and value 255. Three draws are taken from
    NumPy's global RNG, in this order: x jitter, y jitter, direction angle.
    The first centre lands within 56-84 % of the width and 75-85 % of the
    height. The second centre is one diameter away from the first in the
    drawn direction (before truncation to integer pixels).
    """
    img_w, img_h = img.size
    radius = 5

    first_x = int(img_w * (0.7 + np.random.uniform(low=-1, high=1) * 0.14))
    first_y = int(img_h * (0.8 + np.random.uniform(low=-1, high=1) * 0.05))

    # Direction from the first magnet to the second, anywhere on the full circle.
    angle = np.random.uniform(low=0, high=2 * np.pi)
    second_x = int(first_x + 2 * radius * np.cos(angle))
    second_y = int(first_y + 2 * radius * np.sin(angle))

    canvas = np.array(img)
    for centre in ((first_x, first_y), (second_x, second_y)):
        # Thickness -1 asks OpenCV for a filled circle.
        canvas = cv.circle(canvas, centre, radius, 255, -1)

    return Image.fromarray(canvas)

In [6]:
def modify_img_with_bullet(img):
    """Return a copy of ``img`` with a randomly placed, randomly rotated "bullet".

    The bullet is a filled 20 x 10 rectangle with a filled circle of radius 5
    centred on the middle of one short side. It is first drawn axis-aligned on
    a blank overlay, the overlay is rotated about the rectangle centre, and
    every overlay pixel that equals 255 after rotation is set to 255 in the
    image.

    Parameters
    ----------
    img : PIL image
        Source image. Only ``img.size`` is read and the pixels are copied with
        ``np.array(img)``.

    Returns
    -------
    PIL image
        New image built with ``Image.fromarray`` from the drawn-on copy.

    Notes
    -----
    Three draws are taken from NumPy's global RNG, in this order: x jitter,
    y jitter, rotation angle. The rectangle centre lands within 20-80 % of the
    width and 10-70 % of the height.
    """
    width, height = img.size

    img = np.array(img)
    img_overlay = np.zeros_like(img)

    # Setting constant shape variables
    circle_radius = 5
    rectangle_length = 20
    rectangle_width = 10

    # Setting random centre of rectangle
    rect_x_centre = int(width * (0.5 + np.random.uniform(low=-1, high=1) * 0.3))
    rect_y_centre = int(height * (0.4 + np.random.uniform(low=-1, high=1) * 0.3))

    # Drawn with a rotation angle of 0 for simplicity; the whole overlay is
    # rotated afterwards with an affine matrix.
    rect = ((rect_x_centre, rect_y_centre), (rectangle_length, rectangle_width), 0)
    # np.int0 was removed in NumPy 2.0, so cast the float corner points to
    # integer pixel coordinates explicitly (truncating, as np.int0 did).
    box = cv.boxPoints(rect).astype(np.int32)
    cv.drawContours(img_overlay, [box], 0, (255), -1)

    # Bullet circular 'front' placed half a rectangle length along the body.
    circle_centre = (rect_x_centre + rectangle_length // 2, rect_y_centre)
    img_overlay = cv.circle(img_overlay, circle_centre, circle_radius, 255, -1)

    theta = np.random.uniform(low=0, high=2 * np.pi)

    (h, w) = img_overlay.shape[:2]

    # The bullet is rotated around its own centre, not the centre of the image.
    # getRotationMatrix2D expects the angle in degrees, hence the conversion.
    M = cv.getRotationMatrix2D((rect_x_centre, rect_y_centre), theta * 180 / np.pi, 1.0)
    rotated = cv.warpAffine(img_overlay, M, (w, h))

    # Bullet added into the original image: only overlay pixels that are
    # exactly 255 after the warp are copied across.
    img[rotated == 255] = 255

    return Image.fromarray(img)

In [7]:
def return_modified_image_random(img, mod_option = None):
    """Apply one of the artefact-drawing functions to ``img``.

    Parameters
    ----------
    img : PIL image
        Image handed unchanged to the selected modifier.
    mod_option : {'coin', 'magnets', 'bullet'} or None
        Which modifier to apply. When ``None``, one of the three keys is
        picked with ``np.random.choice`` (NumPy's global RNG).

    Returns
    -------
    (modified image, option)
        The modifier's return value and the key that was used. When the key
        was picked at random it is returned as ``np.random.choice`` produced it.

    Raises
    ------
    KeyError
        If ``mod_option`` is not one of the three known keys.
    """
    modifiers = {
        'coin': modify_img_with_coin,
        'magnets': modify_img_with_magnets,
        'bullet': modify_img_with_bullet,
    }

    chosen = np.random.choice(list(modifiers)) if mod_option is None else mod_option

    return modifiers[chosen](img), chosen
    
    

In [8]:
def modify_save_and_record_image(filename, modify = True):
    """Open an image, optionally draw a random artefact on it, save it, and flag it.

    Parameters
    ----------
    filename : str
        Path of the source image. It must match a value in ``mod_df['Path']``
        for the row to be flagged.
    modify : bool
        When truthy, a random artefact is drawn via
        ``return_modified_image_random``. Otherwise the image is saved
        unchanged and recorded as ``'no_mod'``.

    Returns
    -------
    None

    Side effects
    ------------
    * Writes the image under ``CheXpert-v1.0-small-MOD/``, mirroring the source
      path without its first component, with ``_mod_<option>`` appended to the
      file stem. Parent directories are created as needed.
    * Sets ``mod_df.loc[<rows whose Path equals filename>, <option>] = True``
      on the module-level ``mod_df``, which must exist when this is called.
    """
    source_path = Path(filename)

    # The context manager closes the underlying file handle once the image has
    # been written, instead of leaving it to the garbage collector.
    with Image.open(source_path) as img:

        # A plain if/else guarantees mod_img and mod_option are always bound,
        # whatever truthy or falsy value the caller passes.
        if modify:
            mod_img, mod_option = return_modified_image_random(img, mod_option = None)
        else:
            mod_img, mod_option = img, 'no_mod'

        save_path = (
            Path('CheXpert-v1.0-small-MOD')
            / Path(*source_path.parts[1:-1])
            / Path(source_path.stem + f'_mod_{mod_option}' + source_path.suffix)
        )

        save_path.parent.mkdir(parents=True, exist_ok=True)
        mod_img.save(save_path)

    # Flag the row only after the file was written, so the table never claims
    # a modification whose image failed to save.
    mod_df.loc[mod_df['Path'] == filename, mod_option] = True

    return

In [9]:
# Work on a copy so the table loaded from the CSV keeps its original columns;
# modify_save_and_record_image adds the flag columns to mod_df.
mod_df = train_df.copy()
filenames = mod_df['Path'].values
i = 0
j = 0  # not used in this cell; kept in case another cell reads it
failed_filenames = []  # images that could not be opened or written

for filename in filenames:
    # Drawn for every file, lateral ones included, so the sequence of random
    # draws does not depend on which files are lateral.
    modify = np.random.choice([True,False])

    try:
        # Do not modify lateral images
        if 'lateral' in filename:
            modify_save_and_record_image(filename, modify = False)

        else:
            modify_save_and_record_image(filename, modify)
    except OSError:
        # A missing or unreadable image must not abort the whole run, but it
        # is remembered rather than silently dropped.
        failed_filenames.append(filename)

    i += 1
    if i % 10000 == 0:
        print(i)

if failed_filenames:
    print(f'{len(failed_filenames)} files could not be processed; first: {failed_filenames[0]}')

In [18]:
# Write the full annotated table, flag columns included, to disk. The DataFrame
# index is written as the first column (pandas' default for to_csv).
mod_df.to_csv('mod_df.csv')

In [19]:
# Display the annotated table to inspect which rows received a modification flag.
mod_df

Unnamed: 0,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Lung Lesion,...,Atelectasis,Pneumothorax,Pleural Effusion,Pleural Other,Fracture,Support Devices,magnets,no_mod,bullet,coin
0,CheXpert-v1.0-small/train/patient00001/study1/...,Female,68,Frontal,AP,1.0,,,,,...,,0.0,,,,1.0,True,,,
1,CheXpert-v1.0-small/train/patient00002/study2/...,Female,87,Frontal,AP,,,-1.0,1.0,,...,-1.0,,-1.0,,1.0,,,True,,
2,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Frontal,AP,,,,1.0,,...,,,,,1.0,,,True,,
3,CheXpert-v1.0-small/train/patient00002/study1/...,Female,83,Lateral,,,,,1.0,,...,,,,,1.0,,,True,,
4,CheXpert-v1.0-small/train/patient00003/study1/...,Male,41,Frontal,AP,,,,,,...,,0.0,,,,,,,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223409,CheXpert-v1.0-small/train/patient64537/study2/...,Male,59,Frontal,AP,,,,-1.0,,...,-1.0,0.0,1.0,,,,,,,
223410,CheXpert-v1.0-small/train/patient64537/study1/...,Male,59,Frontal,AP,,,,-1.0,,...,-1.0,,-1.0,,,,,,,
223411,CheXpert-v1.0-small/train/patient64538/study1/...,Female,0,Frontal,AP,,,,,,...,,,,,,,,,,
223412,CheXpert-v1.0-small/train/patient64539/study1/...,Female,0,Frontal,AP,,,1.0,1.0,,...,1.0,0.0,,,,0.0,,,,


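No image files were available beyond row index 178755 (the rows from that position onward have no modification flag set), so the table is clipped to the first 178,755 rows below.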

In [25]:
# Write only the first 178755 rows (positions 0..178754) as the clipped table.
# NOTE(review): 178755 is hard-coded from the observation that no image files
# were available beyond that index; confirm it before reusing with other data.
mod_df.iloc[:178755].to_csv('mod_df_clipped.csv')