In [1]:
import cv2 
import os
import numpy as np
import skimage.feature as feature
import pandas as pd
from skimage.feature import local_binary_pattern
from sklearn.preprocessing import MinMaxScaler

In [2]:
def GT_Mask(cropped_gt):
    '''
    Builds one filled mask image for every external contour of the ground truth.

    Arg:
      cropped_gt (numpy.ndarray): cropped ground truth image

    Output:
      vector_gt_mask (list): one image per contour, each with the shape and dtype
        of cropped_gt, all zeros except for the filled contour
      contours_gt: the external contours exactly as returned by cv2.findContours
    '''
    # Only the outermost contours are kept (RETR_EXTERNAL), with every boundary point
    found = cv2.findContours(cropped_gt, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    contours_gt, _hierarchy = found

    def _filled_mask(contour_index):
        # Fresh black canvas on which a single contour is painted filled in white
        canvas = np.zeros_like(cropped_gt)
        cv2.drawContours(canvas, contours_gt, contour_index, (255, 255, 255), cv2.FILLED)
        return canvas

    vector_gt_mask = [_filled_mask(n) for n in range(len(contours_gt))]

    return vector_gt_mask, contours_gt

In [3]:
def crop_candidate_ROI(image, contour):
    '''
    Returns the part of the image that lies inside the bounding rectangle of a contour.

    Args:
      image (numpy.ndarray): the image to be cropped.
      contour (list): the extracted contour from the segmentation
    Output:
      croppedImg (numpy.ndarray): the cropped image
    '''
    # cv2.boundingRect gives (x, y, width, height); rows are indexed by y, columns by x
    left, top, width, height = cv2.boundingRect(contour)
    row_span = slice(top, top + height)
    col_span = slice(left, left + width)
    croppedImg = image[row_span, col_span]
    return croppedImg

In [4]:
def extract_LBP(roi_img, P, R):
    '''
    Extracts the normalized histogram of the uniform LBP image for a given
    number of neighbours and radius.

    Args:
        roi_img (numpy.ndarray): the image from which the LBP features will be extracted (cropped ROI)
        P (int): number of neighbours
        R (int): radius
    Output:
        hist (numpy.array): histogram of the LBP image with P + 2 bins that sum to 1
            (all zeros when roi_img contains no pixels)
    '''
    # P is already the number of neighbours. It used to be multiplied by R, so
    # a call with (P=16, R=2) silently computed a 32-neighbour LBP.
    lbp_image = local_binary_pattern(roi_img, P, R, method='uniform')

    # The 'uniform' method produces integer codes 0 .. P + 1, i.e. P + 2 bins;
    # the P + 3 edges below give one unit-wide bin per code.
    n_bins = P + 2
    counts, _ = np.histogram(lbp_image.ravel(), bins=np.arange(0, n_bins + 1))

    # Normalize once, guarding against an empty ROI (which would give NaN)
    hist = counts.astype(float)
    total = hist.sum()
    if total > 0:
        hist /= total

    return hist

Feature extraction for positive images

In [20]:
# Extracts texture, shape and intensity features for every segmented candidate
# of every image that has a ground-truth file, labels each candidate against
# each ground-truth mass by IoU, and writes one CSV row per (mass, candidate).
# NOTE(review): the feature computation below is duplicated in the
# negative-image cell; consider moving it to a shared helper function.
segmented_images_path = '/Users/clara/Desktop/MAIA/AIA/segmented_images_new/'
cropped_images_upsampled_dir = '/Users/clara/Desktop/MAIA/AIA/cropped_imgs_upsampled_all/'
cropped_gt_dir = '/Users/clara/Desktop/MAIA/AIA/cropped_gt_all/'

# Number of segmented images per input image (files <name>_0.tif ... <name>_10.tif)
num_classes = 11

# A candidate is labelled 1 when its IoU with the ground-truth mass is at least this value
iou_threshold = 0.5

# GLCM angles in radians; their printed values are part of the column names
glcm_angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

filenames = sorted(file for file in os.listdir(cropped_gt_dir) if file != ".DS_Store")

# Create an empty list to store the dictionaries (one per CSV row)
data = []

for index, filename in enumerate(filenames):
    print("Processing image number: ", index)

    cropped_gt = cv2.imread(cropped_gt_dir + filename, cv2.IMREAD_GRAYSCALE)
    if cropped_gt is None:
        # cv2.imread returns None instead of raising when a file cannot be read
        raise FileNotFoundError(f'Could not read ground truth image: {cropped_gt_dir + filename}')

    vector_gt_mask, contours_gt = GT_Mask(cropped_gt)
    if not vector_gt_mask:
        # No mass found in the ground truth: nothing to label for this image
        continue

    # this is the raw image (original scale) cropped to only the breast area
    cropped_img_upsampled = cv2.imread(cropped_images_upsampled_dir + filename, cv2.IMREAD_GRAYSCALE)
    if cropped_img_upsampled is None:
        raise FileNotFoundError(f'Could not read image: {cropped_images_upsampled_dir + filename}')
    if cropped_img_upsampled.shape != cropped_gt.shape:
        raise ValueError(
            f'Image and ground truth have different shapes for {filename}: '
            f'{cropped_img_upsampled.shape} vs {cropped_gt.shape}'
        )

    # Boolean version of every ground-truth mask (True inside the mass)
    gt_pixels_per_mass = [gt_mask > 128 for gt_mask in vector_gt_mask]

    # The features of a candidate do not depend on the ground-truth mass, so
    # they are computed once and copied into one row per mass. Rows are
    # collected per mass to keep the mass -> scale -> candidate order in the CSV.
    rows_per_mass = [[] for _ in gt_pixels_per_mass]

    # splitext keeps this correct for extensions that are not three letters long
    stem = os.path.splitext(filename)[0]

    for i in range(num_classes):
        # these are the segmented (binary) images for one scale per loop
        segmented_path = segmented_images_path + stem + "_" + str(i) + ".tif"
        segmented_img = cv2.imread(segmented_path, cv2.IMREAD_GRAYSCALE)
        if segmented_img is None:
            raise FileNotFoundError(f'Could not read segmented image: {segmented_path}')

        # Find the contours of the segmented image
        contours_segm, _ = cv2.findContours(segmented_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        for contour in contours_segm:
            # Filled mask of the candidate region, used for both the IoU and the intensity statistics
            mask = np.zeros_like(cropped_img_upsampled)
            cv2.drawContours(mask, [contour], 0, (255), thickness=cv2.FILLED)
            candidate_pixels = mask > 0

            cropped_boundingBox = crop_candidate_ROI(cropped_img_upsampled, contour)

            ###### TEXTURE FEATURES ########

            # Obtain the GLCM matrix from the bounding box (ROI) for different angles (in radians) and distance offset = 1
            graycom = feature.graycomatrix(cropped_boundingBox, [1], glcm_angles, levels=256)

            # GLCM FEATURES (each result has shape (1, number of angles))
            contrast = feature.graycoprops(graycom, 'contrast')
            dissimilarity = feature.graycoprops(graycom, 'dissimilarity')
            homogeneity = feature.graycoprops(graycom, 'homogeneity')
            energy = feature.graycoprops(graycom, 'energy')
            correlation = feature.graycoprops(graycom, 'correlation')
            ASM = feature.graycoprops(graycom, 'ASM')

            # LBP FEATURES
            hist_LBP_8_1 = extract_LBP(cropped_boundingBox, P = 8, R = 1) # histogram of the LBP image (8, 1)
            hist_LBP_16_2 = extract_LBP(cropped_boundingBox, P = 16, R = 2) # histogram of the LBP image (16, 2)

            ###### SHAPE FEATURES ########

            area = cv2.contourArea(contour)
            perimeter = cv2.arcLength(contour, True)
            if perimeter > 0:
                circularity = (4 * np.pi * area) / (perimeter ** 2)
                density = area / (perimeter ** 2)
            else:
                # A single-point contour has zero perimeter; avoid ZeroDivisionError
                circularity = 0.0
                density = 0.0

            ###### GENERAL FEATURES ########

            # Mean and standard deviation of the pixels inside the candidate only
            # (the zeroed background outside the contour is excluded)
            roi_pixels = cropped_img_upsampled[candidate_pixels]
            mean_intensity = np.mean(roi_pixels)
            std_deviation = np.std(roi_pixels)

            # Create a dictionary with the features of the current candidate
            image_data = {}
            for p, angle in enumerate(glcm_angles):
                image_data[f'Contrast_{angle}'] = contrast[0][p]
                image_data[f'Dissimilarity_{angle}'] = dissimilarity[0][p]
                image_data[f'Homogeneity_{angle}'] = homogeneity[0][p]
                image_data[f'Energy_{angle}'] = energy[0][p]
                image_data[f'Correlation_{angle}'] = correlation[0][p]
                image_data[f'ASM_{angle}'] = ASM[0][p]
            image_data['Area'] = area
            image_data['Perimeter'] = perimeter
            image_data['Circularity'] = circularity
            image_data['Density'] = density
            image_data['Mean_Intensity'] = mean_intensity
            image_data['Std_Dev_Intensity'] = std_deviation

            for m in range(len(hist_LBP_8_1)):
                image_data[f'LBP_8_1_{m}'] = hist_LBP_8_1[m]

            for m in range(len(hist_LBP_16_2)):
                image_data[f'LBP_16_2_{m}'] = hist_LBP_16_2[m]

            # Label the candidate against every ground-truth mass
            for k, gt_pixels in enumerate(gt_pixels_per_mass):
                intersection_pixels = np.count_nonzero(candidate_pixels & gt_pixels)
                union_pixels = np.count_nonzero(candidate_pixels | gt_pixels)
                iou = intersection_pixels / union_pixels if union_pixels else 0.0

                row = dict(image_data)
                row['image_id'] = f'{filename}_{k}'
                # Label is 1 if the candidate has IoU >= iou_threshold with mass k, else 0
                row['Label'] = 1 if iou >= iou_threshold else 0
                rows_per_mass[k].append(row)

    for rows in rows_per_mass:
        data.extend(rows)

features_df = pd.DataFrame(data)

features_df.to_csv('/Users/clara/Desktop/MAIA/AIA/CleanCode/pos_extracted_features_lbp_05.csv', index=False)

Processing image number:  0
Processing image number:  1
Processing image number:  2
Processing image number:  3
Processing image number:  4
Processing image number:  5
Processing image number:  6
Processing image number:  7
Processing image number:  8
Processing image number:  9
Processing image number:  10
Processing image number:  11
Processing image number:  12
Processing image number:  13
Processing image number:  14
Processing image number:  15
Processing image number:  16
Processing image number:  17
Processing image number:  18
Processing image number:  19
Processing image number:  20
Processing image number:  21
Processing image number:  22
Processing image number:  23
Processing image number:  24
Processing image number:  25
Processing image number:  26
Processing image number:  27
Processing image number:  28
Processing image number:  29
Processing image number:  30
Processing image number:  31
Processing image number:  32
Processing image number:  33
Processing image number:

Feature extraction for negative images

In [8]:
# Extracts texture, shape and intensity features for every segmented candidate
# of every image that has NO ground-truth file; all such candidates get Label 0.
# NOTE(review): the feature computation below is duplicated in the
# positive-image cell; consider moving it to a shared helper function.
segmented_images_path = '/Users/clara/Desktop/MAIA/AIA/segmented_images_new/'
cropped_images_upsampled_dir = '/Users/clara/Desktop/MAIA/AIA/cropped_imgs_upsampled_all/'
cropped_gt_dir = '/Users/clara/Desktop/MAIA/AIA/cropped_gt_all/'

# Number of segmented images per input image (files <name>_0.tif ... <name>_10.tif)
num_classes = 11

# GLCM angles in radians; their printed values are part of the column names
glcm_angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

filenames = sorted(file for file in os.listdir(cropped_images_upsampled_dir) if file != ".DS_Store")

# Create an empty list to store the dictionaries (one per CSV row)
data = []

for index, filename in enumerate(filenames):
    print("Processing image number: ", index)

    # Images that have a ground-truth file are not negative images: skip them here
    if os.path.exists(os.path.join(cropped_gt_dir, filename)):
        continue

    # this is the raw image (original scale) cropped to only the breast area
    cropped_img_upsampled = cv2.imread(cropped_images_upsampled_dir + filename, cv2.IMREAD_GRAYSCALE)
    if cropped_img_upsampled is None:
        # cv2.imread returns None instead of raising when a file cannot be read
        raise FileNotFoundError(f'Could not read image: {cropped_images_upsampled_dir + filename}')

    # splitext keeps this correct for extensions that are not three letters long
    stem = os.path.splitext(filename)[0]

    for i in range(num_classes):
        # these are the segmented (binary) images for one scale per loop
        segmented_path = segmented_images_path + stem + "_" + str(i) + ".tif"
        segmented_img = cv2.imread(segmented_path, cv2.IMREAD_GRAYSCALE)
        if segmented_img is None:
            raise FileNotFoundError(f'Could not read segmented image: {segmented_path}')

        # Find the contours of the segmented image
        contours_segm, _ = cv2.findContours(segmented_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        for contour in contours_segm:
            cropped_boundingBox = crop_candidate_ROI(cropped_img_upsampled, contour)

            ###### TEXTURE FEATURES ########

            # Obtain the GLCM matrix from the bounding box (ROI) for different angles (in radians) and distance offset = 1
            graycom = feature.graycomatrix(cropped_boundingBox, [1], glcm_angles, levels=256)

            # GLCM FEATURES (each result has shape (1, number of angles))
            contrast = feature.graycoprops(graycom, 'contrast')
            dissimilarity = feature.graycoprops(graycom, 'dissimilarity')
            homogeneity = feature.graycoprops(graycom, 'homogeneity')
            energy = feature.graycoprops(graycom, 'energy')
            correlation = feature.graycoprops(graycom, 'correlation')
            ASM = feature.graycoprops(graycom, 'ASM')

            # LBP FEATURES
            hist_LBP_8_1 = extract_LBP(cropped_boundingBox, P = 8, R = 1) # histogram of the LBP image (8, 1)
            hist_LBP_16_2 = extract_LBP(cropped_boundingBox, P = 16, R = 2) # histogram of the LBP image (16, 2)

            ###### SHAPE FEATURES ########

            area = cv2.contourArea(contour)
            perimeter = cv2.arcLength(contour, True)
            if perimeter > 0:
                circularity = (4 * np.pi * area) / (perimeter ** 2)
                density = area / (perimeter ** 2)
            else:
                # A single-point contour has zero perimeter; avoid ZeroDivisionError
                circularity = 0.0
                density = 0.0

            ###### GENERAL FEATURES ########

            # Filled mask of the candidate region
            mask = np.zeros_like(cropped_img_upsampled)
            cv2.drawContours(mask, [contour], 0, (255), thickness=cv2.FILLED)

            # Mean and standard deviation of the pixels inside the candidate only
            # (the zeroed background outside the contour is excluded)
            roi_pixels = cropped_img_upsampled[mask > 0]
            mean_intensity = np.mean(roi_pixels)
            std_deviation = np.std(roi_pixels)

            # Create a dictionary with the features of the current candidate
            image_data = {}
            for p, angle in enumerate(glcm_angles):
                image_data[f'Contrast_{angle}'] = contrast[0][p]
                image_data[f'Dissimilarity_{angle}'] = dissimilarity[0][p]
                image_data[f'Homogeneity_{angle}'] = homogeneity[0][p]
                image_data[f'Energy_{angle}'] = energy[0][p]
                image_data[f'Correlation_{angle}'] = correlation[0][p]
                image_data[f'ASM_{angle}'] = ASM[0][p]
            image_data['Area'] = area
            image_data['Perimeter'] = perimeter
            image_data['Circularity'] = circularity
            image_data['Density'] = density
            image_data['Mean_Intensity'] = mean_intensity
            image_data['Std_Dev_Intensity'] = std_deviation

            for m in range(len(hist_LBP_8_1)):
                image_data[f'LBP_8_1_{m}'] = hist_LBP_8_1[m]

            for m in range(len(hist_LBP_16_2)):
                image_data[f'LBP_16_2_{m}'] = hist_LBP_16_2[m]

            # These images have no ground-truth mass, so every candidate is labelled 0
            label = 0
            image_data['image_id'] = f'{filename}'
            image_data['Label'] = label

            # Append the dictionary to the list
            data.append(image_data)

features_df = pd.DataFrame(data)

features_df.to_csv('/Users/clara/Desktop/MAIA/AIA/CleanCode/neg_extracted_features_lbp_02_newImagesID.csv', index=False)

Processing image number:  0
Processing image number:  1
Processing image number:  2
Processing image number:  3
Processing image number:  4
Processing image number:  5
Processing image number:  6
Processing image number:  7
Processing image number:  8
Processing image number:  9
Processing image number:  10
Processing image number:  11
Processing image number:  12
Processing image number:  13
Processing image number:  14
Processing image number:  15
Processing image number:  16
Processing image number:  17
Processing image number:  18
Processing image number:  19
Processing image number:  20
Processing image number:  21
Processing image number:  22
Processing image number:  23
Processing image number:  24
Processing image number:  25
Processing image number:  26
Processing image number:  27
Processing image number:  28
Processing image number:  29
Processing image number:  30
Processing image number:  31
Processing image number:  32
Processing image number:  33
Processing image number:

Scale the features of positive and negative images together

In [None]:
# Loads the positive- and negative-image feature tables, scales all feature
# columns together to [0, 1] and writes the positive and negative candidates
# to two header-less CSV files.
features_positives = pd.read_csv('/Users/clara/Desktop/MAIA/AIA/CleanCode/pos_extracted_features_lbp_05.csv')
# Read the file that the negative-image extraction cell of this notebook writes
# (it previously pointed at 'neg_extracted_features_lbp_05.csv', which no cell produces)
features_negatives = pd.read_csv('/Users/clara/Desktop/MAIA/AIA/CleanCode/neg_extracted_features_lbp_02_newImagesID.csv')

# Create a new column called 'Is_Positive' that tells if the feature corresponds to a positive or negative image, to keep track in following performance assessment steps
features_positives['Is_Positive'] = 1
features_negatives['Is_Positive'] = 0

### CONSIDERING ONLY POSITIVE CANDIDATES OF POSITIVE IMAGES AND NEGATIVE CANDIDATES OF NEGATIVE IMAGES
# Extract the features from only the positive candidates of the positive images
features_positives_only = features_positives[features_positives['Label']==1].copy()
# Merge the positive candidates with the negative candidates in a single DataFrame
merged_df = pd.concat([features_positives_only, features_negatives], ignore_index=True)

### CONSIDERING ALSO THE NEGATIVE CANDIDATES OF POSITIVE IMAGES
# Merge the positive candidates with the negative candidates in a single DataFrame
# merged_df = pd.concat([features_positives, features_negatives], ignore_index=True)


# Columns that are identifiers or bookkeeping, not features, and must not be scaled
exclude_cols = ['image_id', 'Label', 'Is_Positive']

# Scale every remaining column to [0, 1] using the min/max of the merged table
# NOTE(review): the scaler is fitted on all samples at once; if a train/test
# split happens downstream, consider fitting on the training part only.
cols_to_scale = [col for col in merged_df.columns if col not in exclude_cols]
scaled_features = merged_df.copy()
scaled_features[cols_to_scale] = MinMaxScaler().fit_transform(merged_df[cols_to_scale])

## CLEAN THE DATAFRAME FOR MACHINE LEARNING STEPS

# List of columns to delete (only the scaled feature columns are exported)
columns_to_delete = ['Is_Positive', 'image_id', 'Label']

# Split by label and drop the non-feature columns without mutating in place
positives = scaled_features[scaled_features['Label']==1].drop(columns=columns_to_delete)
negatives = scaled_features[scaled_features['Label']==0].drop(columns=columns_to_delete)

print(positives.shape)
print(negatives.shape)

# Save 'positives' dataframe to CSV without headers
positives.to_csv('/Users/clara/Desktop/MAIA/AIA/positives.csv', header=False, index=False)

# Save 'negatives' dataframe to CSV without headers
negatives.to_csv('/Users/clara/Desktop/MAIA/AIA/negatives.csv', header=False, index=False)