# Read data

In [5]:
# Import libraries
import numpy as np
import pandas as pd
import os
import cv2
from matplotlib import pyplot as plt
import glob

In [6]:
from google.colab import drive
drive.mount('/content/drive')

#https://drive.google.com/drive/folders/13DHKkGJhXh2RKNjGqqsjm_E9R2MjAupD?usp=drive_link

images_path = '/content/drive/MyDrive/w281/images (1)'
segmentation_path = '/content/drive/MyDrive/w281/masks (1)'

image_files = sorted(glob.glob('/'.join([images_path, '*.jpg'])))
segmentation_files = sorted(glob.glob('/'.join([segmentation_path, '*.png'])))

print(f"Number of image files found: {len(image_files)}")
print(f"Number of image files found: {len(segmentation_files)}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of image files found: 942
Number of image files found: 4503


In [7]:
metadata = pd.read_csv('/content/drive/My Drive/HAM10000_metadata.csv')
# get paths for image and segmentation files
img_files = sorted(glob.glob('/'.join([images_path, '*.jpg'])))
segmentation_files = sorted(glob.glob('/'.join([segmentation_path, '*.png'])))
# get the image ID
img_files_df = pd.DataFrame({'img_file_path': img_files})
img_files_df['image_id'] = img_files_df['img_file_path'].apply(lambda x: x.split('/')[-1].replace('.jpg', ''))
# get the segmentation ID
segmentation_files_df = pd.DataFrame({'segmentation_file_path': segmentation_files})
segmentation_files_df['image_id'] = segmentation_files_df['segmentation_file_path'].apply(lambda x: x.split('/')[-1].replace('_segmentation.png', ''))

metadata = pd.merge(metadata, img_files_df, left_on= 'image_id', right_on = 'image_id')
metadata = pd.merge(metadata, segmentation_files_df, left_on = 'image_id', right_on = 'image_id')

metadata.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,img_file_path,segmentation_file_path
0,HAM_0001728,ISIC_0033539,bkl,histo,60.0,male,back,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...
1,HAM_0000344,ISIC_0034318,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...
2,HAM_0004332,ISIC_0033785,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...
3,HAM_0002040,ISIC_0033900,bkl,histo,50.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...
4,HAM_0005299,ISIC_0033466,bkl,histo,55.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...


# Comparing Average Pixel Color Inside/Outside Mask

In [13]:
import cv2
import numpy as np
import pandas as pd

hue_data = []

for index, row in metadata.iterrows():
    img_path = row['img_file_path']
    seg_path = row['segmentation_file_path']

    img = cv2.imread(img_path)
    mask = cv2.imread(seg_path, cv2.IMREAD_GRAYSCALE)
    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    img_masked_inside = cv2.bitwise_and(img, img, mask=mask)

    mask_inv = cv2.bitwise_not(mask)
    img_masked_outside = cv2.bitwise_and(img, img, mask=mask_inv)

    hsv_inside = cv2.cvtColor(img_masked_inside, cv2.COLOR_BGR2HSV)
    hsv_outside = cv2.cvtColor(img_masked_outside, cv2.COLOR_BGR2HSV)

    hue_channel_inside = hsv_inside[:,:,0]
    hue_channel_outside = hsv_outside[:,:,0]

    mean_hue_inside_mask = np.mean(hue_channel_inside[np.nonzero(mask)])
    median_hue_inside_mask = np.median(hue_channel_inside[np.nonzero(mask)])
    mean_hue_outside_mask = np.mean(hue_channel_outside[np.nonzero(mask_inv)])
    median_hue_outside_mask = np.median(hue_channel_outside[np.nonzero(mask_inv)])

    hue_data.append({
        'image_id': row['image_id'],
        'mean_hue_inside_mask': mean_hue_inside_mask,
        'median_hue_inside_mask': median_hue_inside_mask,
        'mean_hue_outside_mask': mean_hue_outside_mask,
        'median_hue_outside_mask': median_hue_outside_mask
    })

hue_df = pd.DataFrame(hue_data)
metadata = pd.merge(metadata, hue_df, on='image_id')

display(metadata.head())

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,img_file_path,segmentation_file_path,mean_hue,median_hue,mean_hue_inside_mask_x,median_hue_inside_mask_x,mean_hue_outside_mask_x,median_hue_outside_mask_x,mean_hue_inside_mask_y,median_hue_inside_mask_y,mean_hue_outside_mask_y,median_hue_outside_mask_y
0,HAM_0001728,ISIC_0033539,bkl,histo,60.0,male,back,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,156.255798,165.0,156.255798,165.0,14.544667,6.0,156.255798,165.0,14.544667,6.0
1,HAM_0000344,ISIC_0034318,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,170.327865,172.0,170.327865,172.0,149.322818,151.0,170.327865,172.0,149.322818,151.0
2,HAM_0004332,ISIC_0033785,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,142.184371,166.0,142.184371,166.0,132.110349,129.0,142.184371,166.0,132.110349,129.0
3,HAM_0002040,ISIC_0033900,bkl,histo,50.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,107.604787,166.0,107.604787,166.0,130.239018,143.0,107.604787,166.0,130.239018,143.0
4,HAM_0005299,ISIC_0033466,bkl,histo,55.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,79.76868,5.0,79.76868,5.0,141.508135,152.0,79.76868,5.0,141.508135,152.0


In [None]:
# img = cv2.imread(images_path)
# mask = cv2.imread(segmentation_path, cv2.IMREAD_GRAYSCALE)

# image_id = os.path.splitext(os.path.basename(images_path))[0]

# overlay_color = [0, 255, 0]
# colored_mask = cv2.merge((mask, mask, mask))
# colored_mask = cv2.cvtColor(colored_mask, cv2.COLOR_BGR2RGB)
# colored_mask[mask == 0] = [0, 0, 0]

# alpha = 0.5
# result = cv2.addWeighted(img, 1 - alpha, colored_mask, alpha, 0)

# plt.figure(figsize=(10, 5))
# plt.subplot(1, 2, 1)
# plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
# plt.title("Original Image")

# plt.subplot(1, 2, 2)
# plt.imshow(result)
# plt.title("Image with Mask Overlay")

# plt.tight_layout()
# plt.show()

# Comparing Average Pixel Colors in Lesion by Quadrant

In [14]:
quadrant_hue_data = []

for index, row in metadata.iterrows():
    img_path = row['img_file_path']
    seg_path = row['segmentation_file_path']
    img = cv2.imread(img_path)
    mask = cv2.imread(seg_path, cv2.IMREAD_GRAYSCALE)
    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)
    height, width = img.shape[:2]

    quadrants = [
        (0, 0, width // 2, height // 2),  # Top-left
        (width // 2, 0, width, height // 2),  # Top-right
        (0, height // 2, width // 2, height),  # Bottom-left
        (width // 2, height // 2, width, height)  # Bottom-right
    ]
    quadrant_data = {'image_id': row['image_id']}
    for i, (x1, y1, x2, y2) in enumerate(quadrants):
        img_quadrant = img[y1:y2, x1:x2]
        mask_quadrant = mask[y1:y2, x1:x2]

        hsv_quadrant = cv2.cvtColor(img_quadrant, cv2.COLOR_BGR2HSV)
        hue_channel_quadrant = hsv_quadrant[:,:,0]
        mean_hue = np.mean(hue_channel_quadrant[np.nonzero(mask_quadrant)])
        median_hue = np.median(hue_channel_quadrant[np.nonzero(mask_quadrant)])
        quadrant_data[f'mean_hue_q{i+1}'] = mean_hue
        quadrant_data[f'median_hue_q{i+1}'] = median_hue

    quadrant_hue_data.append(quadrant_data)

quadrant_hue_df = pd.DataFrame(quadrant_hue_data)

display(metadata.head())

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,img_file_path,segmentation_file_path,mean_hue,median_hue,mean_hue_inside_mask_x,median_hue_inside_mask_x,mean_hue_outside_mask_x,median_hue_outside_mask_x,mean_hue_inside_mask_y,median_hue_inside_mask_y,mean_hue_outside_mask_y,median_hue_outside_mask_y
0,HAM_0001728,ISIC_0033539,bkl,histo,60.0,male,back,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,156.255798,165.0,156.255798,165.0,14.544667,6.0,156.255798,165.0,14.544667,6.0
1,HAM_0000344,ISIC_0034318,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,170.327865,172.0,170.327865,172.0,149.322818,151.0,170.327865,172.0,149.322818,151.0
2,HAM_0004332,ISIC_0033785,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,142.184371,166.0,142.184371,166.0,132.110349,129.0,142.184371,166.0,132.110349,129.0
3,HAM_0002040,ISIC_0033900,bkl,histo,50.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,107.604787,166.0,107.604787,166.0,130.239018,143.0,107.604787,166.0,130.239018,143.0
4,HAM_0005299,ISIC_0033466,bkl,histo,55.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,79.76868,5.0,79.76868,5.0,141.508135,152.0,79.76868,5.0,141.508135,152.0


# Comparing Min/Max Pixel Colors in Lesion

In [15]:
pixel_data = []

for index, row in metadata.iterrows():
    img_path = row['img_file_path']
    seg_path = row['segmentation_file_path']
    img = cv2.imread(img_path)
    mask = cv2.imread(seg_path, cv2.IMREAD_GRAYSCALE)

    _, mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)

    hsv_image = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
    hue_channel = hsv_image[:,:,0]
    non_zero_pixels = np.nonzero(mask)
    valid_hue_values = hue_channel[non_zero_pixels]
    darkest_hue = np.min(valid_hue_values[valid_hue_values > 0])  # Exclude hue=0 (black)
    lightest_hue = np.max(valid_hue_values)

    darkest_coords = np.where(hue_channel == darkest_hue)
    lightest_coords = np.where(hue_channel == lightest_hue)

    pixel_data.append({
        'image_id': row['image_id'],
        'darkest_hue': darkest_hue,
        'darkest_x': darkest_coords[1][0],  # Assuming only one darkest pixel
        'darkest_y': darkest_coords[0][0],
        'lightest_hue': lightest_hue,
        'lightest_x': lightest_coords[1][0],  # Assuming only one lightest pixel
        'lightest_y': lightest_coords[0][0]
    })

pixel_df = pd.DataFrame(pixel_data)

display(metadata.head())

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization,img_file_path,segmentation_file_path,mean_hue,median_hue,mean_hue_inside_mask_x,median_hue_inside_mask_x,mean_hue_outside_mask_x,median_hue_outside_mask_x,mean_hue_inside_mask_y,median_hue_inside_mask_y,mean_hue_outside_mask_y,median_hue_outside_mask_y
0,HAM_0001728,ISIC_0033539,bkl,histo,60.0,male,back,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,156.255798,165.0,156.255798,165.0,14.544667,6.0,156.255798,165.0,14.544667,6.0
1,HAM_0000344,ISIC_0034318,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,170.327865,172.0,170.327865,172.0,149.322818,151.0,170.327865,172.0,149.322818,151.0
2,HAM_0004332,ISIC_0033785,bkl,histo,55.0,male,trunk,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,142.184371,166.0,142.184371,166.0,132.110349,129.0,142.184371,166.0,132.110349,129.0
3,HAM_0002040,ISIC_0033900,bkl,histo,50.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,107.604787,166.0,107.604787,166.0,130.239018,143.0,107.604787,166.0,130.239018,143.0
4,HAM_0005299,ISIC_0033466,bkl,histo,55.0,female,lower extremity,/content/drive/MyDrive/w281/images (1)/ISIC_00...,/content/drive/MyDrive/w281/masks (1)/ISIC_003...,79.76868,5.0,79.76868,5.0,141.508135,152.0,79.76868,5.0,141.508135,152.0
