In [10]:
import cv2
from PIL import Image
import numpy as np
import pandas as pd
import os

img_dir = 'train'
mask_dir = 'train/masks'

# Load images
images = []
for i in range(1, 251):
    img_path = os.path.join(img_dir, f"{i}.jpg")
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Convert from BGR to RGB format
    images.append(img)

# Load masks
masks = []
for i in range(1, 251):
    mask_path = os.path.join(mask_dir, f"binary_{i}.tif")
    mask = Image.open(mask_path)
    mask = np.array(mask)
    masks.append(mask)

print("Loaded", len(images), "images and", len(masks), "masks.")

#loading classif excel
excel_file = 'train/classif.xlsx'
classif_df = pd.read_excel(excel_file)

print(classif_df.head())



Loaded 250 images and 250 masks.
   ID bug type         species
0   1      Bee  Apis mellifera
1   2      Bee  Apis mellifera
2   3      Bee  Apis mellifera
3   4      Bee  Apis mellifera
4   5      Bee  Apis mellifera


In [11]:
def calculate_symmetry_index(mask):
    # Flip the mask horizontally
    flipped_mask = np.fliplr(mask)
    # Calculate the symmetry as the inverse of the normalized sum of absolute differences
    symmetry = 1.0 - (np.sum(np.abs(mask - flipped_mask)) / (2 * np.sum(mask)))
    return symmetry

symmetry_indices = [calculate_symmetry_index(mask) for mask in masks]


In [12]:
def find_longest_line(mask):
    # Find all nonzero (bug) pixel positions
    indices = np.argwhere(mask)
    min_y, min_x = indices.min(axis=0)
    max_y, max_x = indices.max(axis=0)
    return (max_x - min_x), (max_y - min_y)

def ratio_of_longest_lines(mask):
    width, height = find_longest_line(mask)
    return min(width, height) / max(width, height)

ratios_of_longest_lines = [ratio_of_longest_lines(mask) for mask in masks]


ValueError: too many values to unpack (expected 2)

In [13]:
def calculate_pixel_ratio(image, mask):
    bug_pixels = np.sum(mask)
    total_pixels = image.shape[0] * image.shape[1]
    return bug_pixels / total_pixels

pixel_ratios = [calculate_pixel_ratio(images[i], masks[i]) for i in range(len(images))]


In [14]:
def calculate_color_stats(image, mask):
    masked_image = image[mask > 0]  # Apply mask to image
    min_vals = masked_image.min(axis=0)
    max_vals = masked_image.max(axis=0)
    mean_vals = masked_image.mean(axis=0)
    return min_vals, max_vals, mean_vals

color_stats = [calculate_color_stats(images[i], masks[i]) for i in range(len(images))]


In [15]:
def calculate_color_median_std(image, mask):
    masked_image = image[mask > 0]  # Apply mask to image
    median_vals = np.median(masked_image, axis=0)
    std_vals = masked_image.std(axis=0)
    return median_vals, std_vals

color_medians_stds = [calculate_color_median_std(images[i], masks[i]) for i in range(len(images))]


In [18]:
from skimage.feature import graycomatrix, graycoprops
from skimage.color import rgb2gray
from skimage.measure import shannon_entropy

# Example custom feature: texture contrast and entropy
def calculate_texture_features(image, mask):
    # Convert image to grayscale
    gray_image = rgb2gray(image)
    # Calculate Gray-Level Co-occurrence Matrix (GLCM) and derive texture properties
    glcm = graycomatrix((gray_image * 255).astype('uint8'), distances=[5], angles=[0], symmetric=True, normed=True)
    contrast = graycoprops(glcm, 'contrast')[0, 0]
    entropy = shannon_entropy(mask)
    return contrast, entropy

texture_features = [calculate_texture_features(images[i], masks[i]) for i in range(len(images))]


In [19]:
# Symmetry index
symmetry_index = calculate_symmetry_index(mask)
print(f"Symmetry index: {symmetry_index}")

# Ratio of longest orthogonal lines
#longest_line_ratio = ratio_of_longest_lines(mask)
#print(f"Ratio of longest orthogonal lines: {longest_line_ratio}")

# Pixel ratio
pixel_ratio = calculate_pixel_ratio(img, mask)
print(f"Ratio of bug pixels to total pixels: {pixel_ratio}")

# Color statistics
min_vals, max_vals, mean_vals = calculate_color_stats(img, mask)
print(f"Min RGB values within the bug mask: {min_vals}")
print(f"Max RGB values within the bug mask: {max_vals}")
print(f"Mean RGB values within the bug mask: {mean_vals}")

# Median and standard deviation for RGB
median_vals, std_vals = calculate_color_median_std(img, mask)
print(f"Median RGB values within the bug mask: {median_vals}")
print(f"Standard deviation of RGB values within the bug mask: {std_vals}")

# Custom features (texture contrast and entropy)
contrast, entropy = calculate_texture_features(img, mask)
print(f"Texture contrast: {contrast}")
print(f"Texture entropy: {entropy}")

Symmetry index: 0.4980392156862745
Ratio of bug pixels to total pixels: 5.9585
Min RGB values within the bug mask: [0 0 0]
Max RGB values within the bug mask: [255 255 255]
Mean RGB values within the bug mask: [127.06082382 101.62600571  77.42946683]
Median RGB values within the bug mask: [125.  96.  63.]
Standard deviation of RGB values within the bug mask: [62.59178458 60.36123553 59.83946465]
Texture contrast: 121.04977894078397
Texture entropy: 0.15994742607236198
