In [5]:
import os
import random

import cv2
import numpy as np
import pandas as pd
from deap import base, creator, tools, algorithms
from imblearn.over_sampling import RandomOverSampler
from skimage import color
from skimage import io
from skimage import morphology
from skimage import transform
from skimage import util
from skimage import color, io, transform, util, feature
from skimage.feature import greycomatrix
from skimage.filters import threshold_otsu
from skimage.restoration import inpaint
from skimage.util import img_as_ubyte
from sklearn.base import clone
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the metadata table; the `image_id` and `dx` columns are used below.
df = pd.read_csv('HAM10000_metadata.csv')

image_directory = 'PycharmProjects/pythonProject/data/all_images'  # Replace with the actual path to your image directory
# NOTE(review): assumes every image is stored as "<image_id>.jpg" inside image_directory;
# set this to '' if image_id already carries the file extension — confirm.
image_extension = '.jpg'

# Combine directory path with image file names to create full paths.
# os.path.join only accepts individual path strings, so it is applied per row
# instead of being handed the whole Series.
df['image_path_column'] = df['image_id'].astype(str).map(
    lambda image_id: os.path.join(image_directory, image_id + image_extension)
)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    df['image_path_column'], df['dx'], test_size=0.2, random_state=42
)

# Balancing the training set using Random Oversampling.
# fit_resample needs a 2-D input, hence the temporary single-column frame.
oversampler = RandomOverSampler(random_state=42)
X_train_resampled, y_train_resampled = oversampler.fit_resample(X_train.to_frame(), y_train)

# Convert the resampled single-column frame back to a Series. Selecting the column
# explicitly (rather than squeeze()) still yields a Series when only one row is left.
X_train_resampled = X_train_resampled.iloc[:, 0]
y_train_resampled = pd.Series(y_train_resampled)

In [7]:
def preprocess_image(image_path, target_size=(512, 512)):
    """Load an image and run the resize -> hair removal -> smoothing chain.

    Parameters
    ----------
    image_path : str or path-like
        Location of the image file; read with ``cv2.imread``.
    target_size : tuple of int, optional
        Size handed to ``cv2.resize``.

    Returns
    -------
    The result of ``remove_noise(remove_hair(resized_image))``.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` could not read ``image_path``.
    """
    img = cv2.imread(str(image_path))
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None instead of
        # raising; without this guard the failure shows up later as an opaque
        # cv2.resize error that does not mention the path.
        raise FileNotFoundError(f"Could not read image: {image_path}")

    img_resized = cv2.resize(img, target_size)
    img_no_hair = remove_hair(img_resized)
    img_smoothed = remove_noise(img_no_hair)

    return img_smoothed

In [8]:
def remove_hair(image, kernel_size=17, inpaint_radius=1, hair_threshold=10):
    """Remove thin dark structures (hair) from a BGR image by black-hat + inpainting.

    The previous version referenced ``kernel`` and ``inpaintRadius`` without ever
    defining them, so every call raised ``NameError``. Both are now parameters.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image converted to grayscale with ``cv2.COLOR_BGR2GRAY``.
    kernel_size : int, optional
        Side length of the square structuring element used for the black-hat
        transform. NOTE(review): 17 is a chosen default, not a value taken from
        the original notebook — tune against sample images.
    inpaint_radius : int, optional
        Neighbourhood radius passed to ``cv2.inpaint``. NOTE(review): chosen
        default, confirm.
    hair_threshold : int, optional
        Black-hat responses above this value are marked for inpainting
        (10 is the value the original code hard-coded).

    Returns
    -------
    numpy.ndarray
        ``image`` with the masked pixels filled in by ``cv2.INPAINT_TELEA``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_size, kernel_size))
    black_hat = cv2.morphologyEx(gray, cv2.MORPH_BLACKHAT, kernel)
    _, mask = cv2.threshold(black_hat, hair_threshold, 255, cv2.THRESH_BINARY)
    inpainted_image = cv2.inpaint(image, mask, inpaint_radius, flags=cv2.INPAINT_TELEA)

    return inpainted_image

In [10]:
def remove_noise(image, kernel_size=7):
    """Smooth ``image`` with a square Gaussian kernel.

    Parameters
    ----------
    image : numpy.ndarray
        Image handed to ``cv2.GaussianBlur``.
    kernel_size : int, optional
        Width and height of the blur window.

    Returns
    -------
    The blurred image returned by ``cv2.GaussianBlur``.
    """
    blur_window = (kernel_size, kernel_size)
    # The third positional argument (sigmaX) is passed as 0.
    return cv2.GaussianBlur(image, blur_window, 0)

In [11]:
def automatic_grabcut(image):
    """Segment ``image`` with GrabCut, choosing the initial mask automatically.

    The image is converted from BGR to HSV and a green-range mask is extracted.
    When the number of in-range pixels exceeds the value returned by
    ``compute_mask_threshold``, GrabCut is seeded with the mask from
    ``generate_rectangle``; otherwise the green mask itself is the seed.

    Returns
    -------
    The output of ``grabcut_segmentation`` for the chosen seed mask.
    """
    green_mask = extract_green_color_mask(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))

    if np.sum(green_mask) > compute_mask_threshold(green_mask):
        seed_mask = generate_rectangle(image)
    else:
        seed_mask = green_mask

    return grabcut_segmentation(image, seed_mask)

In [12]:
def extract_green_color_mask(hsv_image):
    """Return a 0/1 mask of the pixels whose HSV value lies in the green range.

    Parameters
    ----------
    hsv_image : numpy.ndarray
        Image passed to ``cv2.inRange`` together with the bounds
        ``[40, 40, 40]`` and ``[80, 255, 255]``.

    Returns
    -------
    numpy.ndarray
        The ``cv2.inRange`` result integer-divided by 255.
    """
    hsv_floor = np.array([40, 40, 40])
    hsv_ceiling = np.array([80, 255, 255])
    in_range = cv2.inRange(hsv_image, hsv_floor, hsv_ceiling)
    # Scale the in-range marker value 255 down to 1.
    return in_range // 255

In [14]:
def compute_mask_threshold(mask, area_fraction=0.7):
    """Return the pixel-count threshold for ``mask``: a fraction of its total size.

    The previous version also summed the mask into a local that was never used;
    that dead computation is removed. The result depends only on ``mask.size``.

    Parameters
    ----------
    mask : numpy.ndarray
        Array whose ``size`` (total element count) is used.
    area_fraction : float, optional
        Fraction of the element count to return (0.7 is the value the original
        code hard-coded).

    Returns
    -------
    float
        ``area_fraction * mask.size``.
    """
    return area_fraction * mask.size

In [15]:
def generate_rectangle(image, margin_fraction=0.3):
    """Build a uint8 mask that is 1 inside a rectangle and 0 elsewhere.

    The rectangle starts at the top-left corner and spans
    ``height - margin_fraction * height`` rows and
    ``width - margin_fraction * width`` columns (both truncated to int).

    NOTE(review): the rectangle is anchored at (0, 0) rather than centred on the
    image, and it is filled with 1. When this mask is fed to
    ``cv2.grabCut(..., cv2.GC_INIT_WITH_MASK)`` the values 0 and 1 correspond to
    the "definite" labels GC_BGD / GC_FGD — confirm that a centred rectangle of
    "probable foreground" (GC_PR_FGD) was not what was intended.

    Parameters
    ----------
    image : numpy.ndarray
        Only ``image.shape[:2]`` is read.
    margin_fraction : float, optional
        Fraction of each dimension left outside the rectangle (0.3 is the value
        the original code hard-coded).

    Returns
    -------
    numpy.ndarray
        Mask of shape ``image.shape[:2]`` and dtype ``uint8``.
    """
    height, width = image.shape[:2]
    rect_height = int(height - margin_fraction * height)
    rect_width = int(width - margin_fraction * width)

    rectangle = np.zeros((height, width), dtype=np.uint8)
    rectangle[:rect_height, :rect_width] = 1

    return rectangle

In [16]:
def grabcut_segmentation(image, mask, iter_count=5):
    """Run mask-initialised GrabCut and return ``image`` with background zeroed.

    Changes from the previous version:
    * GrabCut writes its labels back into the mask it is given, which silently
      modified the caller's array. The labels now live in a private copy.
    * The mask is coerced to ``uint8`` before the call.
    * The full-image ``rect`` that was passed is dropped: the call uses
      ``cv2.GC_INIT_WITH_MASK``, so ``None`` is passed for the rectangle.

    NOTE(review): a seed mask containing only 0 and 1 uses the "definite"
    GC_BGD / GC_FGD labels only — confirm the seeds are meant to be definite
    rather than GC_PR_BGD / GC_PR_FGD.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image to segment; multiplied by the foreground mask along a new
        trailing axis, so it is expected to have a channel axis.
    mask : numpy.ndarray
        Initial GrabCut labels, one per pixel. Not modified.
    iter_count : int, optional
        Number of GrabCut iterations (5 is the value the original hard-coded).

    Returns
    -------
    numpy.ndarray
        ``image`` with every pixel labelled GC_BGD or GC_PR_BGD set to zero.
    """
    bgd_model = np.zeros((1, 65), dtype=np.float64)
    fgd_model = np.zeros((1, 65), dtype=np.float64)

    # .copy() guarantees a fresh array even when `mask` is already uint8.
    labels = np.asarray(mask, dtype=np.uint8).copy()
    cv2.grabCut(image, labels, None, bgd_model, fgd_model, iter_count, cv2.GC_INIT_WITH_MASK)

    is_background = (labels == cv2.GC_PR_BGD) | (labels == cv2.GC_BGD)
    grabcut_mask = np.where(is_background, 0, 1).astype('uint8')

    grabcut_result = image * grabcut_mask[:, :, np.newaxis]

    return grabcut_result

In [18]:
def _glcm_scalar_features(p):
    """Compute (contrast, energy, entropy, correlation, homogeneity) of one normalised 2-D GLCM."""
    rows, cols = np.indices(p.shape)
    diff = rows - cols

    contrast = np.sum(p * diff ** 2)
    energy = np.sum(p ** 2)
    # The small constant keeps log() finite for empty cells.
    entropy = -np.sum(p * np.log(p + 1e-10))
    homogeneity = np.sum(p / (1 + np.abs(diff)))

    # Correlation uses the GLCM-weighted means and standard deviations of the
    # row/column grey levels.
    mean_row = np.sum(rows * p)
    mean_col = np.sum(cols * p)
    std_row = np.sqrt(np.sum(p * (rows - mean_row) ** 2))
    std_col = np.sqrt(np.sum(p * (cols - mean_col) ** 2))
    if std_row < 1e-15 or std_col < 1e-15:
        # Constant image: the ratio is 0/0; report perfect correlation.
        correlation = 1.0
    else:
        correlation = np.sum(p * (rows - mean_row) * (cols - mean_col)) / (std_row * std_col)

    return contrast, energy, entropy, correlation, homogeneity


def extract_glcm_features(image):
    """Extract five grey-level co-occurrence (GLCM) texture features from an image.

    Fixes relative to the previous version:
    * contrast and homogeneity were built from ``arange - arange`` (all zeros),
      so contrast was always 0 and homogeneity always the plain sum of the
      matrix; they now use the row/column index difference ``i - j``.
    * correlation used the unweighted mean/std of the index range; it now uses
      the GLCM-weighted statistics.
    * ``img_as_ubyte`` was called without being imported; it is reached through
      the already-imported ``skimage.util``.

    NOTE(review): ``color.rgb2gray`` assumes RGB channel order, whereas images
    loaded with ``cv2.imread`` elsewhere in this notebook are BGR — confirm
    whether the channels should be swapped before calling this function.

    Parameters
    ----------
    image : numpy.ndarray
        Colour image (converted with ``color.rgb2gray``) or an already
        single-channel 2-D image (used as is).

    Returns
    -------
    tuple
        ``(contrast, energy, entropy, correlation, homogeneity)`` computed from
        the co-occurrence matrix of the first distance and first angle.
    """
    gray_image = color.rgb2gray(image) if np.ndim(image) == 3 else np.asarray(image)
    gray_image = util.img_as_ubyte(gray_image)

    distances = [1, 2, 3]
    angles = [0, np.pi/4, np.pi/2, 3*np.pi/4]

    glcm = greycomatrix(gray_image, distances=distances, angles=angles, symmetric=True, normed=True)

    # As in the original code, only the matrix for distances[0] / angles[0] is used.
    return _glcm_scalar_features(glcm[:, :, 0, 0])

In [8]:
def extract_statistical_features(image):
    """Compute per-channel mean, variance, standard deviation and RMS of an image.

    The pixels are converted to float64 first. Squaring an integer image directly
    (for example uint8) wraps around on overflow, which made the RMS wrong for
    any pixel value whose square does not fit the dtype.

    Parameters
    ----------
    image : array-like
        Image whose first two axes are reduced.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean_values, variance_values, std_dev_values, rms_values)``, each
        reduced over axes 0 and 1.
    """
    pixels = np.asarray(image, dtype=np.float64)

    mean_values = np.mean(pixels, axis=(0, 1))
    variance_values = np.var(pixels, axis=(0, 1))
    std_dev_values = np.std(pixels, axis=(0, 1))
    rms_values = np.sqrt(np.mean(pixels ** 2, axis=(0, 1)))

    return mean_values, variance_values, std_dev_values, rms_values

In [10]:
def extract_features(image):
    """Build one flat feature vector from the GLCM and statistical features.

    ``extract_glcm_features`` returns five scalars while
    ``extract_statistical_features`` returns four per-channel arrays. Handing
    those two tuples straight to ``np.concatenate`` fails because the resulting
    arrays have different numbers of dimensions, so every component is
    flattened to 1-D first.

    Returns
    -------
    numpy.ndarray
        1-D float64 vector: the five GLCM values followed by the flattened
        mean, variance, standard deviation and RMS arrays.
    """
    glcm = extract_glcm_features(image)
    stats = extract_statistical_features(image)

    parts = [np.ravel(np.asarray(value, dtype=np.float64)) for value in (*glcm, *stats)]
    return np.concatenate(parts)

In [21]:
pip install -U imbalanced-learn

Collecting imbalanced-learn
  Downloading imbalanced_learn-0.11.0-py3-none-any.whl (235 kB)
     ---------------------------------------- 0.0/235.6 kB ? eta -:--:--
     -------------------------------------- 235.6/235.6 kB 7.3 MB/s eta 0:00:00
Installing collected packages: imbalanced-learn
  Attempting uninstall: imbalanced-learn
    Found existing installation: imbalanced-learn 0.10.1
    Uninstalling imbalanced-learn-0.10.1:
      Successfully uninstalled imbalanced-learn-0.10.1
Successfully installed imbalanced-learn-0.11.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
from imblearn.over_sampling import RandomOverSampler

In [23]:
pip install deap

Collecting deapNote: you may need to restart the kernel to use updated packages.

  Downloading deap-1.4.1-cp39-cp39-win_amd64.whl (109 kB)
     ---------------------------------------- 0.0/109.9 kB ? eta -:--:--
     ---------------------------------------- 109.9/109.9 kB ? eta 0:00:00
Installing collected packages: deap
Successfully installed deap-1.4.1


In [26]:
from deap import base, creator, tools, algorithms
import random

In [27]:
def optimize_classifier(individual, classifier, param_grid):
    """DEAP fitness function: accuracy of ``classifier`` configured by ``individual``.

    Gene ``i`` of ``individual`` is used as the value of the ``i``-th parameter
    name in ``param_grid``. The estimator is cloned before it is configured and
    fitted, so evaluating an individual no longer reconfigures or refits the
    shared ``classifier`` object that the caller keeps using.

    NOTE(review): the model is fitted on the module-level ``X_train``/``y_train``
    and scored on ``X_test``/``y_test`` — the same split the notebook later
    reports accuracy on, so the reported accuracy is optimistic. Consider
    scoring on a validation split of the training data instead.

    Parameters
    ----------
    individual : sequence
        One value per parameter, in the iteration order of ``param_grid``.
    classifier : estimator
        scikit-learn style estimator supporting ``set_params``/``fit``/``predict``.
    param_grid : mapping
        Only its keys (parameter names) are read here.

    Returns
    -------
    tuple
        One-element tuple ``(accuracy,)``.
    """
    params = {name: individual[position] for position, name in enumerate(param_grid)}

    candidate = clone(classifier)
    candidate.set_params(**params)
    candidate.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, candidate.predict(X_test))
    return (accuracy,)

In [28]:
def _resample_genes(individual, choices, indpb):
    """Mutation operator: with probability ``indpb`` per gene, redraw it from its own candidate list."""
    for position, candidates in enumerate(choices):
        if random.random() < indpb:
            individual[position] = random.choice(candidates)
    return (individual,)


def create_toolbox(param_grid, classifier=None):
    """Build a DEAP toolbox that searches over the candidate values in ``param_grid``.

    Fixes relative to the previous version:
    * ``tools.initCycle`` was given ``param_grid`` itself; iterating a dict
      yields its string keys, which are not callable. One ``random.choice``
      generator per parameter is registered instead.
    * ``cxBlend`` and ``mutGaussian`` produce arbitrary floats and cannot handle
      categorical or ``None`` candidates. Crossover now swaps genes
      (``cxUniform``) and mutation redraws a gene from its candidate list, so
      every individual always holds valid grid values.
    * The creator classes are only created once, so repeated calls no longer
      overwrite them.
    * ``classifier`` was read from an undefined name; it can now be passed in.

    Parameters
    ----------
    param_grid : dict
        Maps each parameter name to a list of candidate values.
    classifier : estimator, optional
        Estimator forwarded to ``optimize_classifier``. When omitted, the
        notebook-level global named ``classifier`` is used, which is what the
        previous implementation implicitly relied on.

    Returns
    -------
    deap.base.Toolbox
        Toolbox with ``individual``, ``population``, ``mate``, ``mutate``,
        ``select`` and ``evaluate`` registered.

    Raises
    ------
    NameError
        If no estimator is passed and no global ``classifier`` exists.
    """
    if classifier is None:
        try:
            classifier = globals()["classifier"]
        except KeyError:
            raise NameError(
                "name 'classifier' is not defined; pass the estimator as "
                "create_toolbox(param_grid, classifier=...)"
            ) from None

    # creator.create defines module-level classes; skip when they already exist.
    if not hasattr(creator, "FitnessMax"):
        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
    if not hasattr(creator, "Individual"):
        creator.create("Individual", list, fitness=creator.FitnessMax)

    choices = [list(candidates) for candidates in param_grid.values()]

    toolbox = base.Toolbox()
    gene_generators = []
    for position, candidates in enumerate(choices):
        attr_name = f"attr_{position}"
        toolbox.register(attr_name, random.choice, candidates)
        gene_generators.append(getattr(toolbox, attr_name))

    toolbox.register("individual", tools.initCycle, creator.Individual, gene_generators, n=1)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("mate", tools.cxUniform, indpb=0.5)
    toolbox.register("mutate", _resample_genes, choices=choices, indpb=0.2)
    toolbox.register("select", tools.selTournament, tournsize=3)
    toolbox.register("evaluate", optimize_classifier, classifier=classifier, param_grid=param_grid)
    return toolbox

In [None]:
# Genetic search over the KNN hyper-parameters, then a final fit with the best setting.
# NOTE(review): X_train / X_test come from the split of df['image_path_column'], i.e.
# they hold image paths. The estimators need numeric feature vectors (for example the
# output of extract_features) — confirm the features are built before this cell runs.
random.seed(42)  # the genetic operators draw from `random`; seed for repeatable runs

knn_param_grid = {'n_neighbors': [3, 5, 7]}
knn_classifier = KNeighborsClassifier()
# create_toolbox looks the estimator up through the notebook-level name `classifier`.
classifier = knn_classifier
knn_toolbox = create_toolbox(knn_param_grid)

# eaSimple returns (final population, logbook); neither is a parameter set, so the
# best individual is collected with a HallOfFame instead of unpacking the return value.
knn_hall_of_fame = tools.HallOfFame(1)
algorithms.eaSimple(knn_toolbox.population(n=10), knn_toolbox, cxpb=0.7, mutpb=0.2, ngen=5, stats=None, halloffame=knn_hall_of_fame, verbose=True)
best_params = knn_hall_of_fame[0]

knn_best_params = {param: best_params[i] for i, param in enumerate(knn_param_grid)}
knn_classifier.set_params(**knn_best_params)
knn_classifier.fit(X_train, y_train)
knn_predictions = knn_classifier.predict(X_test)
knn_accuracy = accuracy_score(y_test, knn_predictions)
print(f"KNN Accuracy: {knn_accuracy}, Best Parameters: {knn_best_params}")


In [None]:
# Genetic search over the SVM hyper-parameters, then a final fit with the best setting.
# NOTE(review): X_train / X_test come from the split of df['image_path_column'], i.e.
# they hold image paths. The estimators need numeric feature vectors (for example the
# output of extract_features) — confirm the features are built before this cell runs.
random.seed(42)  # the genetic operators draw from `random`; seed for repeatable runs

svm_param_grid = {'C': [0.1, 1, 10], 'kernel': ['linear', 'rbf']}
svm_classifier = SVC()
# create_toolbox looks the estimator up through the notebook-level name `classifier`.
classifier = svm_classifier
svm_toolbox = create_toolbox(svm_param_grid)

# eaSimple returns (final population, logbook); neither is a parameter set, so the
# best individual is collected with a HallOfFame instead of unpacking the return value.
svm_hall_of_fame = tools.HallOfFame(1)
algorithms.eaSimple(svm_toolbox.population(n=10), svm_toolbox, cxpb=0.7, mutpb=0.2, ngen=5, stats=None, halloffame=svm_hall_of_fame, verbose=True)
best_params = svm_hall_of_fame[0]

svm_best_params = {param: best_params[i] for i, param in enumerate(svm_param_grid)}
svm_classifier.set_params(**svm_best_params)
svm_classifier.fit(X_train, y_train)
svm_predictions = svm_classifier.predict(X_test)
svm_accuracy = accuracy_score(y_test, svm_predictions)
print(f"SVM Accuracy: {svm_accuracy}, Best Parameters: {svm_best_params}")


In [None]:
# Genetic search over the decision-tree hyper-parameters, then a final fit with the best setting.
# NOTE(review): X_train / X_test come from the split of df['image_path_column'], i.e.
# they hold image paths. The estimators need numeric feature vectors (for example the
# output of extract_features) — confirm the features are built before this cell runs.
random.seed(42)  # the genetic operators draw from `random`; seed for repeatable runs

dt_param_grid = {'max_depth': [None, 5, 10]}
dt_classifier = DecisionTreeClassifier()
# create_toolbox looks the estimator up through the notebook-level name `classifier`.
classifier = dt_classifier
dt_toolbox = create_toolbox(dt_param_grid)

# eaSimple returns (final population, logbook); neither is a parameter set, so the
# best individual is collected with a HallOfFame instead of unpacking the return value.
dt_hall_of_fame = tools.HallOfFame(1)
algorithms.eaSimple(dt_toolbox.population(n=10), dt_toolbox, cxpb=0.7, mutpb=0.2, ngen=5, stats=None, halloffame=dt_hall_of_fame, verbose=True)
best_params = dt_hall_of_fame[0]

dt_best_params = {param: best_params[i] for i, param in enumerate(dt_param_grid)}
dt_classifier.set_params(**dt_best_params)
dt_classifier.fit(X_train, y_train)
dt_predictions = dt_classifier.predict(X_test)
dt_accuracy = accuracy_score(y_test, dt_predictions)
print(f"Decision Tree Accuracy: {dt_accuracy}, Best Parameters: {dt_best_params}")

In [None]:
# Run every test image through the full pipeline and keep the per-model predictions.
prediction_records = []
for image_path, label in zip(X_test, y_test):
    # preprocess_image already resizes the image, removes hair and smooths it, so the
    # separate hair-removal and resize steps (the latter called an undefined
    # `resize_image`) are dropped.
    processed_image = preprocess_image(image_path)

    # Segmentation. The previous code called an undefined `segmentation`;
    # automatic_grabcut is the segmentation entry point defined in this notebook —
    # presumably the intended call, confirm.
    segmented_image = automatic_grabcut(processed_image)

    # Feature Extraction
    features = extract_features(segmented_image)

    # Classification: store the results instead of overwriting them on every iteration.
    prediction_records.append({
        'image_path': image_path,
        'label': label,
        'knn_prediction': knn_classifier.predict([features])[0],
        'svm_prediction': svm_classifier.predict([features])[0],
        'dt_prediction': dt_classifier.predict([features])[0],
    })

test_predictions = pd.DataFrame(prediction_records)
test_predictions.head()

In [1]:
pip install h5py

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install joblib

Note: you may need to restart the kernel to use updated packages.


In [7]:
import h5py
import joblib

In [None]:
# Dump the attributes of `svm_classifier` into an HDF5 file, one dataset per attribute.
# NOTE(review): only ndarray, list and int/float attributes are written; every other
# entry of __dict__ (for example string-valued settings) is skipped silently, and
# nothing in this cell can load the file back into an estimator. joblib is imported
# in this notebook but not used here — presumably joblib.dump(svm_classifier, ...)
# was the intended way to persist the model; confirm.
svm_model_filename = 'model.h5'
with h5py.File(svm_model_filename, 'w') as hf:
    for key, value in svm_classifier.__dict__.items():
        if isinstance(value, np.ndarray):
            # Arrays are stored as they are.
            hf.create_dataset(key, data=value)
        elif isinstance(value, list):
            # Lists are converted to an array first.
            hf.create_dataset(key, data=np.array(value))
        elif isinstance(value, (int, float)):
            # Plain numbers become scalar datasets.
            hf.create_dataset(key, data=value)