In [117]:
import skimage.io
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import cv2
from skimage import exposure, color
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier

In [118]:
def calcular_promedio_color(imagen):

    promedio_color = imagen.mean(axis=(0,1))
    std = imagen.std(axis=(0,1))

    data = {
        'intensidad_Rojo': promedio_color[0], 'contraste_Rojo': std[0],
        'intensidad_Verde': promedio_color[1], 'contraste_Verde': std[1],
        'intensidad_Azul': promedio_color[2], 'contraste_Azul': std[2]
    }

    df = pd.DataFrame(data, index=[0])

    return df

In [119]:
def media_imagen_threshold(imagen):
    img_gray = color.rgb2gray(imagen)
    img_ampliacion_rango = exposure.rescale_intensity(img_gray, in_range='image', out_range=(0, 1))
    smoothed_image = cv2.GaussianBlur(img_ampliacion_rango, (5, 5), 0)
    imagen_umbralizada_globulos_blancos = (smoothed_image > 0.25).astype(np.uint8) * 255
    imagen_umbralizada_globulos_negros = 255 - imagen_umbralizada_globulos_blancos
    media_negro = np.mean(imagen_umbralizada_globulos_negros)
    std_negro = np.std(imagen_umbralizada_globulos_negros)
    return media_negro, std_negro

In [120]:
carpeta = 'data'
lista_dfs_res = []

for carpetita in os.listdir(carpeta):
    ruta_carpetita = os.path.join(carpeta,carpetita)
    resultados = []

    for nombre_archivo in os.listdir(ruta_carpetita):
        ruta_imagen = os.path.join(ruta_carpetita, nombre_archivo)

        if os.path.isfile(ruta_imagen) and nombre_archivo.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.bmp')):
            imagen = skimage.io.imread(ruta_imagen)
            
            df_resultado = calcular_promedio_color(imagen)
            media_binario, std_binario = media_imagen_threshold(imagen) 
            df_resultado['media_binario'] = media_binario
            df_resultado['std_binario'] = std_binario
            resultados.append(df_resultado)

    df_res = pd.concat(resultados, ignore_index=True)
    if carpetita == 'healthy':
        df_res['label'] = 0
    else:
        df_res['label'] = 1

    print(f"Media contraste rojo {df_res['contraste_Rojo'].mean()}")
    print(f"Media contraste verde {df_res['contraste_Verde'].mean()}")
    print(f"Media contraste binario {df_res['std_binario'].mean()}")
    print(f"Media media binaria {df_res['media_binario'].mean()}")

    lista_dfs_res.append(df_res)

df = pd.concat([lista_dfs_res[0], lista_dfs_res[1]], ignore_index=True)
df = df.sample(frac=1).reset_index(drop=True)

Media contraste rojo 9.722840283756138
Media contraste verde 16.456536599322998
Media contraste binario 13.411783893828273
Media media binaria 0.7922000049354379
Media contraste rojo 17.7486866991024
Media contraste verde 27.880811487551586
Media contraste binario 43.810653838456744
Media media binaria 9.081737970078281


In [121]:
df.shape

(108, 9)

In [125]:
X = df.drop('label', axis=1)
y = df['label']

X_train_total, X_test, y_train_total, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_total, y_train_total, test_size=0.2, random_state=42)

clasificador = LogisticRegression()

clasificador.fit(X_train, y_train)
pred = clasificador.predict(X_val)

accuracy = accuracy_score(y_val, pred)
print(f"Precisi贸n del modelo: {accuracy:.4f}")

Precisi贸n del modelo: 0.9444


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [126]:
coefs = pd.Series(clasificador.coef_[0], index=X.columns)

coefs_sorted = coefs.abs().sort_values(ascending=False)

print("Coeficientes ordenados por importancia:")
print(coefs_sorted)

Coeficientes ordenados por importancia:
std_binario         0.601124
intensidad_Azul     0.530245
intensidad_Rojo     0.375462
contraste_Verde     0.194176
media_binario       0.076189
intensidad_Verde    0.031459
contraste_Rojo      0.022528
contraste_Azul      0.010735
dtype: float64


In [130]:
X = df.drop('label', axis=1)
X = X[coefs_sorted.index[:4]]
y = df['label']

X_train_total, X_test, y_train_total, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train_total, y_train_total, test_size=0.2, random_state=42)

clasificador = LogisticRegression()

clasificador.fit(X_train, y_train)
pred = clasificador.predict(X_val)

accuracy = accuracy_score(y_val, pred)
print(f"Precisi贸n del modelo: {accuracy:.4f}")

Precisi贸n del modelo: 0.9444
