In [1]:
from matplotlib import pyplot as plt
import os 
import matplotlib.image as mpimg
import cv2
import pandas as pd

In [2]:
PATH = 'surfaces'
classes = ['Ok', 'Defects']

In [3]:
piezas = os.listdir(PATH)
piezas.sort()
piezas

['Piece01', 'Piece02', 'Piece03', 'Piece04']

In [4]:
piezas_imgs = []
for p in piezas:
    imgs = [f for f in os.listdir(os.path.join(PATH,p)) if os.path.isfile(os.path.join(PATH,p,f))]
    piezas_imgs.append(os.path.join(PATH,p,imgs[0]))

In [5]:
columns = ['pieza',  'type', 'path', 'file']
df = pd.DataFrame(columns=columns)
for p in piezas:
    for c in classes:
        images = [[p,c,os.path.join(PATH,p,c,i), i]for i in os.listdir(os.path.join(PATH,p,c))]
        dfp = pd.DataFrame(images, columns=columns)
        df = pd.concat([df,dfp])

In [6]:
df.head()

Unnamed: 0,pieza,type,path,file
0,Piece01,Defects,surfaces\Piece01\Defects\foto211.tif,foto211.tif
1,Piece01,Defects,surfaces\Piece01\Defects\foto316.tif,foto316.tif
2,Piece01,Defects,surfaces\Piece01\Defects\foto284.tif,foto284.tif
3,Piece01,Defects,surfaces\Piece01\Defects\foto359.tif,foto359.tif
4,Piece01,Defects,surfaces\Piece01\Defects\foto623.tif,foto623.tif


In [7]:
from sklearn.utils import resample
from sklearn.model_selection import StratifiedKFold
from collections import Counter
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [14]:
# Undersampling: igualar clase mayoritaria al tamaño menor
def undersample(df, target_col='type'):
    df_majority = df[df[target_col] == 'Defects']
    df_minority = df[df[target_col] == 'Ok']
    df_majority_downsampled = resample(df_majority, replace=False,
                                      n_samples=len(df_minority),
                                      random_state=42)
    df_balanced = pd.concat([df_majority_downsampled, df_minority])
    return df_balanced.sample(frac=1).reset_index(drop=True)  # barajar

# Data augmentation (configuración ejemplo, aplicar en carga imágenes)
def get_augmentation_generator():
    return ImageDataGenerator(rotation_range=20,
                              width_shift_range=0.1,
                              height_shift_range=0.1,
                              horizontal_flip=True)

# Validación cruzada estratificada
def stratified_cross_val(df, target_col='type', n_splits=5):
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
    for train_idx, test_idx in skf.split(df, df[target_col]):
        df_train = df.iloc[train_idx]
        df_test = df.iloc[test_idx]
        print('Train class distribution:', Counter(df_train[target_col]))
        print('Test class distribution:', Counter(df_test[target_col]))
        # Aquí se puede insertar código de entrenamiento y evaluación

In [15]:
print('Distribución original:', Counter(df['type']))

balanced_df = undersample(df)
print('Distribución tras undersampling:', Counter(balanced_df['type']))

Distribución original: Counter({'Defects': 440, 'Ok': 153})
Distribución tras undersampling: Counter({'Defects': 153, 'Ok': 153})


In [16]:
stratified_cross_val(df)

Train class distribution: Counter({'Defects': 352, 'Ok': 122})
Test class distribution: Counter({'Defects': 88, 'Ok': 31})
Train class distribution: Counter({'Defects': 352, 'Ok': 122})
Test class distribution: Counter({'Defects': 88, 'Ok': 31})
Train class distribution: Counter({'Defects': 352, 'Ok': 122})
Test class distribution: Counter({'Defects': 88, 'Ok': 31})
Train class distribution: Counter({'Defects': 352, 'Ok': 123})
Test class distribution: Counter({'Defects': 88, 'Ok': 30})
Train class distribution: Counter({'Defects': 352, 'Ok': 123})
Test class distribution: Counter({'Defects': 88, 'Ok': 30})
