In [93]:
import numpy as np
import pandas as pd
import albumentations as A
from albumentations.pytorch import ToTensorV2
from skimage import io as skio
import torch
import torch.nn as nn
from efficientnet_pytorch import model as efficientnet_model
from concurrent.futures import ThreadPoolExecutor

In [94]:
# Filesystem locations, relative to the notebook's working directory.
dataset_dir = "../../dataset"  # dataset root; image tiles are read from a sub-folder of it
data_csv_path = "../data"      # folder holding the train/val/test CSV splits

# NOTE(review): not referenced in the visible cells; presumably the number of
# target classes -- confirm against the rest of the project.
output_dimensions = 5

In [95]:
# Load the three dataset splits (train, then val, then test) from
# "<data_csv_path>/<split>.csv" into one DataFrame each.
df_train, df_val, df_test = (
    pd.read_csv(f"{data_csv_path}/{split}.csv")
    for split in ("train", "val", "test")
)

In [96]:
# Image pipeline used when reading tiles.
# The transpose and the two flips are each applied with probability 0.5, so the
# result of this pipeline is random from call to call.
# ToFloat() converts the pixels to floating point and ToTensorV2() turns the
# array into a torch tensor (see the albumentations docs for the exact scaling
# and layout they apply).
transforms_train = A.Compose(
    transforms=[
        A.Transpose(p=0.5),
        A.VerticalFlip(p=0.5),
        A.HorizontalFlip(p=0.5),
        A.ToFloat(),
        ToTensorV2(),
    ]
)

In [97]:
def _to_model_input(image):
    """Return ``image`` as a floating-point CHW tensor scaled to [0, 1]."""
    if not torch.is_tensor(image):
        # No tensor-producing transform ran: the image is still a NumPy array
        # laid out as (H, W) or (H, W, C).
        image = torch.from_numpy(np.ascontiguousarray(image))
        image = image.unsqueeze(0) if image.ndim == 2 else image.permute(2, 0, 1)

    if image.is_floating_point():
        # Floating-point data is taken to be scaled already (this is what a
        # pipeline containing A.ToFloat() produces); dividing by 255 again
        # would squash the values into [0, 1/255].
        return image.float()

    # Integer pixel data: convert and scale here.
    return image.float() / 255


def extract_features(args):
    """Read one image, run it through ``model`` and return its flattened features.

    The function takes a single tuple so that it can be handed directly to
    ``Executor.map``.

    Args:
        args: ``(image_path, label, index, transform, model)`` where
            ``image_path`` is passed to ``skio.imread``; ``label`` is returned
            unchanged; ``index`` is only used in the progress message;
            ``transform`` is an optional callable invoked as
            ``transform(image=...)`` whose result is indexed with ``"image"``;
            ``model`` is called on a batch containing the single image.

    Returns:
        tuple: ``(features_np, label)`` where ``features_np`` is the model
        output flattened from dimension 1 on, squeezed, and converted to a
        NumPy array.
    """
    image_path, label, index, transform, model = args
    print(f"Processando item: {index+1}")

    image = skio.imread(image_path)

    if transform:
        image = transform(image=image)["image"]

    # NOTE(review): only [0, 1] scaling is applied, no per-channel mean/std
    # normalisation -- confirm whether the pretrained backbone expects it.
    batch = _to_model_input(image).unsqueeze(0)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        features = model(batch)

    features = features.flatten(start_dim=1)
    features_np = features.squeeze().numpy()

    return features_np, label

In [98]:
class EfficientNetFeatureExtractor(nn.Module):
    """EfficientNet-B0 wrapped so that the forward pass returns backbone features.

    ``forward`` delegates to the wrapped network's ``extract_features`` method.
    The classifier head ``_fc`` is replaced by ``nn.Identity``; note that
    ``forward`` here does not call the wrapped network's own ``forward``, so
    whether ``_fc`` is involved at all depends on ``extract_features``
    (NOTE(review): in efficientnet_pytorch it appears to stop before pooling
    and the head -- confirm if pooled features were intended).
    """

    def __init__(self):
        super().__init__()
        # Build the architecture only. The weights come from the local
        # checkpoint loaded right below, so fetching the published weights
        # first (as from_pretrained does) would be a redundant load.
        self.efficient_net = efficientnet_model.EfficientNet.from_name("efficientnet-b0")
        self.efficient_net.load_state_dict(
            torch.load("../pre-trained-models/efficientnet-b0-08094119.pth", weights_only=True)
        )
        # Drop the classification layer once the checkpoint has been loaded.
        self.efficient_net._fc = nn.Identity()

    def extract(self, inputs):
        """Return ``self.efficient_net.extract_features(inputs)``."""
        return self.efficient_net.extract_features(inputs)

    def forward(self, inputs):
        """Alias of :meth:`extract`, so the module can be called directly."""
        return self.extract(inputs)


In [None]:
# Feature extractor switched to inference mode (eval() returns the module).
model = EfficientNetFeatureExtractor().eval()
IMG_DIR = f"{dataset_dir}/tiles"

# One work item per training row, in the tuple layout extract_features unpacks:
# (image path, label, row index, transform, model).
params = list(
    (f"{IMG_DIR}/{row['image_id']}.jpg", row['isup_grade'], index, transforms_train, model)
    for index, row in df_train.iterrows()
)

# Process the items on a pool of 5 threads; map() yields results in input order.
with ThreadPoolExecutor(max_workers=5) as executor:
    results = [*executor.map(extract_features, params)]

Loaded pretrained weights for efficientnet-b0
Processando item: 1
Processando item: 2


In [102]:
# Split the (features, label) pairs into two parallel lists.
all_features = [features for features, _ in results]
all_labels = [label for _, label in results]

# Turn both lists into NumPy arrays.
all_features_np = np.array(all_features)
all_labels_np = np.array(all_labels)

# Report the resulting shapes.
print(f"Características forma: {all_features_np.shape}")
print(f"Rótulos forma: {all_labels_np.shape}")

# Build a single 2-D array: the feature columns followed by one label column.
data = np.column_stack((all_features_np, all_labels_np))

Características forma: (2, 2949120)
Rótulos forma: (2,)


In [112]:
# Save the combined feature/label array to a .npy file.
# The path is defined once so the success message always names the file that
# was actually written (the message previously named a different file).
output_path = 'efficientnet_data.npy'
np.save(output_path, data)

print(f"Arquivo '{output_path}' gerado com sucesso!")

Arquivo 'feaefficientnet_data.npy' gerado com sucesso!
