In [27]:
import numpy as np
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import pandas as pd

In [6]:
# Dataset locations, expressed relative to the notebook's working directory.
SELECTED_DATA = os.path.join('data', 'selected_dataset')
ORIGINAL_DATA = os.path.join('data', 'cleaned_dataset')
PROCESSED_DATASET = os.path.join('data', 'processed_dataset')
# Built from SELECTED_DATA (instead of a hand-typed literal) so the sample path
# follows the dataset folder if it is ever moved and uses the platform separator.
SAMPLE_IMAGE = os.path.join(SELECTED_DATA, 'normal', 'DSC03990_5.JPG')

In [10]:
# ResNet50 with ImageNet weights and without the classification head
# (include_top=False), so its output is a convolutional feature map.
model = ResNet50(
    include_top=False,
    weights='imagenet',
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [59]:
def extract_and_export(image_path, export_name, target_size=(150, 150), class_map=None):
    """Extract ResNet50 features for a folder of labelled images and save them as CSV.

    ``image_path`` must contain one sub-folder per class; every image inside a
    class folder is loaded, resized, preprocessed and pushed through the
    module-level ``model``. Each image becomes one row of flattened features
    plus an ``is_infected`` label column.

    Parameters
    ----------
    image_path : str
        Directory holding one sub-directory per class.
    export_name : str
        Base name (no extension) of the CSV written to ``data/<export_name>.csv``.
    target_size : tuple of int, optional
        ``(height, width)`` every image is resized to. Defaults to ``(150, 150)``.
        Other sizes assume ``model`` accepts variable input shapes -- TODO confirm
        against how ``model`` is constructed.
    class_map : dict, optional
        Mapping of class folder name to integer label. Defaults to
        ``{'normal': 0, 'infected': 1}``.

    Returns
    -------
    pandas.DataFrame
        Columns ``resnet_1 .. resnet_N`` followed by ``is_infected``.

    Raises
    ------
    KeyError
        If a (non-hidden) class folder is not present in ``class_map``.
    ValueError
        If no image files are found under ``image_path``.
    """
    if class_map is None:
        class_map = {'normal': 0, 'infected': 1}

    batch_images = []
    label = []
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # (and therefore the exported CSV) reproducible across runs and machines.
    for image_class in sorted(os.listdir(image_path)):
        class_path = os.path.join(image_path, image_class)
        # Ignore stray files and hidden folders (.DS_Store, .ipynb_checkpoints, ...)
        # that would otherwise crash the label lookup or the nested listdir.
        if image_class.startswith('.') or not os.path.isdir(class_path):
            continue
        if image_class not in class_map:
            raise KeyError(
                f"Unknown class folder {image_class!r} in {image_path!r}; "
                f"expected one of {sorted(class_map)}"
            )
        class_label = class_map[image_class]

        for image_file in sorted(os.listdir(class_path)):
            file_path = os.path.join(class_path, image_file)
            # Skip hidden files and nested directories; only regular files are images.
            if image_file.startswith('.') or not os.path.isfile(file_path):
                continue
            image = load_img(file_path, target_size=target_size)
            image = img_to_array(image)
            image = preprocess_input(image)
            batch_images.append(image)
            label.append(class_label)

    if not batch_images:
        raise ValueError(f"No images found under {image_path!r}")

    batch_images = np.array(batch_images)
    features = model.predict(batch_images)
    # Flatten each feature map to one row. Letting reshape infer the width avoids
    # hard-coding 5*5*2048, which is only correct for 150x150 inputs.
    features = features.reshape((features.shape[0], -1))

    features_df = pd.DataFrame(
        features,
        columns=[f'resnet_{x+1}' for x in range(features.shape[-1])],
    )
    features_df['is_infected'] = label
    # CSV only: with the default settings the frame has 51,201 columns, which
    # exceeds the 16,384-column limit of an Excel sheet.
    features_df.to_csv(os.path.join('data', export_name+'.csv'), index=False)
    return features_df

In [58]:
# Features for the images under SELECTED_DATA; also written to
# data/orig_img_feature.csv by the helper.
orig_df = extract_and_export(
    image_path=SELECTED_DATA,
    export_name='orig_img_feature',
)



ValueError: This sheet is too large! Your sheet size is: 2000, 51201 Max sheet size is: 1048576, 16384

In [60]:
# Features for the images under PROCESSED_DATASET; also written to
# data/processed_img_feature.csv by the helper.
proc_df = extract_and_export(
    image_path=PROCESSED_DATASET,
    export_name='processed_img_feature',
)



In [61]:
# Display the processed-image feature table as the cell's result; the saved
# output elides most rows and columns of this very wide frame.
proc_df

Unnamed: 0,resnet_1,resnet_2,resnet_3,resnet_4,resnet_5,resnet_6,resnet_7,resnet_8,resnet_9,resnet_10,...,resnet_51191,resnet_51192,resnet_51193,resnet_51194,resnet_51195,resnet_51196,resnet_51197,resnet_51198,resnet_51199,resnet_51200
0,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.886713,0.0,0.000000,0.000000,0.0
1,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,1.846535,0.069445,0.0
2,0.604842,0.0,0.916294,0.900313,0.0,0.205639,0.966447,0.0,0.000000,0.425253,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
3,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
4,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
1996,0.000000,0.0,0.051989,0.761638,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,2.592691,0.0
1997,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,3.585832,0.073563,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
1998,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,0.0,0.000000,0.000000,...,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0
