#### Module imports

In [9]:
import torch
import torchvision
import numpy as np
from numpy import asarray
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from torch import permute,avg_pool1d
from PIL import Image
import matplotlib.pyplot as plt
import cv2
import os
from pathlib import Path
import pickle
from tqdm.notebook import tqdm
import pandas as pd

import sys
sys.path.insert(0,'../')
import label_tools as lt

ModuleNotFoundError: No module named 'label_tools'

#### Loading files

In [8]:
# load pre processed files
imgs_path = Path('/home/richard/data/Schiefer/combine/preprocessed_224_clipped_pred_polygon_1126/')
assert imgs_path.is_dir()
files = sorted(imgs_path.glob('*.png'))

randomizer = np.random.RandomState(seed=99834)
randomizer.shuffle(files)

assert len(files) == 1126 # all files are found
print("First 10 files are: {}".format([x.name for x in files[:10]]))

First 10 files are: ['CFB184_ortho_mask_0038_10_preprocessed.png', 'CFB125_ortho_mask_0104_12_preprocessed.png', 'CFB184_ortho_mask_0138_12_preprocessed.png', 'CFB167_ortho_mask_0029_12_preprocessed.png', 'CFB133_ortho_mask_0203_10_preprocessed.png', 'CFB151_ortho_mask_0041_12_preprocessed.png', 'CFB184_ortho_mask_0252_10_preprocessed.png', 'CFB151_ortho_mask_0050_12_preprocessed.png', 'CFB125_ortho_mask_0053_4_preprocessed.png', 'CFB167_ortho_mask_0198_12_preprocessed.png']


#### Loading Images

In [10]:
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
to_tensor = transforms.ToTensor()

def alter_image(img_name):
    image = cv2.imread(str(img_name))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_tensor = normalize(to_tensor(image)).unsqueeze(0)
    image_tensor = image_tensor.reshape(1,3,224,224)
    return image_tensor

def load_images_as_tensors(paths):
    images = [alter_image(image) for image in paths]
    return images

In [11]:
image_tensors = load_images_as_tensors(files)
assert len(image_tensors) == 1126

#### Extracting labels from filenames

In [5]:
def extract_labels(files): return [filename.stem.split('_')[4] for filename in files]
labels = extract_labels(files)
print('first 10 labels: {}'.format(labels[:10]))

first 10 labels: ['10', '12', '12', '12', '10', '12', '10', '12', '4', '12']


In [6]:
# change labels from number to species (of collab)
update = {
    '4':'Fagus_sylvatica',
    '5':'Fraxinus_excelsior',
    '6':'Quercus_spec',
    '8':'deadwood',
    '10':'Abies_alba',
    '11':'Larix_decidua',
    '12':'Picea_abies',
    '13':'Pinus_sylvestris',
    '14':'Pseudotsuga_menziesii'
}

updated_labels = (pd.Series(labels)).map(update)
species_labels = list(updated_labels)
labels = species_labels
print('first 10 labels: {}'.format(labels[:10]))

first 10 labels: ['Abies_alba', 'Picea_abies', 'Picea_abies', 'Picea_abies', 'Abies_alba', 'Picea_abies', 'Abies_alba', 'Picea_abies', 'Fagus_sylvatica', 'Picea_abies']


#### Label encoding

Standardize encodings of labels to make analysis easier afterwards.

In [7]:
le = lt.CustomLabelEncoder()
le.fit(labels, sorter=lambda x: x.upper())

labels_int = le.transform(labels[:10])
labels_str = le.inverse_transform(labels_int)

label_dir = Path('/home/richard/data/Schiefer/combine/')
filename = Path('VGG16_polygon_pred_label_encodings_224_' + str(imgs_path).split('_')[5] + '.pickle')
with open(label_dir / filename, 'wb') as f:
    pickle.dump(le, f)

print('label encodings: {}'.format(le.mapper))
print('first 10 integer labels: {}'.format(labels_int))
print('first 10 string labels: {}'.format(labels_str))

label encodings: {'Abies_alba': 0, 'deadwood': 1, 'Fagus_sylvatica': 2, 'Fraxinus_excelsior': 3, 'Larix_decidua': 4, 'Picea_abies': 5, 'Pinus_sylvestris': 6, 'Pseudotsuga_menziesii': 7, 'Quercus_spec': 8}
first 10 integer labels: [0 5 5 5 0 5 0 5 2 5]
first 10 string labels: ['Abies_alba' 'Picea_abies' 'Picea_abies' 'Picea_abies' 'Abies_alba'
 'Picea_abies' 'Abies_alba' 'Picea_abies' 'Fagus_sylvatica' 'Picea_abies']


#### Feature Extraction

Load the model

In [6]:
# # for avgpool layer features 512
# model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1).features
# for param in model.parameters():
#     param.requires_grad = False

In [3]:
class FeatureExtractor(nn.Module):
  def __init__(self, model):
    super(FeatureExtractor, self).__init__()
		# Extract VGG-16 Feature Layers
    self.features = list(model.features)
    self.features = nn.Sequential(*self.features)
		# Extract VGG-16 Average Pooling Layer
    self.pooling = model.avgpool
		# Convert the image into one-dimensional vector
    self.flatten = nn.Flatten()
		# Extract the first part of fully-connected layer from VGG16
    self.fc = model.classifier[0]
  
  def forward(self, x):
		# It will take the input 'x' until it returns the feature vector called 'out'
    out = self.features(x)
    out = self.pooling(out)
    out = self.flatten(out)
    out = self.fc(out) 
    return out 

model = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
new_model = FeatureExtractor(model)

#### Get FC1 features of the VGG16 model

In [13]:
def get_feature(tensor):
    with torch.no_grad():
        feature = new_model(tensor)
    feature = concat_tensors(feature)
    return feature

def concat_tensors(features):
    fc = torch.cat(features)
    fc = fc.cpu().detach().numpy()
    return fc

def get_features(tensors):
    with torch.no_grad():
        features = [model(tensor) for tensor in tqdm(tensors)]
    features = concat_tensors(features)
    return features

In [12]:
# test
print(get_feature(image_tensors[0]))

tensor([[-0.5146, -3.0128,  1.2858,  ..., -4.5630, -0.6749, -2.4996]],
       grad_fn=<AddmmBackward0>)


In [None]:
# fc1 = get_features(image_tensors)
# print(fc1.shape)

#### Get avgpool features (flattened 7 x 7 x 512)

In [11]:
# def get_pooled_features(tensors):
#     features = [model(tensor) for tensor in tqdm(tensors)]
#     print("done first")
#     features = [torch.flatten(tensor, 1) for tensor in tensors]
#     features = concat_tensors(features)
#     return features

In [12]:
# fc1 = get_pooled_features(image_tensors)
# print(fc1.shape)

  0%|          | 0/1126 [00:00<?, ?it/s]

done first
(1126, 150528)


In [10]:
# save results
results = {'filename': files,
           'features': fc1,
           'labels': labels,
           'layer_name': 'fc1'}

feature_dir = Path('/home/richard/data/Schiefer/combine/')
feature_filename = Path('VGG16_polygon_pred_224_' + str(imgs_path).split('_')[5] + '.pickle')
Path(feature_dir).mkdir(parents=True, exist_ok=True)
with open(feature_dir / feature_filename, 'wb') as f:
    pickle.dump(results, f)