In [1]:
import os
import numpy as np
import pandas as pd
import geopandas as gpd
import torch 
from torchvision import transforms, utils
from torch.utils.data import Dataset
from skimage import io, transform
import matplotlib.pyplot as plt
from scipy.cluster.vq import kmeans2, whiten

In [2]:
# Enable autoreloading of imported modules.
%load_ext autoreload
%autoreload 2

In [3]:
def get_lat_lon(rel_path_to_images):
    """
    Parse the coordinates encoded in the image file names of a directory.

    Every entry returned by ``os.listdir`` is used. The first four and the
    last four characters of each name are dropped, and the remainder is split
    on "," and parsed as floats.

    Args:
        rel_path_to_images (str): Directory whose file names carry the
            coordinates.

    Returns:
        np.ndarray: One row per file, in ``os.listdir`` order. The columns
        follow the order used in the file name (documented by the original
        author as [Lon, Lat]).
    """
    coordinate_rows = []
    for file_name in os.listdir(rel_path_to_images):
        # Drop the 4-character prefix and the 4-character suffix (extension).
        coordinate_text = file_name[4:-4]
        coordinate_rows.append(np.array(coordinate_text.split(","), dtype=float))

    return np.array(coordinate_rows)

In [6]:
# TODO: Find the optimal k (e.g. with the elbow method).
# get_clusters returns its result in several formats because I was experimenting with formats
# for the dataloader; they may be useful later.

def get_clusters(labels, k, seed=None):
    """
    Run k-means on the sample coordinates and assign a cluster to each sample.

    Args:
        labels (np.ndarray): Sample coordinates, one row per sample
            (presumably shape N x 2 as produced by get_lat_lon).
        k (int): Number of clusters to create.
        seed: Optional seed forwarded unchanged to ``kmeans2(seed=...)`` to
            make the clustering reproducible. With the default ``None`` the
            call is identical to the previous, unseeded behaviour.

    Returns:
        tuple: ``(labels, label_location, y)`` where

        - ``labels`` is the input with the cluster index appended as an extra
          last column (the index is cast to the common dtype of the array),
        - ``label_location`` is a dict mapping each cluster index to the
          coordinates of that cluster's centroid,
        - ``y`` is the 1-D array with the cluster index of every sample.
    """
    # Only forward `seed` when one was requested, so the default path calls
    # kmeans2 exactly as before.
    seed_kwargs = {} if seed is None else {"seed": seed}
    centroids, assignments = kmeans2(labels, k, iter=20, **seed_kwargs)

    labels = np.hstack((labels, assignments[:, np.newaxis]))

    # Map class index -> centroid coordinates for the final prediction.
    label_location = dict(zip(np.arange(len(centroids)), centroids))

    return labels, label_location, assignments

In [48]:
# Create the Dataset used for data loading.
# The network only predicts cluster classes, not coordinates yet, so y is a single number per sample.
# This can be changed later; it just seemed more convenient.

class GeoGuessrData(Dataset):
    """Dataset pairing the idx-th file of an image directory with the idx-th cluster label."""

    def __init__(self, y, root_dir, transform = None):
        """
        y(array): array shape N with clusternames
        root_dir(string): Directory with all the images
        transform: Optional callable applied to the whole sample dict
            ({"image": ..., "cluster": ...}); its return value is the sample.
        """
        self.y = y
        self.root_dir = root_dir
        self.transform = transform
        # Directory listing, read lazily on first access (see _get_file_names).
        self._file_names = None

    def _get_file_names(self):
        """Return the directory listing, reading it from disk only once."""
        # Listing the directory for every sample costs O(N) per access, so it
        # is cached. The order is deliberately the raw os.listdir order (not
        # sorted): y[idx] is expected to line up with that order, as it does
        # when the labels come from get_lat_lon on the same directory.
        # getattr also covers instances created without running __init__.
        if getattr(self, "_file_names", None) is None:
            self._file_names = os.listdir(self.root_dir)
        return self._file_names

    def __getitem__(self, idx):
        """support the indexing such that dataset[i] can be used to get ith sample"""

        if torch.is_tensor(idx):
            idx = idx.tolist()

        # get image by path and idx
        img_name = self._get_file_names()[idx]
        image = io.imread(os.path.join(self.root_dir, img_name))
        sample = {"image": image, "cluster": self.y[idx]}

        if self.transform:
            sample = self.transform(sample)

        return sample

    def __len__(self):
        """Number of samples, defined by the number of labels in y."""
        return len(self.y)
    
    

class ToTensor:
    """Turn the ndarray contents of a sample dict into float tensors."""

    def __call__(self, sample):
        """Return a new dict with 'image' and 'cluster' converted to float tensors."""
        image = sample['image']
        cluster = sample['cluster']

        # numpy stores images as H x W x C, torch expects C x H x W,
        # so the colour axis is moved to the front.
        channels_first = image.transpose(2, 0, 1)
        image_tensor = torch.from_numpy(channels_first).float()

        # Whether the label really needs to be a (float) tensor is still open.
        cluster_tensor = torch.from_numpy(np.array(cluster)).float()

        return {'image': image_tensor, 'cluster': cluster_tensor}

In [49]:
# Folder holding the images (machine-specific absolute path).
DATA_DIR = r"F:\Users\basti\Projekt"
# Number of k-means clusters to create.
N_CLUSTERS = 100

# torchvision pipeline for a single image: ndarray/PIL -> C x H x W float
# tensor, then per-channel normalisation with mean 0.5 and std 0.5.
image_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((.5, .5, .5), (.5, .5, .5))
])


def transform(sample):
    """
    Apply the image pipeline to a sample dict produced by GeoGuessrData.

    GeoGuessrData hands the whole {"image", "cluster"} dict to its transform,
    whereas torchvision's ToTensor only accepts a PIL image or ndarray and
    raises TypeError on a dict. The pipeline is therefore applied to the image
    entry only and the cluster label is passed through unchanged.
    """
    return {"image": image_transform(sample["image"]), "cluster": sample["cluster"]}


# load dataset for dataloader
labels = get_lat_lon(DATA_DIR)
labels, label_location, y = get_clusters(labels, N_CLUSTERS)
dataset = GeoGuessrData(y, DATA_DIR, transform=transform)




In [50]:
# Split into 80% train and two (roughly) equal halves of the remainder.
size = len(dataset)
train_size = int(size*0.8)
test_size = (size - train_size) // 2
# The validation set takes whatever is left, so the three sizes always sum to
# `size`. The previous `// 2 + 1` overshot by one whenever the remainder was
# even, which makes random_split raise a ValueError.
validation_size = size - train_size - test_size
train_dataset, validation_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_size, validation_size, test_size])


In [53]:
from networks import ResNet
from solver import Solver

# Release cached, unused GPU memory before building a new model.
torch.cuda.empty_cache()
model = ResNet()
# Train on the training split (the test split was passed here before, which
# left train_dataset unused) and validate on the validation split.
# NOTE(review): `weight_decay` is given as True rather than a float; if Solver
# forwards this config to the torch optimizer it acts as a decay of 1.0.
# Confirm against solver.py.
# NOTE(review): the recorded output of this cell shows
# "TypeError: 'DataLoader' object is not subscriptable"; the failing line is
# not in this cell (presumably inside Solver) and still has to be checked.
solver = Solver(
    model,
    {'train': train_dataset, 'val': validation_dataset},
    optimizer="Adam",
    optimizer_config={'weight_decay': True},
)
history = solver.train(num_epochs=10)


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

TypeError: 'DataLoader' object is not subscriptable

In [None]:
# Visualise the `history` object returned by solver.train.
# NOTE(review): presumably plots loss/accuracy curves per epoch; confirm in utils.show_training.
from utils import show_training
show_training(history)

In [10]:
import gc

# `del` any large variables (model, tensors) here first so they can be freed.
# Collect unreachable Python objects before emptying the CUDA cache:
# empty_cache() only returns memory that is no longer occupied, so tensors
# that are still waiting for garbage collection must be collected first.
gc.collect()
torch.cuda.empty_cache()

0