In [1]:
import sys
# Add the project folder to the module search path (presumably so the local
# modules imported later in the notebook can be found).
# NOTE(review): this is a Google-Drive/Colab-style path while ROOT_DIR below is
# a Windows path — confirm which environment this notebook is meant to run in.
sys.path.append('/content/drive/MyDrive/Uni/Deep Learning for Computer Vision/GeoGuessr_Project')

# Image root folder; passed as `root_dir` to GeoGuessrDataset when the dataset
# is built later in the notebook.
ROOT_DIR = r'C:\Users\Shadow\Pictures\Geogussr\Projekt'

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models
#from torchsummary import summary
from torch.optim import lr_scheduler


# Dataset
from GeoGuessrDataset import GeoGuessrDataset


import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm.notebook import tnrange
import time
import copy

%matplotlib inline

# Enable autoreloading of imported modules.
%load_ext autoreload
%autoreload 2

In [3]:

# Use the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print(device)

# Target image size in pixels (height, width).
HEIGHT, WIDTH = 512, 2560

cuda:0


In [4]:

# from torchvision.models import resnet50, ResNet50_Weights
from torchvision.models.resnet import resnet50, ResNet50_Weights, resnet18, ResNet18_Weights, resnet101, ResNet101_Weights 
from torchvision.models import vit_b_16, ViT_B_16_Weights
from networks import TraversedNet

# Scratch cell: every call below builds a model and immediately discards it
# (nothing is assigned). Only the final expression, TraversedNet(), is rendered
# as the cell output.
# New weights with accuracy 80.858%
resnet50(weights=ResNet50_Weights.IMAGENET1K_V2)

# Best available weights (currently alias for IMAGENET1K_V2)
# Note that these weights may change across versions
resnet50(weights=ResNet50_Weights.DEFAULT)
resnet101(weights=ResNet101_Weights.DEFAULT)
vit_b_16(weights='IMAGENET1K_SWAG_E2E_V1')
TraversedNet()

TraversedNet(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv5): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv6): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool3): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv8): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv9): Conv2d(128, 2

In [5]:
import pygeohash as phg

# Read the per-image coordinates table.
df = pd.read_csv(
    r"C:\Users\Shadow\Documents\DLCV_Project_GeoGuessr_AI-valdrin\preprocess\coordinates.csv",
    delimiter=',',
    skiprows=0,
    low_memory=False,
)

# Encode every (latitude, longitude) pair as a geohash of precision 3.
# Three base-32 characters give 32**3 = 32768 possible cells; the cells that
# actually contain samples are used as classes further down.
df['geohash'] = [
    phg.encode(lat, lon, precision=3)
    for lat, lon in zip(df['latitude'], df['longitude'])
]


def geohash_to_decimal(geohash):
    """Interpret a geohash string as a base-32 number and return it as an int.

    The geohash alphabet ('0'-'9' plus the letters without a, i, l, o) maps
    each character to a digit 0..31; the leftmost character is the most
    significant digit. Input is case-insensitive. An empty string yields 0 and
    a character outside the alphabet raises ValueError.
    """
    alphabet = '0123456789bcdefghjkmnpqrstuvwxyz'
    value = 0
    # Horner's scheme: shift the accumulated value by one base-32 digit, then
    # add the digit of the current character.
    for symbol in geohash.lower():
        value = value * 32 + alphabet.index(symbol)
    return value

# Integer value of each geohash string.
df['geohash_decimal'] = df['geohash'].map(geohash_to_decimal)

# Distinct cells that contain at least one sample, in order of first appearance.
geohashes_with_samples = df['geohash_decimal'].unique()
print("Number of geohashes with samples", len(geohashes_with_samples))

# Map each occupied cell to a dense class index 0..N-1.
geohash_map = dict(zip(geohashes_with_samples, range(len(geohashes_with_samples))))
df['geo_code'] = df['geohash_decimal'].map(geohash_map)

# Persist the columns the dataset class reads.
output_columns = ["filename", "latitude", "longitude", "geohash_decimal", "geo_code"]
df[output_columns].to_csv(
    r"C:\Users\Shadow\Documents\DLCV_Project_GeoGuessr_AI-valdrin\preprocess\coordinates2.csv",
    index=False,
)

Number of geohashes with samples 3139


In [6]:
# --- Data pipeline configuration -------------------------------------------
NUM_CLASSES = 3139      # must match the number of geohash cells in coordinates2.csv
TRAIN_FRACTION = 0.8    # share of samples used for training; the rest is validation
BATCH_SIZE = 16
NUM_WORKERS = 6
SPLIT_SEED = 42         # fixes the train/val split so runs are comparable

# Per-image preprocessing: convert to a tensor, apply (x - 0.5) / 0.5 to each
# channel, then resize to the (HEIGHT, WIDTH) configured earlier in the notebook.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    transforms.Resize((HEIGHT, WIDTH)),
])

# Load the dataset and split it into training and validation sets.
dataset = GeoGuessrDataset(
    csv_file=r"C:\Users\Shadow\Documents\DLCV_Project_GeoGuessr_AI-valdrin\preprocess\coordinates2.csv",
    root_dir=ROOT_DIR,
    transform=transform,
    num_classes=NUM_CLASSES,
)
train_size = int(TRAIN_FRACTION * len(dataset))
val_size = len(dataset) - train_size
# A seeded generator makes the split identical after every kernel restart;
# without it each run validates on a different subset.
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
print(len(train_dataset))

# Define the dataloaders (only the training loader is shuffled).
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

# Lookups keyed by phase name, consumed by the training loop.
dataloaders = {"train": train_dataloader, "val": val_dataloader}
dataset_sizes = {"train": train_size, "val": val_size}

# Sanity check: fetch one batch and print the image / label tensor shapes.
batch = next(iter(train_dataloader))
x, y = batch["image"], batch["geohash"]
print(x.shape, y.shape)
    

103009
torch.Size([16, 3, 512, 2560]) torch.Size([16, 3139])


In [7]:
def force_cudnn_initialization():
    """Run one throwaway 2-D convolution on the CUDA device.

    Two all-zero 32x32x32x32 tensors are allocated on the GPU and convolved;
    the result is discarded. Judging by the name, the purpose is to make cuDNN
    initialise up front rather than during the first real forward pass.
    Requires a CUDA device.
    """
    side = 32
    cuda_device = torch.device('cuda')
    dummy_input = torch.zeros(side, side, side, side, device=cuda_device)
    dummy_weight = torch.zeros(side, side, side, side, device=cuda_device)
    torch.nn.functional.conv2d(dummy_input, dummy_weight)

In [8]:
def train_model(model, criterion, optimizer, scheduler, num_epochs=25, checkpoint_path=None):
    """Train ``model`` and return it with the best-validation weights loaded.

    Every epoch runs one full pass over ``dataloaders['train']`` followed by
    one full pass over ``dataloaders['val']``. ``dataloaders`` and ``device``
    are read from the notebook's global namespace. Each batch is expected to be
    a dict with an ``"image"`` tensor and a one-hot ``"geohash"`` tensor.

    Args:
        model: network to optimise; expected to live on ``device`` already.
        criterion: loss called as ``criterion(outputs, one_hot_targets.float())``.
        optimizer: optimiser stepped after every training batch.
        scheduler: learning-rate scheduler stepped once per epoch, after the
            training phase.
        num_epochs: number of train + val epochs to run.
        checkpoint_path: optional file path. When given, a checkpoint (model
            and optimiser state plus the loss/accuracy histories) is written
            with ``torch.save`` every time validation accuracy improves.

    Returns:
        ``model``, holding the weights of the epoch with the highest validation
        accuracy, or the final weights if validation accuracy never rose
        above 0.
    """
    torch.backends.cudnn.benchmark = True

    best_acc = 0.0
    best_model_wts = None

    val_acc_history = []
    train_acc_history = []
    train_loss_history = []
    val_loss_history = []

    for epoch in tnrange(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and a validation phase.
        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            samples_seen = 0

            for batch in dataloaders[phase]:
                inputs = batch["image"].to(device, non_blocking=True)
                targets = batch["geohash"].to(device, non_blocking=True)

                if is_train:
                    optimizer.zero_grad()

                # Only build the autograd graph while training.
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    loss = criterion(outputs, targets.float())

                    if is_train:
                        loss.backward()
                        optimizer.step()

                # Targets are one-hot, so argmax recovers the class index.
                preds = outputs.argmax(dim=1)
                true_classes = targets.argmax(dim=1)

                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += (preds == true_classes).sum().item()
                samples_seen += batch_size

            if is_train:
                scheduler.step()

            # Normalise by the samples actually processed so the averages stay
            # correct for any loader; max() guards against an empty loader.
            denominator = max(samples_seen, 1)
            epoch_loss = running_loss / denominator
            epoch_acc = running_corrects / denominator

            if is_train:
                train_acc_history.append(epoch_acc)
                train_loss_history.append(epoch_loss)
            else:
                val_acc_history.append(epoch_acc)
                val_loss_history.append(epoch_loss)

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Remember (and optionally persist) the best model seen so far.
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                if checkpoint_path is not None:
                    torch.save({
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer_state_dict': optimizer.state_dict(),
                        'loss': epoch_loss,
                        'val_loss_history': val_loss_history,
                        'val_acc_history': val_acc_history,
                        'train_loss_history': train_loss_history,
                        'train_acc_history': train_acc_history,
                    }, checkpoint_path)

        print()

    print(f'Best val Acc: {best_acc:4f}')

    # Load the best model weights, if any epoch improved on the initial 0.0.
    if best_model_wts is not None:
        model.load_state_dict(best_model_wts)

    return model

In [9]:
#########resnet
#model_ft = models.resnet101(weights=ResNet101_Weights.DEFAULT)
# model_ft = models.resnet18()

#num_ftrs = model_ft.fc.in_features
# Here the size of each output sample is set to the number of geohash classes (3139).
# Alternatively, it can be generalized to nn.Linear(num_ftrs, len(class_names)).
#model_ft.fc = nn.Linear(num_ftrs, 3139)


#######################################

###########transformers
#model_ft = vit_b_16(weights='IMAGENET1K_SWAG_E2E_V1')
#model_ft.heads.head = nn.Linear(768,3139)


##################################

# Build the custom network and move it to the selected device.
model_ft = TraversedNet().to(device)

# Classification loss over the geohash classes.
criterion = nn.CrossEntropyLoss()

# All model parameters are optimised with SGD + momentum.
optimizer_ft = torch.optim.SGD(
    params=model_ft.parameters(),
    lr=0.001,
    momentum=0.9,
)

# Multiply the learning rate by 0.1 every 7 scheduler steps (one step per epoch).
exp_lr_scheduler = lr_scheduler.StepLR(
    optimizer=optimizer_ft,
    step_size=7,
    gamma=0.1,
)


In [10]:
# Train for 100 epochs using the loss, optimiser and LR schedule configured above.
model_ft = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer_ft,
    scheduler=exp_lr_scheduler,
    num_epochs=100,
)

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch 1/100
----------
train Loss: 8.0488 Acc: 0.0000
val Loss: 8.0475 Acc: 0.0000

Epoch 2/100
----------
train Loss: 8.0505 Acc: 0.0000
val Loss: 8.0496 Acc: 0.0000

Epoch 3/100
----------
train Loss: 8.0448 Acc: 0.0000
val Loss: 8.0497 Acc: 0.0000

Epoch 4/100
----------
train Loss: 8.0461 Acc: 0.0000
val Loss: 8.0464 Acc: 0.0000

Epoch 5/100
----------
train Loss: 8.0433 Acc: 0.0000
val Loss: 8.0412 Acc: 0.0000

Epoch 6/100
----------
train Loss: 8.0452 Acc: 0.0000
val Loss: 8.0420 Acc: 0.0000

Epoch 7/100
----------
train Loss: 8.0473 Acc: 0.0000
val Loss: 8.0464 Acc: 0.0000

Epoch 8/100
----------
train Loss: 8.0445 Acc: 0.0000
val Loss: 8.0475 Acc: 0.0000

Epoch 9/100
----------
train Loss: 8.0474 Acc: 0.0000
val Loss: 8.0466 Acc: 0.0000

Epoch 10/100
----------
train Loss: 8.0422 Acc: 0.0000
val Loss: 8.0488 Acc: 0.0000

Epoch 11/100
----------
train Loss: 8.0456 Acc: 0.0000
val Loss: 8.0418 Acc: 0.0000

Epoch 12/100
----------
train Loss: 8.0465 Acc: 0.0000
val Loss: 8.0444 Ac

train Loss: 8.0455 Acc: 0.0000
val Loss: 8.0423 Acc: 0.0000

Epoch 99/100
----------
train Loss: 8.0446 Acc: 0.0000
val Loss: 8.0471 Acc: 0.0000

Epoch 100/100
----------
train Loss: 8.0434 Acc: 0.0000
val Loss: 8.0416 Acc: 0.0000

Best val Acc: 0.000000


In [None]:
def imshow(inp, title=None, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Display a normalised image tensor with matplotlib.

    Args:
        inp: image tensor laid out as (channels, height, width), e.g. the
            output of ``torchvision.utils.make_grid``.
        title: optional figure title.
        mean: per-channel mean that was passed to ``transforms.Normalize``.
        std: per-channel std that was passed to ``transforms.Normalize``.

    The defaults match the notebook's transform (mean 0.5, std 0.5). Undoing
    the normalisation with different statistics (for example the ImageNet
    ones) shifts the displayed colours.
    """
    # detach/cpu so tensors on the GPU or attached to a graph can be shown too.
    inp = inp.detach().cpu().numpy().transpose((1, 2, 0))  # CHW -> HWC for matplotlib
    # Invert Normalize: x = x_norm * std + mean, broadcast over the channel axis.
    inp = np.asarray(std) * inp + np.asarray(mean)
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated


# Preview a single training batch. Looping over the whole loader would render
# one figure for every batch in the training set.
batch = next(iter(dataloaders["train"]))
# .float() is a no-op when the dataset already yields float32 tensors; it is
# kept in case the dataset returns another dtype.
images, labels = batch["image"].float(), batch["geohash"].float()
out = torchvision.utils.make_grid(images)

imshow(out)


In [None]:
# Scratch tensor holding 0..9 arranged as 2 rows x 5 columns.
t = torch.arange(10).view(2, 5)

In [None]:
# Display the scratch tensor `t` as the cell output.
t