In [102]:
import os
import torch
import torchvision
import numpy as np
import torch.nn.functional as F
import torchvision.models as models
from torch import optim, nn
from torch.utils import data
from torch.utils.data import DataLoader
from torchvision import transforms
from numpy.random import permutation
from tqdm import tqdm

In [166]:
# Define the CNN architecture

class CNNClassifier(nn.Module):
    def __init__(self):
        super(CNNClassifier, self).__init__()
        
        self.conv1 = nn.Conv3d(
            in_channels=1, 
            out_channels=32,
            kernel_size=(3, 3, 3), 
            stride=(1, 1, 1),
            padding=(0, 0, 0)
        )
        self.conv1_bn = nn.BatchNorm3d(32)
        
        self.conv2 = nn.Conv3d(
            in_channels=32,
            out_channels=64, 
            kernel_size=(1, 3, 3), 
            stride=(1, 1, 1)
        )
        self.conv2_bn = nn.BatchNorm3d(64)
        
        self.conv3 = nn.Conv3d(
            in_channels=64, 
            out_channels=64, 
            kernel_size=(1, 3, 3), 
            stride=(1, 1, 1)
        )
        self.conv3_bn = nn.BatchNorm3d(64)
        
        # self.fc1 = nn.Linear(64*11*11, 500)
        self.fc1 = nn.Linear(64*24*24, 500)
        self.fc2 = nn.Linear(500, 2500)
        
    def forward(self, x):
        
        x = x.view(-1, 1, 3, 102, 102)
        
        # x = self.conv1_bn(self.conv1(x))
        
        x = self.conv1(x)
        x = F.relu(F.max_pool3d(x, kernel_size=(1, 2, 2)))
        
        # x = self.conv2_bn(self.conv2(x))
        x = self.conv2(x)
        x = F.relu(F.max_pool3d(x, kernel_size=(1, 2, 2)))
        
        # x = self.conv3_bn(self.conv3(x))
        # x = F.relu(F.max_pool3d(x, kernel_size=(1, 2, 2)))
        
        # x = x.view(-1, 64*11*11)
        x = x.view(-1, 64*24*24)
        
        x = F.relu(self.fc1(x))
        x = F.tanh(self.fc2(x))
        
        return x

def evaluate_model(model, loss_fn, val_data_loader):
    losses = []
    rmse_values = []
    
    with torch.no_grad():  # There is no need to calculate gradients for the validation set
        for b_x, b_y in val_data_loader:
            model.eval()
            
            # Run batch on GPU, if possible
            b_x = b_x.to(device)
            b_y = b_y.to(device)
            
            # Compute predictions and losses
            preds = model(b_x)
            
            temp = preds.cpu().detach().numpy() - b_y.cpu().detach().numpy()
            temp = temp**2
            
            rmse = np.sqrt(temp.mean())
            rmse_values.append(rmse)
            
            loss = loss_fn(preds, b_y)
            losses.append(loss.item())

        val_avg_loss = sum(losses)/len(losses)    
    
    return val_avg_loss, np.array(rmse_values)


In [105]:
def NDVI(ds):
    ndvi = (ds.B8A_20m - ds.B04_20m) / (ds.B8A_20m + ds.B04_20m)
    return ndvi.values
        
def measure_cloudfrees(ds):
    W_valid = masking_valid(ds)
    cloud_fr = W_valid.isin([1]).sum(dim=['x', 'y']) / ds.SCL_20m.isel(time=0).count(dim = ['x', 'y'])
    return cloud_fr.values
     
def masking_valid(ds):
    W_valid = ds.SCL_20m.where(((ds.SCL_20m>=4) & (ds.SCL_20m<=5)),0)
    W_valid=W_valid.where(W_valid==0,1)
    return W_valid

def find_indices(cloudfrees, n):
    cloudfrees2 = cloudfrees.copy()
    if n == 0:
        id1 = 1+np.argmax(cloudfrees2[1:])
        cloudfrees2[id1] = -1
        id2 = 1+np.argmax(cloudfrees2[1:])
        return [id1, id2]
    elif n == len(cloudfrees2)-1:
        id1 = np.argmax(cloudfrees2[:len(cloudfrees2)-1])
        cloudfrees2[id1] = -1
        id2 = np.argmax(cloudfrees2[:len(cloudfrees2)-1])
        return [id1, id2]
    else:
        id1 = np.argmax(cloudfrees2[:n])
        id2 = n+np.argmax(cloudfrees2[n+1:])
        return [id1, id2]

def create_training_set(imgs, indices, mask_size, depth = 4):
    """ imgs is a list of list where each list contains N numpy arrays """
    mask_size_x = mask_size[0] 
    mask_size_y = mask_size[1]
    mask = np.zeros((mask_size_x, mask_size_y))
    training_set = []
    targets = []
    for j, img_batch in enumerate(imgs): # for each list with np arrays in imgs
        for i in range(len(img_batch) - 3 + 1):
            
            inds1 = indices[j][i][0]
            inds2 = indices[j][i][1]
                        
            if inds1 < i and inds2 < i:
                mat = img_batch[inds1][np.newaxis,...]
                mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
                mat = np.vstack([ mat, img_batch[i][np.newaxis,...] ])
                
            elif inds1 > i and inds2 > i:
                
                mat = img_batch[i][np.newaxis,...]
                #print(inds1, len(img_batch))
                mat = np.vstack([ mat, img_batch[inds1][np.newaxis,...] ])
                mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
            else:
                mat = img_batch[inds1][np.newaxis,...]
                mat = np.vstack([ mat, img_batch[i][np.newaxis,...] ])
                mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
                            
            for k in range(3): # go through the stacked arrays
                for l in range(26,74,10): 
                    for m in range(26,74,10):
                        temp = mat.copy()
                       
                        target = temp[k, l:l + mask_size_y, m:m + mask_size_x].flatten()
                        targets.append(target)
                        temp[k, l:l + mask_size_y, m:m + mask_size_x] = mask.copy()
                        temp = temp[:, l - 26:l + mask_size_y + 26, m-26:m+mask_size_x + 26]
                        training_set.append([temp])
                        #print(temp.shape)
                        
    print('--- The training set consists of ' + str(len(training_set)) + ' images ---')
    return training_set, targets


def get_inputs_targets(data):
    img_ls = []
    imgs = []
    indices = []
    img_batch = NDVI(data)
    
    cloudfrees = measure_cloudfrees(data)
    ind_list = []
    for i in range(img_batch.shape[0]):
        img = img_batch[i,:,:]
        inds = find_indices(cloudfrees, i)
        imgs += [img]
        ind_list += [inds]
    
    indices += [ind_list]
    img_ls.append(imgs)
    
    training_data, targets = create_training_set(img_ls, indices, (50,50), 5)
    
    return training_data, targets
        

In [106]:
def get_inputs_targets(data):
    img_ls = []
    imgs = []
    indices = []
    img_batch = NDVI(data)
    
    cloudfrees = measure_cloudfrees(data)
    ind_list = []
    for i in range(img_batch.shape[0]):
        img = img_batch[i,:,:]
        inds = find_indices(cloudfrees, i)
        imgs += [img]
        ind_list += [inds]
    
    indices += [ind_list]
    img_ls.append(imgs)
    
    training_data, targets = create_training_set(img_ls, indices, (50,50), 5)
    
    return training_data, targets

root_path = 'febhack2020/datasets/training/checked'
dataset = get_dataset(root_path)




In [101]:
data = dataset[0][0]

for i, temp_date in enumerate(data.time.values):
    array = data.B05_20m.isel(time=i).values
    print(np.sum(array==0))



0
0
0
0
0
0
0
0
0
0
0
0
0
2500
0
0
0
0


In [167]:
def get_dataset(root_path):
    dataset = []
    for subdir, dirs, files in os.walk(root_path):
        for file in files:
            filepath = subdir + os.sep + file
            if 'data1b.pickle' in filepath:
                #img_paths.append(filepath)
                file = open(filepath, 'rb')
                data = pickle.load(file)
                file.close()
                fname = filepath.split('/')[-2]
                dataset.append((data, fname.split('_')[0]))
    return dataset

root_path = 'febhack2020/datasets/training/checked'
dataset = get_dataset(root_path)

#################
##### TRAIN #####
#################

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def train_network(model, n_epochs, batch_size, loss_fn, optimizer, dataset):
    train_losses = []
    val_losses = []
    
    for _ in range(n_epochs):
    
        inds = np.random.choice(len(dataset), 5, replace =False)
        inputs = []
        targets = []
        for j in inds:
            d, date_to_remove = dataset[j]
            
            temp_indices = []
            for i, temp_date in enumerate(d.time.values):
                array = d.B05_20m.isel(time=i).values
                if np.sum(array==0) > 0:
                    temp_indices.append(i)
            
            #for i in temp_indices:
            d = d.where(~d.time.isin(d.time[temp_indices]), drop=True)

            temp_inputs, temp_targets = get_inputs_targets(d)
            inputs.extend(temp_inputs)
            targets.extend(temp_targets)

        i_training = int(0.9 * len(inputs))

        train_x = inputs[:i_training]
        val_x = inputs[i_training:]

        train_y = targets[:i_training]
        val_y = targets[i_training:]

        tensor_train_x = torch.Tensor(train_x)
        tensor_train_y = torch.Tensor(train_y)

        tensor_val_x = torch.Tensor(val_x)
        tensor_val_y = torch.Tensor(val_y)

        train_dataset = data.TensorDataset(tensor_train_x, tensor_train_y)
        val_dataset = data.TensorDataset(tensor_val_x, tensor_val_y)

        train_data_loader = data.DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True
        )
        val_data_loader = data.DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=True,
            drop_last=True
        )

        for i_epoch in range(n_epochs):
            # print('--- Epoch: {} ---'.format(i_epoch))
            losses = []
            model.train()

            for i_batch, (b_x, b_y) in enumerate(train_data_loader):
                if i_batch % 10 == 0: print('Batch: {}'.format(i_batch))

                # Run batch on GPU, if possible
                b_x = b_x.to(device)
                b_y = b_y.to(device)

                # Compute predictions and losses
                preds = model(b_x)
                loss = loss_fn(preds, b_y)
                losses.append(loss.item())

                # Backpropagate
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

            # Compute loss in the entire training set
            train_avg_loss = sum(losses)/len(losses)

            # Compute loss in the entire validation set
            val_avg_loss, rmse_values = evaluate_model(model, loss_fn, val_data_loader)

            print('Training loss: %.5f' % train_avg_loss)
            print('Validation loss: %.5f' % val_avg_loss)
            print('RMSE: %.5f' % np.sqrt(np.mean(rmse_values**2)))

            train_losses.append(train_avg_loss)
            val_losses.append(val_avg_loss)
    
    return train_losses, val_losses
        
model = CNNClassifier()
model.to(device);

n_epochs = 10
batch_size = 32
loss_fn = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=0.005)
    
train_losses, val_losses = \
    train_network(model, n_epochs, batch_size, loss_fn, optimizer, dataset)


--- The training set consists of 675 images ---
--- The training set consists of 1275 images ---
--- The training set consists of 675 images ---
--- The training set consists of 750 images ---
--- The training set consists of 1125 images ---
Batch: 0
Batch: 10




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.04764
Validation loss: 0.01016
RMSE: 0.10078
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01109
Validation loss: 0.01959
RMSE: 0.13998
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01345
Validation loss: 0.01042
RMSE: 0.10206
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.00902
Validation loss: 0.01411
RMSE: 0.11878
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01004
Validation loss: 0.01261
RMSE: 0.11230
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.02537
Validation loss: 0.01196
RMSE: 0.10937
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.02240
Validation loss: 0.01673
RMSE: 0.12934
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.02106
Validation loss: 0.01381
RMSE: 0.11753
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.02081
Validation loss: 0.01544
RMSE: 0.12425
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.02037
Validation loss: 0.01282
RMSE: 0.11322
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch



Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02587
Validation loss: 0.01106
RMSE: 0.10515
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02390
Validation loss: 0.01224
RMSE: 0.11065
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02335
Validation loss: 0.01147
RMSE: 0.10709
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02483
Validation loss: 0.01319
RMSE: 0.11484
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02326
Validation loss: 0.01225
RMSE: 0.11067
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02813
Validation loss: 0.00466
RMSE: 0.06824
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02633
Validation loss: 0.00369
RMSE: 0.06075
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02638
Validation loss: 0.00421
RMSE: 0.06490
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02533
Validation loss: 0.00734
RMSE: 0.08569
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.02704
Validation loss: 0.00405
RMSE: 0.06365
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01655
Validation loss: 0.01129
RMSE: 0.10627
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01524
Validation loss: 0.01131
RMSE: 0.10633
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01518
Validation loss: 0.01106
RMSE: 0.10515
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01479
Validation loss: 0.01168
RMSE: 0.10806
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01473
Validation loss: 0.01071
RMSE: 0.10350
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Training loss: 0.01470
Validation loss: 0.01135
R



Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01764
Validation loss: 0.01799
RMSE: 0.13412
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01684
Validation loss: 0.01368
RMSE: 0.11696
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01650
Validation loss: 0.01403
RMSE: 0.11845
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01652
Validation loss: 0.01327
RMSE: 0.11519
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01656
Validation loss: 0.01413
RMSE: 0.11887
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01258
Validation loss: 0.01006
RMSE: 0.10029
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01175
Validation loss: 0.00889
RMSE: 0.09431
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01164
Validation loss: 0.01024
RMSE: 0.10119
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01153
Validation loss: 0.01025
RMSE: 0.10124
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01150
Validation loss: 0.00951
RMSE: 0.09749
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01629
Validation loss: 0.00939
RMSE: 0.09688
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01549
Validation loss: 0.00863
RMSE: 0.09290
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01538
Validation loss: 0.00902
RMSE: 0.09496
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01528
Validation loss: 0.00786
RMSE: 0.08867
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Training loss: 0.01573
Validation loss: 0.00766
RMSE: 0.08753
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40




Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Batch: 130
Training loss: 0.01848
Validation loss: 0.02116
RMSE: 0.14547
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Batch: 130
Training loss: 0.01702
Validation loss: 0.01458
RMSE: 0.12075
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Batch: 130
Training loss: 0.01648
Validation loss: 0.01718
RMSE: 0.13108
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Batch: 130
Training loss: 0.01634
Validation loss: 0.01328
RMSE: 0.11525
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Batch: 120
Batch: 130
Training loss: 0.01741
Validation loss: 0.01597
RMSE: 0.



Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.01297
Validation loss: 0.01136
RMSE: 0.10657
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.01278
Validation loss: 0.01104
RMSE: 0.10505
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.01250
Validation loss: 0.01163
RMSE: 0.10786
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.01243
Validation loss: 0.01126
RMSE: 0.10613
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch: 100
Batch: 110
Training loss: 0.01246
Validation loss: 0.01196
RMSE: 0.10934
Batch: 0
Batch: 10
Batch: 20
Batch: 30
Batch: 40
Batch: 50
Batch: 60
Batch: 70
Batch: 80
Batch: 90
Batch

In [69]:
def get_n_gdf_data(filepath):
    # Load the data provided (input data to your model)
    data1a = pd.read_csv(filepath + "/data1a.csv")
    data1b = pickle.load(open(filepath + "/data1b.pickle", "rb"))
    # Get the index n where the missing data is located
    filename = filepath.split("/")[-1]
    datestr = filename.split("_")[0]
    #dt = pd.to_datetime(datestr)
    #first_date = "%04d-%02d-01"%(dt.year,dt.month)
    #last_date = "%04d-%02d-01"%(dt.year,dt.month+1)
    n = np.argwhere(data1b.time.dt.strftime("%Y-%m-%d").values == datestr)[0][0]
    # Read the geojson polygon
    gdf = gpd.read_file(filepath + "/" + filename + ".geojson")
    return data1a, data1b, n, gdf

def read_solutions(filepath, filename="answer"):    
    #Read the SOLUTIONS
    #Task 1a
    with open(filepath + "/" + filename + "1a.txt", "r") as f:
        solution1a = f.read()
    #Task 1b
    with open(filepath + "/" + filename + "1b.pickle", "rb") as f:
        solution1b = pickle.load(f)
    return solution1a, solution1b

def NDVI(ds):
    ndvi = (ds.B8A_20m - ds.B04_20m) / (ds.B8A_20m + ds.B04_20m)
    return ndvi

In [215]:
####################
##### VALIDATE #####
####################

import pickle
import pandas as pd
import geopandas as gpd
from glob import glob

files = glob("/home/jovyan/febhack2020/datasets/validation/*")

all_rmse = []



for i, filepath in enumerate(files):
    _, data1b, n, gdf = get_n_gdf_data(filepath)
    _, solution1b = read_solutions(filepath)
    
    temp_indices = []
    for j, _ in enumerate(data1b.time.values):
        array = data1b.B05_20m.isel(time=j).values
        if np.sum(array==0) > 0:
            temp_indices.append(j)
           
    temp_indices.remove(n)
    
    data1b = data1b.where(~data1b.time.isin(data1b.time[temp_indices]), drop=True)
    
    for l, _ in enumerate(data1b.time.values):
        array = data1b.B05_20m.isel(time=l).values
        if np.sum(array==0) > 0:
            break    
            
    cloudfrees = measure_cloudfrees(data1b)
    indices = find_indices(cloudfrees, l)
    
    img_batch = NDVI(data1b)
    #missing_data = input_data[n]
    #nan_indices = np.argwhere(np.isnan(missing_data))
    #corner_indices = nan_indices[0]
    
    # img_batch = [input_data,input_data[l, :, :]]
                 
    inds1 = indices[0]
    inds2 = indices[1]

    helper = 0
    if inds1 < l and inds2 < l:
        helper = 2
        mat = img_batch[inds1][np.newaxis,...]
        mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
        mat = np.vstack([ mat, img_batch[l][np.newaxis,...] ])

    elif inds1 > l and inds2 > l:

        mat = img_batch[l][np.newaxis,...]
        #print(inds1, len(img_batch))
        mat = np.vstack([ mat, img_batch[inds1][np.newaxis,...] ])
        mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
        helper = 0
    else:
        mat = img_batch[inds1][np.newaxis,...]
        mat = np.vstack([ mat, img_batch[l][np.newaxis,...] ])
        mat = np.vstack([ mat, img_batch[inds2][np.newaxis,...] ])
        helper = 1
    
    nan_indices = np.argwhere(np.isnan(mat[helper, :, :]))
    
    network_data = np.nan_to_num(mat)
    
    # print(nan_indices)
    
    corner_indices = nan_indices[0]
    
    if helper == 0:
        network_data = img_batch[np.array([l, inds1, inds2]), 
                              corner_indices[0]-26:corner_indices[0]-26+102, 
                              corner_indices[1]-26:corner_indices[1]-26+102]
        n_missing = 0
    elif helper == 2:
        network_data = img_batch[np.array([inds1, inds2, l]), 
                              corner_indices[0]-26:corner_indices[0]-26+102, 
                              corner_indices[1]-26:corner_indices[1]-26+102]
        n_missing = 2
    else:
        network_data = img_batch[np.array([inds1, l, inds2]), 
                              corner_indices[0]-26:corner_indices[0]-26+102, 
                              corner_indices[1]-26:corner_indices[1]-26+102]
        n_missing = 1
    
    target = solution1b.values[~np.isnan(solution1b.values)]
    
    network_data = np.float32(network_data)
    target = np.float32(target)
    
    # print(network_data.shape)
    
    #print(network_data)
    
  
    try:
        network_data_torch = torch.tensor(np.reshape(network_data, (1, 3, 102, 102))) #.view(-1, 1, 3, 102, 102)
    except:
        continue
    
    # network_data_torch = network_data_torch.unsqueeze
    
    # data.TensorDataset(network_data_torch, target)
    
    # network_data_torch = network_data_torch.unsqueeze(0)
    network_data_torch = network_data_torch.to(device)
    model.eval()
    try:
        preds = model(network_data_torch)
    except:
        continue
        
    
    try:
        rmse = np.sqrt(  np.mean((preds.cpu().detach().numpy()[0] - target)**2 ))
        print(rmse)
        
    except:
        print('Calculatation of RMSE failed.')
    
    all_rmse.append(rmse)

print(np.sqrt(np.mean(np.array(all_rmse)**2)))



0.22607055
0.18518923




0.2889991
0.1763036




0.2248437
0.16531423




0.20471868
0.15551734




0.19652748
0.28000307




0.21177359
0.18695785




0.21319996
0.21158709




0.23098147
0.22278556




0.23377322
0.2711067
0.18914445




0.21261805
0.26357207
0.14943562




0.25268906
0.13941431




0.20394224
0.28860953




0.20257066
0.20496213




0.18929619
0.13452229




0.20615971
0.15275872
0.20630826




0.16569516
0.2041693




0.26933527
0.1795914




0.23125863
0.13803773




0.1910797
0.25398287




0.22543128
0.28353813




0.21944472
0.31310585




0.16448735
0.24457875




0.29175636
0.17080908




0.25495532
0.21301319




0.19514224
0.18224451




0.16496566
0.22467747




0.17577462
0.25656232




0.1847682
0.17435226
0.18850636




0.19076437
0.24990338




0.20146598
0.19240625
0.2128602




0.24785507
0.21037875




0.1841787
0.25145677




0.2000626
0.14322914




0.22448702
0.2222974




0.26119897
0.17380653




0.1612745
0.21131389




0.16285124
0.16476144




0.23782463
0.23344591




0.15868251
0.23513153




0.16143897
0.21993521




0.16820373




0.16051403
0.22158891




0.26633292
0.21860506
0.22661142




0.20491344
0.19968253
0.23331259




0.24909243
0.22651252
0.19164136
0.21307793


