# Parameters

In [1]:
NSIDE = 8
NUMPIX = 12 * NSIDE ** 2
DEPTH = 4
SHOW_IMAGES = True

RECT = True

# Helper Functions

In [2]:
from torchvision.utils import save_image
import os
import torch

def get_device():
    if torch.cuda.is_available():
        device = 'cuda:0'
    else:
        device = 'cpu'
    return device


def make_dir():
    image_dir = 'Saved_Images'
    if not os.path.exists(image_dir):
        os.makedirs(image_dir)
def save_img(img, name):
    img = img.view(img.size(0), 1, 12 * NSIDE // 8, 64 * NSIDE // 8)
    save_image(img, name)
    

  from .autonotebook import tqdm as notebook_tqdm


### Set Device

In [3]:
device = get_device()
base = torch.float32

#pin to gpu
device

'cpu'

### Set seed.

In [4]:
from src.utils import set_seed
set_seed(2021)

ModuleNotFoundError: No module named 'src'

# 1. Load in Data

In [None]:
'''
Open the file.
'''

import pickle
import healpy as hp
import numpy as np
import matplotlib.pyplot as plt

def load_data(file_name):
    file_name = f"split_sphere_datasets_NSIDE{NSIDE}.pkl"

    f = open(file_name, "rb")
    dataset = pickle.load(f)
    f.close()

    return dataset

file_name = f"split_sphere_datasets_NSIDE{NSIDE}.pkl"
dataset = load_data(file_name)

# 2. Create Input and Output Arrays

In [None]:
'''

Create the training and validation datasets. Do any necessary reshaping. 

'''

import numpy as np

x_combined = []
y_combined = []


NUM_CHANNELS = 1

for i in range(len(dataset)):
    x = dataset[i]['data']
    y = dataset[i]['label']

    x_combined.append(x)
    y_combined.append(y)
    
x_combined = np.array(x_combined)
y_combined = np.array(y_combined)

print("x_combined shape: ", x_combined.shape)
print("y_combined shape: ", y_combined.shape)

x_combined shape:  (2048, 2)
y_combined shape:  (2048, 3072)


# 3. Split into Training, Val, and Test. Also Reshape into Rectangle if Specified.

In [None]:
# train_len = int(0.7 * len(y_combined)) # 70%
# val_len = int(0.15 * len(y_combined)) # 15%
# test_len = len(y_combined) - train_len - val_len # 15%

train_len = 1536
val_len = 256
test_len = 256


x_train = x_combined[:train_len]
x_val = x_combined[train_len : train_len + val_len]
x_test = x_combined[train_len + val_len:]

y_train = y_combined[:train_len]
y_val = y_combined[train_len : train_len + val_len]
y_test = y_combined[train_len + val_len:]

if RECT:
    # NOTE: Can change based on input size of data
    # For now, default to 12, 64 size for NSIDE 8 that is scaled up for larger NSIDEs. 
    length = 32 * NSIDE // 8
    width = 24 * NSIDE // 8

    y_train = y_train.reshape((len(y_train), DEPTH, width, length))
    y_val = y_val.reshape((len(y_val), DEPTH, width, length))
    y_test = y_test.reshape((len(y_test), DEPTH, width, length))


print("x_train shape: ", x_train.shape)
print("x_val shape: ", x_val.shape)
print("x_test shape: ", x_test.shape)
print()

print("y_train shape: ", y_train.shape)
print("y_val shape: ", y_val.shape)
print("y_test shape: ", y_test.shape)

x_train shape:  (1536, 2)
x_val shape:  (256, 2)
x_test shape:  (256, 2)

y_train shape:  (1536, 4, 24, 32)
y_val shape:  (256, 4, 24, 32)
y_test shape:  (256, 4, 24, 32)


# 4. Create data loaders

In [None]:
# constants
NUM_EPOCHS = 1000
LEARNING_RATE = 0.01
BATCH_SIZE = 32 # previously 1

In [None]:
from torch.utils.data import Dataset, DataLoader, TensorDataset

train_set = TensorDataset(torch.tensor(x_train).to(dtype=base, device=device), torch.tensor(y_train).to(dtype=base, device=device))
val_set = TensorDataset(torch.tensor(x_val).to(dtype=base, device=device), torch.tensor(y_val).to(dtype=base, device=device))
test_set = TensorDataset(torch.tensor(x_test).to(dtype=base, device=device), torch.tensor(y_test).to(dtype=base, device=device))

train_loader = DataLoader(train_set, BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_set, BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_set, BATCH_SIZE, shuffle=True)

# 5. Define Model Classes

In [None]:
from torch.nn import Module, Conv2d, Sequential, ConvTranspose2d, ReLU, MaxPool2d, Linear, Conv3d, Tanh, Dropout


class ConvExpand(Module): 
    def __init__(self, linear_layers, conv_layers, mid_rect_size, batch_size, num_channels):
        super().__init__()

        # Put encoder layers in Sequential container
        # First increase from 1 --> 64 channels
        # Keep decreasing number of channels
        
        self.linear_layers = linear_layers
        self.conv_layers = conv_layers

        self.mid_rect_size = mid_rect_size
        self.batch_size = batch_size
        self.num_channels = num_channels

    # Run x through each layer
    def forward(self, x):
        for layer in self.linear_layers:
            x = layer(x)
        

        # IMPORTANT: Reshaping output of linear layer into rect form
        # Need to reshape into (BATCH SIZE, NUM_CHANNELS, ... )
        # usually batch size is first parameter, but sometimes if smaller batch then just use whatever x has
        # originally self.batch_size as first param, but now this
        x = x.view(x.size(0), 4, self.mid_rect_size[0], self.mid_rect_size[1])
        
        for layer in self.conv_layers:
            # print(x.shape)
            x = layer(x)
        
        # print(x.detach().numpy().shape)
        # print()
        return x
        

In [None]:

fcmodel8 = Sequential(
           Linear(2, 12),
           ReLU(),
           Linear(12, 384),
           ReLU(),
           Linear(384, 1536),
           ReLU(),
           Linear(1536, 3072)
   )

In [None]:
# IMPORTANT: change linear layer output to batch size * 256 so dimensions match? hmm
lin8 = Sequential (
   Linear(2, 12),
   ReLU(),
   Linear(12, 384),
   ReLU(),
   Linear(384, 3072),
)


mid_rect_size = (width, length)


conv8 = Sequential (
   Conv2d(4, 16, kernel_size=3, padding=1, padding_mode="reflect"),
   ReLU(),
   Conv2d(16, 4, kernel_size=3, padding=1, padding_mode="reflect"),
)



expand8 = ConvExpand(lin8, conv8, mid_rect_size, BATCH_SIZE, NUM_CHANNELS)

In [None]:
model = expand8

# 6. Create Trainer

In [None]:
import torch.optim as optim
from torch.nn import MSELoss

# Use MSE Loss
# need to specify cpu
criterion = MSELoss().to(dtype=base, device=device)

# Use Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [None]:
# Function to evaluate performance of model on some data
def eval_performance(model, data_loader):
    # Set model to evaluation mode to conserve memory 
    model.eval()

    # Don't want to waste memory on gradients
    with torch.no_grad():
        running_loss = 0.0
        
        for x, y in data_loader:
            pred = model(x)

            loss = criterion(pred, y)

            # CRUCIAL: need to multiply by batch size since loss.item() will give total loss / batch_size
            running_loss += loss.item() * pred.shape[0]
            
        
        # Revert back to train mode
        model.train()

        # IMPORTANT: need to do train_loader.dataset to get total # training examples instead of # batches
        # len(train_loader) would just give the # of batches
        return running_loss / len(data_loader.dataset)

In [None]:


train_loss_hist = []
val_loss_hist = []
epoch = 0

def train(model, train_loader, NUM_EPOCHS):
    global train_loss_hist
    global val_loss_hist
    global epoch

    lr_changed = False
    lr_change2 = False

    smallest_val_loss = 10000

    while epoch < NUM_EPOCHS:
        running_loss = 0.0

        # Go through each batch of the data (can pass in entire batch at once!)
        # batch - number of training examples for one forward/backward pass. So pass in batch data values then update weights. 
        for x_vals, y_vals in train_loader:
            torch.cuda.empty_cache()

            # Remember, it's in batches. 
            
            '''

            Reset gradients to 0 so updating of weights can be done correctly.

            When we do loss.backward(), gradients are calculated. Then, optimizer.step() does gradient descent.
            For the next batch, we don't want these gradients to still be lingering (because a new input will have new gradients).
            Thus, we have to reset the gradients to 0. 

            NOTE: This is not the same as setting the weights to 0! We are just resetting the calculated gradients.
            
            '''
            optimizer.zero_grad()

            # Calculate model outputs
            outputs = model(x_vals)

            # Calculate loss
            loss = criterion(outputs, y_vals)

            # Calculate gradients 
            loss.backward()

            # Do gradient descent to update the weights.
            optimizer.step()

            # CRUCIAL: need to multiply by batch size since loss.item() will give total loss / batch_size
            running_loss += loss.item() * outputs.shape[0]


        # IMPORTANT: need to do train_loader.dataset to get total # training examples instead of # batches
        # len(train_loader) would just give the # of batches
        loss = running_loss / len(train_loader.dataset)
        train_loss_hist.append(loss)

        torch.cuda.empty_cache()


        # change learning rate when loss < 0.01
        if loss < 0.1 and not lr_changed:
            for g in optimizer.param_groups:
                g['lr'] = 0.005
            
            lr_changed = True
        
        if loss < 0.035 and not lr_change2:
            for g in optimizer.param_groups:
                g['lr'] = 0.0008
                
            lr_change2 = True


        val_loss = eval_performance(model, val_loader)
        val_loss_hist.append(val_loss)

        smallest_val_loss = min(smallest_val_loss, val_loss)

        print(f'Epoch {epoch + 1} of {NUM_EPOCHS}, Train Loss: {loss}, Val Loss: {val_loss}')
        # print(f'Epoch {epoch + 1} of {NUM_EPOCHS}, Train Loss: {loss}')


        patience = 30
        # Decide if early stopping necessary 
        recent_min = min(val_loss_hist[-patience:])
        if smallest_val_loss < recent_min:
            print("Stopped early")
            return

        # # Saving a full batch!
        # if epoch % 2 == 0:
        #     save_img(y_vals.cpu().data, name='./Cone_Images/truth_batch_{}.png'.format(epoch))
        #     save_img(outputs.cpu().data, name='./Cone_Images/predicted_batch_{}.png'.format(epoch))
        
        epoch += 1

# Train Model

In [None]:
train(model, train_loader, NUM_EPOCHS)

KeyboardInterrupt: 

# Loss Graphs

In [None]:
plt.title("training loss history")
plt.xlabel("epoch")
plt.ylabel("train loss")
plt.yscale("log")
plt.plot(train_loss_hist)

In [None]:
plt.title("validation loss history")
plt.xlabel("epoch")
plt.ylabel("val loss")
plt.yscale("log")
plt.plot(val_loss_hist[:])

# Show a sample output

In [None]:


def display_sample(model, data_loader):
    for x, y in data_loader:
        # Set model to evaluation mode to conserve memory 
        model.eval()

        # Don't want to waste memory on gradients
        with torch.no_grad():

            pred = model(x)

            truth = torch.reshape(y[0], (DEPTH, NUMPIX, ))[3].detach().numpy()
            model_pred = np.reshape(pred[0].detach().numpy(), (DEPTH, NUMPIX, ))[3]

            diff = np.absolute(model_pred - truth)
            
            # [0] because 1 channel so need to go inside
            hp.mollview(truth, title="Ground Truth", nlocs=5)
            hp.mollview(model_pred, title="Model Prediction", nlocs=5)
            hp.mollview(diff, title="Difference Map", nlocs=5)

            print(np.mean(diff))

            # set back to train mode
            model.train()
            break
    
if SHOW_IMAGES:
    display_sample(model, val_loader)

In [None]:
print("Val loss: " , eval_performance(model, val_loader))

## Create Dataloader of Model Outputs 

In [None]:
def save_model_outputs(model, path):
    preds = []
    truths = []

    for x, y in val_loader:
    # Set model to evaluation mode to conserve memory 
        model.eval()

        # Don't want to waste memory on gradients
        with torch.no_grad():

            pred = model(x)

            pred_np = pred.detach().numpy()
            truth_np = y.detach().numpy()

            new_shape = (4, 12, 64)

            # batched right now, so need to unbatch 
            for i in range(len(pred_np)):

                # need to also reshape each element
                pred_reshape = np.reshape(pred_np[i], new_shape)
                truth_reshape = np.reshape(truth_np[i], new_shape)

                print(pred_reshape.shape)

                # need to add each depth individually
                for depth in range(4):
                    # [0] since first 1 dimension
                    preds.append(pred_reshape[depth])
                    truths.append(truth_reshape[depth])
    
    preds = np.array(preds)
    truths = np.array(truths)

    print(preds.shape)
            
    combined_set = TensorDataset(torch.tensor(preds).to(dtype=base, device=device), torch.tensor(truths).to(dtype=base, device=device))
    loader = DataLoader(combined_set, 1, shuffle=True)
    
    torch.save(loader, path)

    # set back to train mode
    model.train()

final_train_loss = round(float(train_loss_hist[-1]), 3)
final_val_loss = round(float(val_loss_hist[-1]), 3)

name = f"saved-pred-outputs/output-loader-model_NSIDE{NSIDE}_trainloss{final_train_loss}_valloss{final_val_loss}_epochs{epoch}.pth"
save_model_outputs(model, name)