In [1]:
import os
os.environ['OMP_NUM_THREADS']='2'
os.environ['LD_LIBRARY_PATH']=''
os.environ['CUDA_LAUNCH_BLOCKING']='1'

In [2]:
%cd /home/pengyu.zhang/project/superres/ProbSR/Experiment2

/home/pengyu.zhang/project/superres/ProbSR/Experiment2


In [3]:
from data_generation import *
from scipy.linalg import sqrtm
from downscaling import *
from utils import *
import random

In [4]:
%cd Bicubic_Downsampling/

/home/pengyu.zhang/project/superres/ProbSR/Experiment2/Bicubic_Downsampling


## Langevin & Training Downscale Network

### Upscale By 4

In [5]:
N_low = 40
N_high = 160
scale = 4

h_low = 1/(N_low-1)
x_low = np.arange(0,1.0001,h_low)
y_low = np.arange(0,1.0001,h_low)

h_high = 1/(N_high-1)
x_high = np.arange(0,1.0001,h_high)
y_high = np.arange(0,1.0001,h_high)

In [6]:
A_high = create_A(N_high)
A_low = create_A(N_low)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Parameters for prior variance
prior_sigma = 0.002
ll_sigma = 0.002

In [8]:
A_high = torch.tensor(A_high).to(torch.float32).to(device)

In [9]:
A_high = A_high.to_sparse()
operator = (A_high.T) * (1/prior_sigma**2)

In [10]:
dataset = DataFromH5File4("/home/pengyu.zhang/project/superres/ProbSR/Experiment2/data/40_160_low_forcing.h5")
trainset = random.sample(range(0, 1000), 600)
testset = [i for i in range(0,1000) if i not in trainset]

In [11]:
def sample_data():
    coefficient = random.sample(trainset,1)[0]
    forcing = dataset[coefficient][0]
    lr = dataset[coefficient][1]
    
    return forcing, lr


def sample_p_0(x):
    # Randomly sampling for initialisation of the Langevin dynamics
    # prior = torch.randn(*[batch_size,1,20,20]).to(device)
    
    # Set the u_low_mean to the initialisation of the Langevin dynamics
    # posterior_initial = torch.randn([N_high,N_high]).to(torch.float32)
    # posterior_initial = torch.tensor(posterior_initial).to(device).to(torch.float32)
    posterior_initial  = F.interpolate(x.reshape(1,1,N_low,N_low),(N_high,N_high),mode="bicubic").reshape(N_high,N_high)
    
    return posterior_initial

    
def ula_posterior_preconditioner(z, b_high, x, G):
    """
    Langevin dynamics with preconditioner
    """
    z = z.clone().detach().requires_grad_(True)
    sum = 0
    for i in range(K):
        # Grad log-likelihood
        x_hat = G(z.reshape(1,N_high,N_high)).reshape(N_low,N_low)
        log_likelihood = (-1/(2*math.pow(ll_sigma, 2)) * torch.matmul((x-x_hat).reshape(1,N_low**2),(x-x_hat).reshape(N_low**2,1)))
        grad_ll = torch.autograd.grad(log_likelihood, z)[0]

        # Grad prior
        difference = torch.spmm(A_high,z.reshape(N_high*N_high,1)) - b_high.reshape(N_high**2,1)
        # log_prior = - 0.5 * difference.T @ G_inverse @ difference
        # grad_log_prior = torch.autograd.grad(log_prior, z)[0]
        grad_log_prior = (- torch.spmm(operator,difference)).reshape(N_high,N_high)
        
        # Random noise term
        W = torch.randn(*[N_high,N_high]).to(device)
        # random = torch.matmul(G_sqrt,W.reshape(N_high**2,1)).reshape(N_high,N_high)
        
        z = z + 0.5 * s ** 2 * grad_log_prior + 0.5 * s ** 2 * grad_ll + s * W
        if i >= K-10:
            sum += z
        
    sum /= 10
           
    return sum.detach() 

In [12]:
# Train with sampled data
epoch_num = 1000
lr = 0.003
gamma = 0.5
step_size = 30
minimum_loss = float('inf')
loss_track = []

K = 70
s = 0.0004

G = DownScale()
G.apply(weights_init_xavier).to(device)
mse = nn.MSELoss(reduction='sum')
optG = torch.optim.Adam(G.parameters(), lr = lr, weight_decay=0, betas=(0.5, 0.999))
r_scheduleG = torch.optim.lr_scheduler.StepLR(optG, step_size=step_size, gamma=gamma)

# Logger info
dir_name = f'models/model1/40_160/lr{lr}_gamma{gamma}_stepsize{step_size}_K{K}_llsigma_{ll_sigma}_psigma{prior_sigma}'
makedir(dir_name)
logger = setup_logging('job0', dir_name, console=True)
logger.info(f'Training for {epoch_num} epoches and learning rate is {lr}')

for epoch in range(1, epoch_num+1):
    
    b_high, low_res = sample_data()
    b_high = torch.tensor(b_high).to(torch.float32).to(device)
    low_res = torch.tensor(low_res).to(torch.float32).to(device)
    
    posterior_initial = sample_p_0(low_res)
    posterior_final = ula_posterior_preconditioner(posterior_initial, b_high, low_res, G)

    optG.zero_grad()
    
    out = G(posterior_final.reshape(1,N_high,N_high)).reshape(N_low,N_low)
    loss = mse(out,low_res)
    
    loss.backward()
    optG.step()
    
    if loss < minimum_loss:
        save_model(dir_name, epoch, 'best_model', r_scheduleG, G, optG)
        minimum_loss = loss
            
    if epoch%100 == 0:
        save_model(dir_name, epoch, 'model_epoch_{}'.format(epoch), r_scheduleG, G, optG)
    
    save_model(dir_name, epoch, 'current_epoch', r_scheduleG, G, optG)
    loss_track.append(loss.cpu().data.numpy())
    np.save(f'{dir_name}/chains/loss_curve.npy', np.array(loss_track))
    
    print("Epoch:", epoch, "Loss:", loss)

    r_scheduleG.step()

Output directory already exists
2024-08-06 17:50:55,867 : Training for 1000 epoches and learning rate is 0.003
Epoch: 1 Loss: tensor(96617.6406, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 2 Loss: tensor(1035.0780, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 3 Loss: tensor(44756.4766, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 4 Loss: tensor(76884.2969, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 5 Loss: tensor(254926.2500, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 6 Loss: tensor(28378.2305, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 7 Loss: tensor(10447.2666, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 8 Loss: tensor(4519.7935, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 9 Loss: tensor(613.0909, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 10 Loss: tensor(1348.8289, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 11 Loss: tensor(833.5715, device='cuda:0', grad_fn=<MseLossBackward0>)
Epoch: 12 Loss: tensor(1

KeyboardInterrupt: 