In [None]:
!pip cache purge

In [None]:
!pip uninstall -y pyreflect

In [None]:
!pip install -i https://test.pypi.org/simple/ pyreflect==1.2.3

In [None]:
!pip show pyreflect

## Generate Refl1d NR SLD profile Curves

In [1]:
import pyreflect
from pyreflect.input import NRSLDDataProcessor
from pyreflect.flows import nr_predict_sld
from pyreflect.models import (
VariationalAutoencoder, 
Autoencoder, 
train_ae, 
train_vae,
NRSLDCurvesGeneratorParams
)
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


Selected device for model training: cuda


In [2]:
# `root` is handed to pyreflect both for loading the config and for the curve
# generator; the two .npy paths are where the generated NR / SLD curves are
# saved and later read back by the data processor.
root = "./"
to_be_saved_nr_file = "data/curves/refl_nr_curves_poly.npy"
to_be_saved_sld_file = "data/curves/refl_sld_curves_poly.npy"

# Seed the global NumPy and PyTorch RNGs so that a Restart & Run All is
# repeatable (weight initialisation, any random splitting or sampling).
# NOTE(review): assumes pyreflect draws from these global RNGs — confirm.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Prefer the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
config = pyreflect.config.load_config(root)

## Generate NR SLD profile curves

In [3]:
# Generator settings: project root, output files for the NR and SLD curves,
# how many curves to produce, and the film-layer count.
generator_params = NRSLDCurvesGeneratorParams(
    root=root,
    mod_nr_file=to_be_saved_nr_file,
    mod_sld_file=to_be_saved_sld_file,
    num_curves=100_000,
    num_film_layers=6,  # change this to test with a different number of film layers
)

To be saved NR file:data/curves/refl_nr_curves_poly.npy
To be loaded SLD curves:data/curves/refl_sld_curves_poly.npy


In [4]:

# Generate the NR / SLD curve arrays; the run log shows this step also saves
# them to the two configured .npy files.
generated_curves = nr_predict_sld.generate_nr_sld_curves(generator_params)
nr_arr, sld_arr = generated_curves
(nr_arr.shape, sld_arr.shape)

Computing reflectivity


Generating reflectivity curves: 100%|[32m██████████[0m| 100000/100000 [11:28<00:00, 145.23it/s]


NR SLD generated curves saved at: 
               mod sld file: data/curves/refl_sld_curves_poly.npy
                mod nr file: data/curves/refl_nr_curves_poly.npy


((100000, 2, 308), (100000, 2, 900))

### Load Refl1d data

In [5]:
# Point the data processor at the files written by the generator and load them.
dproc = NRSLDDataProcessor(
    nr_file_path=to_be_saved_nr_file,
    sld_file_path=to_be_saved_sld_file,
)
nr_arr, sld_arr = dproc.load_data()


### Preprocessing & Train Test Split

### Split dataloader

In [6]:
# Normalise both curve sets using the processor's own routines.
expt_nr_arr = dproc.normalize_nr()
expt_sld_arr = dproc.normalize_sld()

# Train / validation / test split.
list_arrays = dproc.split_arrays(
    expt_nr_arr,
    expt_sld_arr,
    size_split=0.7,
)

# --- Disabled variant: train on a single NR channel (x axis removed) ---------
# copied_list = [x.copy() for x in list_arrays]   # keep the original data
#
# crv_tr, sld_tr, crv_val, sld_val, crv_tst, sld_tst = list_arrays
# crv_tr = dproc.reshape_nr_to_single_channel(crv_tr)
# crv_val = dproc.reshape_nr_to_single_channel(crv_val)
# crv_tst = dproc.reshape_nr_to_single_channel(crv_tst)
# list_arrays = [crv_tr, sld_tr, crv_val, sld_val, crv_tst, sld_tst]
# -----------------------------------------------------------------------------

tensor_arrays = dproc.convert_tensors(list_arrays)

# Datasets and loaders for training. The trailing 16 is presumably the batch
# size (batches drawn from the test loader come out with 16 rows).
(
    tr_data,
    val_data,
    tst_data,
    tr_load,
    val_load,
    tst_load,
) = dproc.get_dataloaders(*tensor_arrays, 16)


### Autoencoder (AE) Training

In [7]:
class FlattenedDataset(torch.utils.data.Dataset):
    """Wrap an ``(x, y)`` dataset so that every ``x`` is returned as a 1-D tensor.

    The target ``y`` is passed through unchanged and the length equals that of
    the wrapped dataset.
    """

    def __init__(self, dataset):
        """Store the dataset to wrap; indexing it must yield an ``(x, y)`` pair."""
        self.dataset = dataset

    def __len__(self):
        """Return the number of samples in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(flattened_x, y)``."""
        x, y = self.dataset[idx]
        # reshape(-1) instead of view(-1): view raises on non-contiguous tensors
        # (e.g. transposed or strided storage), reshape copies only when it must.
        return x.reshape(-1), y

In [8]:
# Rebuild the train / validation loaders on top of FlattenedDataset so the
# model receives 1-D inputs.
# NOTE(review): the training loader is not shuffled — confirm this is intended.
loader_kwargs = dict(batch_size=16, shuffle=False)
tr_load = torch.utils.data.DataLoader(FlattenedDataset(tr_data), **loader_kwargs)
val_load = torch.utils.data.DataLoader(FlattenedDataset(val_data), **loader_kwargs)

# Sanity check: element count of one raw training sample vs. the shapes of
# the first flattened training batch.
batch = next(iter(tr_load))
batch_x, batch_y = batch
x, _ = tr_data[0]

(x.numel(), batch_x.shape, batch_y.shape)


(616, torch.Size([16, 616]), torch.Size([16, 2, 900]))

In [9]:
# Inspect one batch from the test loader (inputs are still 2-channel here).
first_tst_batch = next(iter(tst_load))
tst_x, tst_y = first_tst_batch
(tst_x.shape, tst_y.shape)

(torch.Size([16, 2, 308]), torch.Size([16, 2, 900]))

In [12]:
# Model sizes: the input width is the element count of the first raw training
# sample (`x`, taken from tr_data[0] in the batch-shape check cell).
init_size = x.numel()
latent_dims = 16

model = Autoencoder(init_size, latent_dims)
model = model.to(device)

# Train the plain autoencoder with an MSE loss for 10 epochs.
mse = torch.nn.MSELoss()
train_loss, val_loss = train_ae(model, tr_load, val_load, epochs=10, loss_fn=mse)

# VAE alternative (disabled):
# train_loss, val_loss = train_vae(
#     model,
#     tr_load,
#     val_load,
#     epochs=10,
#     beta=0.8
# )

Epoch: 1, train loss: 0.0010924023385159671, valid loss: 0.0006819400463939837
Epoch: 2, train loss: 0.0006268704374054713, valid loss: 0.0005632484392106617
Epoch: 3, train loss: 0.0004402847196341359, valid loss: 0.0003795025891675028
Epoch: 4, train loss: 0.00034196947703936273, valid loss: 0.00030751107583271224
Epoch: 5, train loss: 0.00028330300581708016, valid loss: 0.00024532782731948597
Epoch: 6, train loss: 0.00022610544598262224, valid loss: 0.00019776968660781524
Epoch: 7, train loss: 0.00018633532782550902, valid loss: 0.00015518427551560192
Epoch: 8, train loss: 0.00014360163956049032, valid loss: 0.0001369879099812586
Epoch: 9, train loss: 0.00012590421187841068, valid loss: 0.00012478946484061878
Epoch: 10, train loss: 0.00011717006052079212, valid loss: 0.00013072882538758414


In [13]:
# Reconstruct the whole test set with the trained model.
model.eval()

# The test loader is rebuilt on the flattened dataset so its inputs match the
# 1-D layout the model was trained on; order is preserved (no shuffling).
tst_load = torch.utils.data.DataLoader(
    FlattenedDataset(tst_data),
    batch_size=16,
    shuffle=False,
)
x_load = tst_load

recon_batches = []
with torch.no_grad():
    for batch in x_load:
        inputs = batch[0].to(device)  # targets are not needed here
        decoded = model(inputs)  # forward pass
        # Un-flatten back to two channels: (B, 2*M) -> (B, 2, M).
        outputs = decoded.view(decoded.size(0), 2, -1)
        recon_batches.append(outputs.cpu())

denoised_all = torch.cat(recon_batches, dim=0).numpy()
(denoised_all.shape, type(denoised_all))

((15000, 2, 308), numpy.ndarray)

### Reconstruct Test NR From AE

In [None]:
# NOTE(review): disabled and stale. This snippet belongs to the single-channel
# workflow; in the current run denoised_all already has shape (B, 2, L), and
# nr_tst is only defined in the following cell.
# # From reshaped single-channel back to full X-Y
# restored_nr_arr = np.concatenate([
#     nr_tst[:, 0:1, :],   # first channel, shape (B, 1, L)
#     denoised_all           # second channel, shape (B, 1, L)
# ], axis=1)  # → shape: (B, 2, L)

# restored_nr_arr.shape

In [14]:
# Pull the raw (un-flattened) test tensors out of the dataset and convert
# them to NumPy arrays for saving.
x_tensor, y_tensor = tst_data.tensors
nr_tst, sld_tst = x_tensor.numpy(), y_tensor.numpy()

In [15]:
# Save the AE reconstructions alongside the original test inputs and targets.
# The reconstruction file name is built from latent_dims so it always matches
# the model that produced it (a hard-coded "ldim32" mislabels a 16-dim run).
np.save(f"./data/curves/nr_ae_recon_ldim{latent_dims}.npy", denoised_all)
np.save("./data/curves/nr_tst_orig.npy", nr_tst)
np.save("./data/curves/sld_tst_orig.npy", sld_tst)