## Comparing the performance of the different models

#### Models with normalizing flows: MADE, MAF, and RealNVP

In [None]:
import shutil
import os

Clone the repository:

In [None]:
!rm -rf normalizing_flows
!rm -rf results/normalizing_flows
!git clone https://github.com/kamenbliznashki/normalizing_flows.git

Copy our simulation datasets (the loader module and the `B.csv` sample file of each dataset) into the cloned `normalizing_flows` repository:

In [None]:
# Simulated datasets to hand over to the cloned repository. For every name the
# local `datasets` folder provides a module `<name>.py` and a file `<name>/B.csv`.
data_names = ['circle', 'torus', 'involute']

# The destination does not depend on the dataset, so it is computed once.
destination_folder = os.path.join('normalizing_flows', 'datasets')

for data_name in data_names:
    source_file1 = os.path.join('datasets', data_name + '.py')
    source_file2 = os.path.join('datasets', data_name, 'B.csv')
    data_folder = os.path.join(destination_folder, data_name)

    # The module goes next to the repository's own dataset modules.
    shutil.copy2(source_file1, destination_folder)

    # exist_ok=True keeps the cell re-runnable: without it a second execution
    # (without re-cloning first) stops with FileExistsError.
    os.makedirs(data_folder, exist_ok=True)
    shutil.copy2(source_file2, data_folder)

Revise some lines in the files `data.py` and `maf.py` of the cloned repository:

In [None]:
import fileinput

file_path = os.path.join('normalizing_flows', 'data.py')

# Replacement text keyed by the 1-based line number of data.py it overwrites.
# Lines 65-67 add CIRCLE/TORUS/INVOLUTE to the dataset names handled through
# `load_dataset`; line 106 sets the `kwargs` used on CUDA devices.
# NOTE(review): the line numbers assume the upstream data.py is unchanged.
patched_lines = {
    65: "    elif dataset_name in ['TOY', 'MOONS', 'CIRCLE', 'TORUS', 'INVOLUTE']:  # use own constructors\n",
    66: '        train_dataset = load_dataset(dataset_name)()\n',
    67: '        test_dataset = load_dataset(dataset_name)()\n',
    106: "    kwargs = {'num_workers': 1, 'pin_memory': True} if device.type == 'cuda' else {}\n",
}

# With inplace=True everything printed inside the loop is written back into
# the file, so each line is emitted either patched or untouched.
with fileinput.FileInput(file_path, inplace=True) as patched_file:
    for line_number, original_line in enumerate(patched_file, 1):
        print(patched_lines.get(line_number, original_line), end='')

In [None]:
file_path = './normalizing_flows/maf.py'
start_line = 740        # from this line to the end, maf.py is replaced
output_dir_line = 667   # line that receives the new `args.output_dir` assignment

# New tail of maf.py: for CIRCLE/TORUS/INVOLUTE it draws 10000 base samples,
# maps them through `model.inverse` and writes them to `<dataset>_sample.csv`
# inside `args.output_dir`.
replacement_code = '''
    if args.generate:
        if args.dataset == 'TOY':
            base_dist = train_dataloader.dataset.base_dist
            plot_sample_and_density(model, base_dist, args, ranges_density=[[-15,4],[-3,3]], ranges_sample=[[-1.5,1.5],[-3,3]])
        elif args.dataset in ['CIRCLE', 'TORUS', 'INVOLUTE']:
            u = model.base_dist.sample((10000, args.n_components)).squeeze()
            samples, _ = model.inverse(u)
            samples = samples.data.cpu().numpy()
            import numpy as np
            import os
            save_dir = os.path.join(args.output_dir,args.dataset+'_sample.csv')

            np.savetxt(save_dir, samples, delimiter=',')
'''

# In-place rewrite: whatever is printed inside the loop becomes the new file.
with fileinput.FileInput(file_path, inplace=True) as patched_file:
    for original_line in patched_file:
        line_number = patched_file.filelineno()

        if line_number >= start_line:
            # Emit the new tail once and stop; the remaining original lines
            # are never printed and therefore drop out of the file.
            print(replacement_code)
            break

        if line_number == output_dir_line:
            print("    args.output_dir = os.path.join('./results/',args.model)\n", end='')
            continue

        # Every other line is copied through unchanged.
        print(original_line, end='')


Training of the models:

In [7]:
!python normalizing_flows/maf.py --train --model made --dataset CIRCLE --n_epochs 200
!python normalizing_flows/maf.py --generate --model made --dataset CIRCLE --restore_file ./results/made/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model made --dataset TORUS --n_epochs 200
!python normalizing_flows/maf.py --generate --model made --dataset TORUS --restore_file ./results/made/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model made --dataset INVOLUTE --n_epochs 200
!python normalizing_flows/maf.py --generate --model made --dataset INVOLUTE --restore_file ./results/made/best_model_checkpoint.pt

Loaded settings and model:
{'activation_fn': 'relu',
 'batch_size': 100,
 'cond_label_size': None,
 'conditional': False,
 'data_dir': './data/',
 'dataset': 'CIRCLE',
 'device': device(type='cuda', index=0),
 'evaluate': False,
 'flip_toy_var_order': False,
 'generate': False,
 'hidden_size': 100,
 'input_dims': 2,
 'input_order': 'sequential',
 'input_size': 2,
 'log_interval': 1000,
 'lr': 0.0001,
 'model': 'made',
 'n_blocks': 5,
 'n_components': 1,
 'n_epochs': 200,
 'n_hidden': 1,
 'no_batch_norm': False,
 'no_cuda': False,
 'output_dir': './results/made',
 'restore_file': None,
 'results_file': './results/made\\results.txt',
 'seed': 1,
 'start_epoch': 0,
 'train': True}
MADE(
  (net_input): MaskedLinear(in_features=2, out_features=100, bias=True)
  (net): Sequential(
    (0): ReLU()
    (1): MaskedLinear(in_features=100, out_features=100, bias=True)
    (2): ReLU()
    (3): MaskedLinear(in_features=100, out_features=4, bias=True)
  )
)
epoch   0 / 200, step    0 / 100; loss 2.3

In [8]:
!python normalizing_flows/maf.py --train --model maf --dataset CIRCLE --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model maf --dataset CIRCLE --no_batch_norm --restore_file ./results/maf/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model maf --dataset TORUS --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model maf --dataset TORUS --no_batch_norm --restore_file ./results/maf/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model maf --dataset INVOLUTE --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model maf --dataset INVOLUTE --no_batch_norm --restore_file ./results/maf/best_model_checkpoint.pt

Loaded settings and model:
{'activation_fn': 'relu',
 'batch_size': 100,
 'cond_label_size': None,
 'conditional': False,
 'data_dir': './data/',
 'dataset': 'CIRCLE',
 'device': device(type='cuda', index=0),
 'evaluate': False,
 'flip_toy_var_order': False,
 'generate': False,
 'hidden_size': 100,
 'input_dims': 2,
 'input_order': 'sequential',
 'input_size': 2,
 'log_interval': 1000,
 'lr': 0.0001,
 'model': 'maf',
 'n_blocks': 5,
 'n_components': 1,
 'n_epochs': 200,
 'n_hidden': 1,
 'no_batch_norm': True,
 'no_cuda': False,
 'output_dir': './results/maf',
 'restore_file': None,
 'results_file': './results/maf\\results.txt',
 'seed': 1,
 'start_epoch': 0,
 'train': True}
MAF(
  (net): FlowSequential(
    (0): MADE(
      (net_input): MaskedLinear(in_features=2, out_features=100, bias=True)
      (net): Sequential(
        (0): ReLU()
        (1): MaskedLinear(in_features=100, out_features=100, bias=True)
        (2): ReLU()
        (3): MaskedLinear(in_features=100, out_features=4, 

In [9]:
!python normalizing_flows/maf.py --train --model realnvp --dataset CIRCLE --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model realnvp --dataset CIRCLE --no_batch_norm --restore_file ./results/realnvp/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model realnvp --dataset TORUS --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model realnvp --dataset TORUS --no_batch_norm --restore_file ./results/realnvp/best_model_checkpoint.pt
!python normalizing_flows/maf.py --train --model realnvp --dataset INVOLUTE --n_epochs 200 --no_batch_norm
!python normalizing_flows/maf.py --generate --model realnvp --dataset INVOLUTE --no_batch_norm --restore_file ./results/realnvp/best_model_checkpoint.pt

Loaded settings and model:
{'activation_fn': 'relu',
 'batch_size': 100,
 'cond_label_size': None,
 'conditional': False,
 'data_dir': './data/',
 'dataset': 'CIRCLE',
 'device': device(type='cuda', index=0),
 'evaluate': False,
 'flip_toy_var_order': False,
 'generate': False,
 'hidden_size': 100,
 'input_dims': 2,
 'input_order': 'sequential',
 'input_size': 2,
 'log_interval': 1000,
 'lr': 0.0001,
 'model': 'realnvp',
 'n_blocks': 5,
 'n_components': 1,
 'n_epochs': 200,
 'n_hidden': 1,
 'no_batch_norm': True,
 'no_cuda': False,
 'output_dir': './results/realnvp',
 'restore_file': None,
 'results_file': './results/realnvp\\results.txt',
 'seed': 1,
 'start_epoch': 0,
 'train': True}
RealNVP(
  (net): FlowSequential(
    (0): LinearMaskedCoupling(
      (s_net): Sequential(
        (0): Linear(in_features=2, out_features=100, bias=True)
        (1): Tanh()
        (2): Linear(in_features=100, out_features=100, bias=True)
        (3): Tanh()
        (4): Linear(in_features=100, out_fe

#### The model RoundTrip:

This model is installed from pip and is based on TensorFlow. It has many dependencies, so it is best to install it in a virtual environment.

In [7]:
%pip install pyroundtrip

Note: you may need to restart the kernel to use updated packages.


In [8]:
# Only tested with RoundTrip v2.0.1
# Revise the evaluation part in class Roundtrip
import os
import inspect
import fileinput
import importlib
import pyroundtrip as pyrt
# Locate roundtrip.py inside the installed pyroundtrip package.
directory_path = os.path.dirname(inspect.getfile(pyrt))
file_path = os.path.join(directory_path, 'roundtrip.py')

# Count the lines of the file; the count decides below whether to patch.
with open(file_path, 'r') as source:
    line_count = sum(1 for _ in source)

# Three lines that take the place of line 210: save `data_z_`, then draw
# 10000 latent samples, pass them through `g_net` and save the result too.
replacement_code = (
    "        np.save('{}/data_z_at_{}.npy'.format(self.save_dir,batch_idx),data_z_)\n"
    "        data_x_ = self.g_net(self.z_sampler.get_batch(10000))\n"
    "        np.save('{}/data_x_at_{}.npy'.format(self.save_dir,batch_idx),data_x_)\n"
)

if pyrt.__version__ != "2.0.1":
    print("warning: Only tested with RountTrip v2.0.1")

# The patch turns one line into three, i.e. the file grows by two lines.
# NOTE(review): the 1510 threshold presumably marks an already patched
# v2.0.1 file so that the patch is not applied twice - confirm.
if line_count < 1510:
    # inplace=True redirects print() into the file being rewritten.
    with fileinput.FileInput(file_path, inplace=True) as patched_file:
        for original_line in patched_file:
            is_target = patched_file.filelineno() == 210
            print(replacement_code if is_target else original_line, end='')


In [9]:
# Restart the kernel to import pyrt again
%reset -f

In [10]:
import numpy as np
import pandas as pd
import pyroundtrip as pyrt
# Report the pyroundtrip version that is actually loaded.
print("Currently use version v{} of Roundtrip.".format(pyrt.__version__))

Currently use version v2.0.1 of Roundtrip.


In [11]:
# RoundTrip settings for the Involute data (x_dim=2, z_dim=1).
# NOTE(review): 'output_dir' ends with a slash here, while the Circle and
# Torus cells pass './RoundTrip_623' - confirm both resolve to the same folder.
params = {
    'dataset': 'Involute',
    'output_dir': './RoundTrip_623/',
    'x_dim': 2,
    'z_dim': 1,
    'lr': 0.0002,
    'alpha': 10,
    'beta': 1,
    'gamma': 0,
    'g_d_freq': 1,
    # Layer widths of the four networks, written as width * number of layers.
    'g_units': [512] * 10,
    'e_units': [256] * 10,
    'dz_units': [128] * 2,
    'dx_units': [256] * 4,
    'save_model': False,
    'sd_x': 0.05,
    'scale': 0.5,
    'sample_size': 10000
}

model = pyrt.Roundtrip(params=params, random_seed=123)

# Read the header-less CSV and hand it to the model as a float32 array.
data = pd.read_csv('datasets/involute/B.csv', header=None).values.astype(np.float32)

model.train(data=data, save_format='csv', n_iter=100000, batches_per_eval=10000)

Iteration [0] : g_loss_adv [0.7556], e_loss_adv [0.7920],                l2_loss_x [0.1658], l2_loss_z [1.3156], g_e_loss [16.3612], dx_loss [0.4067], dz_loss [0.4071], d_loss [0.8138]
Iteration [10000] : g_loss_adv [0.1536], e_loss_adv [0.1656],                l2_loss_x [0.0155], l2_loss_z [0.0004], g_e_loss [0.4783], dx_loss [0.1596], dz_loss [0.1583], d_loss [0.3179]
Iteration [20000] : g_loss_adv [0.2227], e_loss_adv [0.1842],                l2_loss_x [0.0142], l2_loss_z [0.0038], g_e_loss [0.5863], dx_loss [0.1494], dz_loss [0.1594], d_loss [0.3088]
Iteration [30000] : g_loss_adv [0.3248], e_loss_adv [0.1551],                l2_loss_x [0.0163], l2_loss_z [0.0020], g_e_loss [0.6627], dx_loss [0.0875], dz_loss [0.1521], d_loss [0.2396]
Iteration [40000] : g_loss_adv [0.2735], e_loss_adv [0.1943],                l2_loss_x [0.0096], l2_loss_z [0.0021], g_e_loss [0.5851], dx_loss [0.1388], dz_loss [0.1515], d_loss [0.2903]
Iteration [50000] : g_loss_adv [0.2973], e_loss_adv [0.1651],  

In [12]:
# RoundTrip settings for the Circle data (x_dim=2, z_dim=1).
params = {
    'dataset': 'Circle',
    'output_dir': './RoundTrip_623',
    'x_dim': 2,
    'z_dim': 1,
    'lr': 0.0002,
    'alpha': 10,
    'beta': 1,
    'gamma': 0,
    'g_d_freq': 1,
    # Layer widths of the four networks, written as width * number of layers.
    'g_units': [512] * 10,
    'e_units': [256] * 10,
    'dz_units': [128] * 2,
    'dx_units': [256] * 4,
    'save_model': False,
    'sd_x': 0.05,
    'scale': 0.5,
    'sample_size': 10000
}

model = pyrt.Roundtrip(params=params, random_seed=123)

# Read the header-less CSV and hand it to the model as a float32 array.
data = pd.read_csv('datasets/circle/B.csv', header=None).values.astype(np.float32)

model.train(data=data, save_format='csv', n_iter=100000, batches_per_eval=10000)

Iteration [0] : g_loss_adv [0.7568], e_loss_adv [0.7920],                l2_loss_x [0.4979], l2_loss_z [1.3156], g_e_loss [19.6838], dx_loss [0.4178], dz_loss [0.4071], d_loss [0.8249]
Iteration [10000] : g_loss_adv [0.1522], e_loss_adv [0.1552],                l2_loss_x [0.0027], l2_loss_z [0.0024], g_e_loss [0.3585], dx_loss [0.1604], dz_loss [0.1614], d_loss [0.3218]
Iteration [20000] : g_loss_adv [0.2312], e_loss_adv [0.1595],                l2_loss_x [0.0017], l2_loss_z [0.0072], g_e_loss [0.4806], dx_loss [0.1487], dz_loss [0.1625], d_loss [0.3112]
Iteration [30000] : g_loss_adv [0.1729], e_loss_adv [0.1916],                l2_loss_x [0.0064], l2_loss_z [0.0021], g_e_loss [0.4493], dx_loss [0.1454], dz_loss [0.1480], d_loss [0.2934]
Iteration [40000] : g_loss_adv [0.1622], e_loss_adv [0.1656],                l2_loss_x [0.0021], l2_loss_z [0.0134], g_e_loss [0.4830], dx_loss [0.1539], dz_loss [0.1638], d_loss [0.3177]
Iteration [50000] : g_loss_adv [0.1626], e_loss_adv [0.1612],  

In [13]:
# RoundTrip settings for the Torus data (x_dim=3, z_dim=2).
params = {
    'dataset': 'Torus',
    'output_dir': './RoundTrip_623',
    'x_dim': 3,
    'z_dim': 2,
    'lr': 0.0002,
    'alpha': 10,
    'beta': 1,
    'gamma': 0,
    'g_d_freq': 1,
    # Layer widths of the four networks, written as width * number of layers.
    'g_units': [512] * 10,
    'e_units': [256] * 10,
    'dz_units': [128] * 2,
    'dx_units': [256] * 4,
    'save_model': False,
    'sd_x': 0.05,
    'scale': 0.5,
    'sample_size': 10000
}

model = pyrt.Roundtrip(params=params, random_seed=123)

# Read the header-less CSV and hand it to the model as a float32 array.
data = pd.read_csv('datasets/torus/B.csv', header=None).values.astype(np.float32)

model.train(data=data, save_format='csv', n_iter=100000, batches_per_eval=10000)

Iteration [0] : g_loss_adv [0.7581], e_loss_adv [0.7924],                l2_loss_x [0.3736], l2_loss_z [0.8799], g_e_loss [14.0855], dx_loss [0.3854], dz_loss [0.4096], d_loss [0.7951]
Iteration [10000] : g_loss_adv [0.1622], e_loss_adv [0.1646],                l2_loss_x [0.0084], l2_loss_z [0.0071], g_e_loss [0.4818], dx_loss [0.1524], dz_loss [0.1628], d_loss [0.3152]
Iteration [20000] : g_loss_adv [0.1835], e_loss_adv [0.1766],                l2_loss_x [0.0044], l2_loss_z [0.0071], g_e_loss [0.4759], dx_loss [0.1552], dz_loss [0.1584], d_loss [0.3137]
Iteration [30000] : g_loss_adv [0.1682], e_loss_adv [0.1697],                l2_loss_x [0.0015], l2_loss_z [0.0104], g_e_loss [0.4564], dx_loss [0.1497], dz_loss [0.1606], d_loss [0.3103]
Iteration [40000] : g_loss_adv [0.1780], e_loss_adv [0.1603],                l2_loss_x [0.0052], l2_loss_z [0.0038], g_e_loss [0.4286], dx_loss [0.1477], dz_loss [0.1598], d_loss [0.3075]
Iteration [50000] : g_loss_adv [0.1984], e_loss_adv [0.1670],  