# Sanity checks

Here we will perform a number of sanity checks on the model to see whether it's working as desired.

### Checking optimal log-likelihoods per permutation

For the end-to-end model, which learns both the causal ordering and the model parameters, to work, different permutations should produce different log-likelihoods, with the optimal permutation producing the best likelihood possible.

In [1]:
%load_ext autoreload

In [2]:
%autoreload 2
import sys
sys.path.append('..')


# setup data
from ocd.data import CausalDataModule
import dycode
import torch

# setup model
import lightning
import lightning.pytorch.callbacks
from ocd.training import OrderedTrainingModule


# Data configuration for the sanity checks, gathered in one place so the
# individual knobs are easy to tweak before the data module is built.
data_config = {
    # small dataset asia
    "name": "asia",
    # number of observation samples
    "observation_size": 4096,
    # number of interventional samples; set to 0 for no intervention
    "intervention_size": 256,
    "batch_size": 64,
    # 0 means no multiprocessing for data loading
    "num_workers": 0,
    # 0 here, i.e. no validation split (per the original note, a fraction
    # such as 0.1 or an int sample count may be given instead)
    "val_size": 0,
    # True for faster data transfer to GPU (if available)
    "pin_memory": True,
}

dm = CausalDataModule(**data_config)
dm.setup("fit")

# Extract the category sizes (read from the first training dataset)
in_features = dm.train_data[0].features_values


Set up a fixed permutation below and train the model. You can use the following options:
* `whole`: This will simply run the algorithm.
* `correct_order`: This will do a topological sort on the correct DAG and feed that to the model (selected by entering seed `0`). We hope to obtain the best likelihood using this.
* `seed`: This will be an integer value used to seed a random shuffle of the variables, producing a fixed, reproducible permutation. Several seeds can be given as a comma-separated list.

In [3]:
import random
from ocd.utils import topological_sort
from ocd.evaluation import backward_score

# Build FIXED_PERMUTATION: a mapping from a run label to a [permutation, backward_score]
# pair, one entry per requested seed.
#
# Set to False if you want a quick run using the whole model (no prompt is shown)
prompt = True

# Set the permutation to be used, use the seed to switch between permutations

prompt_text = "[LIST OF INTEGERS] create a set of random permutations that is fixed and obtained from the input seeds. (camma seperated)\n"
prompt_text += "When you enter seed=0 it will produce the correct ordering\n"
prompt_text += "Enter option: "
resp = 'whole' if not prompt else input(prompt_text)

print("permutations being used:\n")
FIXED_PERMUTATION = {}
if resp.strip() == 'whole':
    # 'whole' is not a seed: previously it was passed to int() and crashed
    # with a ValueError. Register a single run without a fixed ordering.
    # NOTE(review): assumes the training module treats fixed_permutation=None
    # as "learn the ordering" — TODO confirm against OrderedTrainingModule.
    seeds = []
    FIXED_PERMUTATION['whole'] = [None, None]
    print("whole model (no fixed permutation)")
else:
    # Remove all spaces from each comma-separated token and skip empty tokens
    # (e.g. a trailing comma) instead of failing on int('').
    seeds = [
        int(token.replace(' ', ''))
        for token in resp.split(',')
        if token.strip()
    ]

for seed in seeds:
    if seed == 0:
        # seed 0 is reserved for the ordering obtained from the true DAG
        perm = topological_sort(dm.datasets[0].dag)
    else:
        # re-seed so that each seed always yields the same shuffle
        random.seed(seed)
        perm = list(range(len(in_features)))
        random.shuffle(perm)
    score = backward_score(perm, dm.datasets[0].dag)
    # the label doubles as the logger version name in the training cell
    FIXED_PERMUTATION[f'seed_{seed}' if seed else 'correct'] = [perm, score]
    print(perm, "backward score:", score)


permutations being used:

[0, 4, 7, 5, 1, 6, 3, 2] backward score: 0.25
[1, 0, 2, 5, 3, 4, 6, 7] backward score: 0.5
[6, 7, 5, 3, 4, 0, 1, 2] backward score: 0.375
[6, 0, 4, 3, 7, 1, 5, 2] backward score: 0.375
[2, 5, 3, 1, 0, 6, 4, 7] backward score: 0.5
[5, 4, 1, 0, 6, 3, 2, 7] backward score: 0.0


In [4]:

from lightning.pytorch import loggers as pl_loggers

# NOTE(review): presumably makes `torch` resolvable inside the string-encoded
# lambdas/functions passed to the training module below — confirm in dycode.
dycode.register_context(torch)

# Extract the category sizes once; they do not change between runs.
in_features = dm.train_data[0].features_values
n_covariates = len(in_features)

# Train one model per entry of FIXED_PERMUTATION. Each value is a
# [permutation, backward_score] pair and the key is used as the logger version.
for VERSION, permutation in FIXED_PERMUTATION.items():
    print(f"Running experiment with permutation {permutation}")

    logger = pl_loggers.tensorboard.TensorBoardLogger("lightning_logs", name="sanity_check", version=VERSION)

    # set callbacks for the trainer
    callbacks = [
        # monitor the learning rate (log to tensorboard)
        lightning.pytorch.callbacks.LearningRateMonitor(logging_interval="epoch"),
    ]

    trainer = lightning.Trainer(
        # accelerator="mps",  # remove this line to run on CPU
        callbacks=callbacks,
        # precision=16, # for mixed precision training
        # gradient_clip_val=1.0,
        # gradient_clip_algorithm="value",
        max_epochs=43,
        track_grad_norm="inf",
        log_every_n_steps=1,
        logger=logger,
        # overfit_batches=3,
        # detect_anomaly=True,
    )

    # torch.autograd.set_detect_anomaly(True)
    tm = OrderedTrainingModule(
        in_covariate_features=in_features,
        # three hidden layers; every covariate gets the same width per layer
        hidden_features_per_covariate=[
            [width] * n_covariates for width in (128, 64, 32)
        ],
        gamma_scaling=1,
        # only the permutation itself is passed on; the score is informational
        fixed_permutation=permutation[0],
        log_permutation=False,
        batch_norm=False,
        criterion_args=dict(
            terms=[
                "ocd.training.terms.OrderedLikelihoodTerm",
                # "ocd.training.terms.PermanentMatrixPenalizer",
                # the two norm terms below have factor=0
                dict(
                    name="norm(gamma)",
                    term_function='lambda training_module: training_module.model.Gamma.norm(float("inf"))',
                    factor=0,
                ),
                dict(
                    name="norm(layers)",
                    term_function='lambda training_module: max([layer.linear.weight.norm(float("inf")) for layer in training_module.model.made.layers])',
                    factor=0,
                ),
            ],
            regularizations=[
                # Zero-valued term whose factor function is used as a hook: it
                # stashes the latest loss on the module (read after training)
                # and returns 0.
                dict(
                    name="nothing",
                    term_function="lambda batch: torch.zeros(1, device=batch[0].device)",
                    factor="def factor(training_module, results_dict):\n\ttraining_module.loss = results_dict['loss']\n\treturn 0",
                ),
            ],
        ),
        noise_factor=0,
        optimizer=['torch.optim.Adam', 'torch.optim.Adam'],
        optimizer_parameters=['model.made', 'model.Gamma'],
        optimizer_args=[
            dict(
                weight_decay=0.0001,
            ),
            dict(),
        ],
        # `current_epoch % 10 < 10` is always true, so both conditions hold
        # at every epoch
        optimizer_is_active=[
            'lambda training_module: training_module.current_epoch % 10 < 10',
            'lambda training_module: training_module.current_epoch % 10 < 10',
        ],
        tau_scheduler="lambda training_module: max(0.0001, 0.5 * 0.99 ** (training_module.current_epoch // 1000))",
        n_sinkhorn_scheduler="lambda training_module: min(120, max(60, 60 + ((training_module.current_epoch - 60) // 10)))",
        lr=0.001,
        scheduler="torch.optim.lr_scheduler.ExponentialLR",
        scheduler_interval="epoch",
        scheduler_args={"gamma": 0.999},
    )
    try:
        trainer.fit(tm, dm)
    except KeyboardInterrupt:
        print("Keyboard interrupt detected, stopping training")
    # `loss` is only set by the factor hook above; fall back to None if
    # training stopped before it ever ran instead of raising AttributeError.
    print(f"Finished training on {permutation} with loss: {getattr(tm, 'loss', None)}")


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Running experiment with permutation [[0, 4, 7, 5, 1, 6, 3, 2], 0.25]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Finished training on [[0, 4, 7, 5, 1, 6, 3, 2], 0.25] with loss: 4.6111249923706055
Running experiment with permutation [[1, 0, 2, 5, 3, 4, 6, 7], 0.5]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Finished training on [[1, 0, 2, 5, 3, 4, 6, 7], 0.5] with loss: 4.6557512283325195
Running experiment with permutation [[6, 7, 5, 3, 4, 0, 1, 2], 0.375]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Finished training on [[6, 7, 5, 3, 4, 0, 1, 2], 0.375] with loss: 4.504185676574707
Running experiment with permutation [[6, 0, 4, 3, 7, 1, 5, 2], 0.375]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Finished training on [[6, 0, 4, 3, 7, 1, 5, 2], 0.375] with loss: 4.657809734344482
Running experiment with permutation [[2, 5, 3, 1, 0, 6, 4, 7], 0.5]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Finished training on [[2, 5, 3, 1, 0, 6, 4, 7], 0.5] with loss: 4.531513214111328
Running experiment with permutation [[5, 4, 1, 0, 6, 3, 2, 7], 0.0]



  | Name  | Type                   | Params
-------------------------------------------------
0 | model | SinkhornOrderDiscovery | 677 K 
-------------------------------------------------
677 K     Trainable params
0         Non-trainable params
677 K     Total params
2.711     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=43` reached.


Finished training on [[5, 4, 1, 0, 6, 3, 2, 7], 0.0] with loss: 4.327533721923828
