In [1]:
import torch
import pytorch_lightning as pl
import importlib
import numpy as np
import wandb
# from triplet_vade import TripletVaDE
import triplet_vade
import pl_modules
importlib.reload(triplet_vade)
from autoencoder import SimpleAutoencoder, VaDE, ClusteringEvaluationCallback, cluster_acc

In [None]:
# Hyperparameters for this experiment. Only 'lr', 'pretrain_epochs' and
# 'batch_size' are read by the PLVaDE constructor call below; the remaining
# keys are not used in this cell.
hparams = {
    'lr': 2e-3,
    'lr_gmm': 2e-3,
    'batch_size': 2**8,
    'pretrain_epochs': 20,
    'triplet_loss_margin': 0.5,
    'triplet_loss_alpha': 100.,
    'warmup_epochs': 10,
    'triplet_loss_alpha_kl': 1.,
}

# Layer widths handed to PLVaDE as `n_neurons`
# (presumably input -> hidden layers -> latent size; confirm against PLVaDE).
layer_sizes = [784, 512, 512, 2048, 10]

model = pl_modules.PLVaDE(
    n_neurons=layer_sizes,
    device='cuda',
    k=10,
    lr=hparams['lr'],
    pretrain_epochs=hparams['pretrain_epochs'],
    covariance_type='diag',
    batch_size=hparams['batch_size'],
)



In [29]:
# Move the full model to the GPU, run its clustering routine, and report the
# clustering accuracy of the predicted assignments against the true labels.
clustering = model.cuda().cluster_data()
true_y, predicted_y, X_encoded = clustering
accuracy = cluster_acc(true_y, predicted_y)
print(accuracy)
# Last expression: display the predicted assignments.
predicted_y

0.3528428571428571


array([7, 1, 6, ..., 5, 1, 1])

In [24]:
# Same evaluation as for the full model, but using only the pretrained
# sub-model (moved to the GPU first).
pretrained_on_gpu = model.pretrained_model.cuda()
true_y, predicted_y, X_encoded = pretrained_on_gpu.cluster_data()
accuracy = cluster_acc(true_y, predicted_y)
print(accuracy)

0.7628


In [4]:
# Grab the first batch from the model's training dataloader for the
# inspection cells; the batch is unpacked into two items, `bx` and `by`.
train_loader = model.train_dataloader()
bx, by = next(iter(train_loader))

In [55]:
# Side-by-side view of the initial GMM's component means and the means of the
# VaDE component distribution.
gmm_means = model.init_gmm.means_
vade_means = model.model.component_distribution.mean
gmm_means, vade_means

(array([[ 0.9055791 ,  1.0950064 ,  0.75421312, -0.80641784,  0.23846424,
         -0.59620896, -0.07278592, -0.24722536, -0.40205017,  0.47454988],
        [-1.19750139,  0.10071502, -1.59797957, -1.42656271, -0.66573717,
          2.34663196, -1.3362615 , -1.96449335, -1.74677707, -1.8560651 ],
        [-0.5436473 , -0.44061554,  0.68548113, -0.49700445, -0.98773744,
         -0.09343752, -0.53467973,  0.71507886, -1.75551042,  0.29266045],
        [ 1.62304003, -1.30605376,  1.08791255,  0.54501986, -0.09922689,
          2.10611049, -1.06515453, -1.9061877 , -1.5142725 ,  0.37494932],
        [ 2.66404938, -1.48925908, -1.03127394,  0.64042681, -1.62410771,
          1.21854904, -2.36046737, -2.23743817, -1.4381259 ,  1.03853771],
        [ 0.47050186,  0.64338711, -1.17018427,  0.43427443, -1.38957879,
          0.48480622, -2.03081257, -0.87171628, -2.30156212, -0.17873957],
        [-0.52530547,  1.1544921 , -0.71658268,  0.5025238 ,  0.68580741,
          0.10497609, -0.4064432

In [54]:
# Side-by-side view of the initial GMM's covariances and the variances of the
# VaDE component distribution.
gmm_covariances = model.init_gmm.covariances_
vade_variances = model.model.component_distribution.variance
gmm_covariances, vade_variances

(array([[0.5252965 , 0.33562909, 0.27581712, 0.50842733, 0.39565572,
         0.33949894, 0.16322342, 0.42585222, 0.3483301 , 0.65194756],
        [1.50162612, 1.33836077, 1.48422855, 0.95791283, 1.84342354,
         0.82042596, 2.1170919 , 1.11774057, 0.63140629, 1.07474976],
        [0.64180652, 0.60736693, 0.48753873, 0.5579199 , 0.85338415,
         0.79404369, 0.5185862 , 0.77819113, 0.45736378, 0.79952175],
        [0.26511239, 0.19691297, 0.38045161, 0.14650245, 0.39999043,
         0.23335694, 0.4270741 , 0.21161547, 0.34734943, 0.26272527],
        [0.2467947 , 0.2699257 , 1.16019882, 0.2133611 , 0.5517398 ,
         0.31045453, 0.41250295, 0.16990584, 0.31336475, 0.2071866 ],
        [0.80433203, 1.00185418, 1.35164413, 0.46503722, 1.21439897,
         0.81042522, 0.65177523, 2.22171711, 0.86698844, 0.85895694],
        [0.9154603 , 0.7562054 , 0.86345438, 0.56374431, 0.9121497 ,
         1.03155643, 0.72933431, 0.71097725, 0.49804208, 0.8270209 ],
        [1.36798733, 0.7330

In [53]:
# Compare per-sample cluster membership probabilities from the initial GMM
# with those implied by the VaDE mixture parameters.
#
# This is a read-only diagnostic, so it runs under torch.no_grad(): without it
# every tensor here carries an autograd graph that is never used.
with torch.no_grad():
    # Reference: membership probabilities from the initial GMM (takes a NumPy array).
    print(model.init_gmm.predict_proba(X_encoded.cpu().detach().numpy()))

    # `[:, None]` inserts an axis after the batch axis so each encoding is
    # scored against the component distribution's batch of components.
    log_p_z_given_c = model.model.component_distribution.log_prob(X_encoded[:, None])

    # Softmax over the last axis of (component log-likelihood + mixture logits)
    # normalises the scores into per-sample probabilities over components.
    labels = (log_p_z_given_c + model.model.mixture_logits).softmax(dim=-1)
labels

[[7.35093631e-12 1.00669491e-06 8.61869720e-06 ... 3.51635538e-06
  1.01730091e-14 1.57751178e-05]
 [8.59192442e-01 3.73479555e-09 1.32073980e-01 ... 4.71088685e-04
  5.82529515e-03 1.79475360e-04]
 [1.47365354e-06 5.86144309e-05 8.55498211e-07 ... 1.65824201e-03
  6.60718538e-08 1.28938732e-05]
 ...
 [3.89746973e-17 1.63948268e-03 4.53787269e-10 ... 1.76296459e-06
  5.89301168e-06 4.23917178e-05]
 [1.05422869e-06 7.55600080e-07 9.99094418e-01 ... 2.09262708e-05
  7.28132063e-05 6.63041308e-04]
 [2.15510294e-24 2.43369646e-01 4.79198409e-09 ... 1.63444334e-07
  8.23774116e-12 7.54448322e-01]]


tensor([[1.9404e-08, 2.4331e-05, 9.9107e-01,  ..., 7.6886e-03, 5.1221e-11,
         1.2077e-03],
        [1.1910e-08, 2.7586e-04, 1.5686e-02,  ..., 1.5497e-03, 2.1305e-07,
         1.5315e-01],
        [1.5638e-13, 6.7050e-05, 1.4360e-05,  ..., 2.1732e-07, 5.5227e-05,
         3.9823e-07],
        ...,
        [2.8464e-24, 5.4530e-03, 2.5936e-09,  ..., 3.9161e-07, 6.6599e-02,
         1.5360e-04],
        [6.7088e-14, 6.8942e-03, 7.1845e-03,  ..., 3.4066e-01, 4.8533e-13,
         6.2850e-01],
        [6.1804e-22, 9.1590e-01, 4.1389e-06,  ..., 1.4146e-02, 6.9616e-07,
         2.1006e-02]], device='cuda:0', grad_fn=<SoftmaxBackward>)

In [34]:
# Check that the VaDE branch and the pretrained autoencoder reconstruct the
# same batch identically.
#
# Runs under torch.no_grad() because this is inspection only. The batch is
# moved to the GPU once and the VaDE reconstruction is computed once, so the
# tensor that is printed is the same one that is compared.
with torch.no_grad():
    batch_gpu = bx.cuda()

    # VaDE path: encoder -> latent distribution -> its mean as the encoding.
    X_encoded = model.model.latent_dist(model.model.encoder(batch_gpu)).mean
    # Pretrained path: the encoder output is used directly as the encoding.
    X_encoded_pre = model.pretrained_model.encoder(batch_gpu)

    vade_reconstruction = model.model.out_dist(model.model.decoder(X_encoded)).mean
    pretrained_reconstruction = model.pretrained_model.decoder(X_encoded_pre)

print(vade_reconstruction)
# Element-wise exact equality (not a tolerance check), shown as a boolean tensor.
pretrained_reconstruction == vade_reconstruction

tensor([[6.9932e-09, 6.8488e-09, 3.4083e-08,  ..., 3.9841e-09, 5.3332e-09,
         1.7465e-08],
        [1.4790e-12, 6.8253e-12, 7.2082e-12,  ..., 1.2044e-12, 1.4937e-12,
         2.7497e-12],
        [1.5625e-10, 6.6934e-10, 1.5105e-10,  ..., 5.8568e-10, 7.9601e-11,
         2.3530e-10],
        ...,
        [2.9426e-10, 1.9467e-10, 1.2334e-10,  ..., 4.2031e-10, 3.6670e-10,
         3.4525e-11],
        [8.6637e-13, 4.4669e-13, 5.2528e-12,  ..., 7.7404e-13, 2.1593e-12,
         1.5315e-12],
        [4.0634e-08, 2.3921e-08, 1.2632e-08,  ..., 2.6452e-08, 1.4602e-08,
         3.2272e-08]], device='cuda:0', grad_fn=<SigmoidBackward>)


tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]], device='cuda:0')

In [28]:
# Compare the mean activation of the VaDE encoder with that of the pretrained
# encoder with its last layer sliced off (`[:-1]`), on the same batch.
batch_gpu = bx.cuda()
vade_encoder_mean = model.model.encoder(batch_gpu).mean()
print(vade_encoder_mean)
model.pretrained_model.encoder[:-1](batch_gpu).mean()

tensor(0.0465, device='cuda:0', grad_fn=<MeanBackward0>)


tensor(0.0465, device='cuda:0', grad_fn=<MeanBackward0>)

In [3]:
# Train PLVaDE with full covariances and log the run to Weights & Biases.
# Only 'lr', 'pretrain_epochs', 'batch_size' and 'data_size' are read by the
# constructor call below; the other keys are not used in this cell.
hparams = {
    'lr': 2e-3,
    'lr_gmm': 2e-3,
    'batch_size': 2**8,
    'pretrain_epochs': 100,
    'triplet_loss_margin': 0.5,
    'triplet_loss_alpha': 100.,
    'warmup_epochs': 10,
    'triplet_loss_alpha_kl': 1.,
    'data_size': 1024,
}

model = pl_modules.PLVaDE(
    n_neurons=[784, 512, 512, 2048, 10],
    device='cuda',
    k=10,
    lr=hparams['lr'],
    pretrain_epochs=hparams['pretrain_epochs'],
    covariance_type='full',
    batch_size=hparams['batch_size'],
    data_size=hparams['data_size'],
)

logger = pl.loggers.WandbLogger(project='VADE')
trainer = pl.Trainer(
    gpus=1,
    logger=logger,
    callbacks=[ClusteringEvaluationCallback()],
    log_every_n_steps=10,
    progress_bar_refresh_rate=20,
    max_epochs=5,
)
try:
    trainer.fit(model)
finally:
    # Close the W&B run even if training raises, so a failed run is not left
    # open for later cells to log into.
    wandb.finish()

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[34m[1mwandb[0m: wandb version 0.10.18 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade



  | Name    | Type       | Params
---------------------------------------
0 | encoder | Sequential | 1.7 M 
1 | decoder | Sequential | 1.7 M 
---------------------------------------
3.5 M     Trainable params
0         Non-trainable params
3.5 M     Total params


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: None, using: 0 TPU cores
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type              | Params
-------------------------------------------------------
0 | pretrained_model | SimpleAutoencoder | 3.5 M 
1 | model            | VaDE              | 3.5 M 
-------------------------------------------------------
7.0 M     Trainable params
0         Non-trainable params
7.0 M     Total params


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
loss,98.55799
bce_loss,0.10185
_step,5873.0
_runtime,252.0
_timestamp,1613079211.0
epoch,4.0
NMI,0.84802
ACC,0.80971
ACC2,0.86263
ARI,0.77793


0,1
loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▆▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅▅
bce_loss,█▅▃▃▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▆▃▂▂▂▁▁▁▂▂▁▁▂▁▁▁▂▁▂▁▂
_step,▁▁▁▂▂▂▃▃▃▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇███████
_runtime,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
_timestamp,▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇████
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██▁▁▂▂▂
NMI,▁▅▆▆▆▆▇▇▇▇▇▇█████████████
ACC,▁▄▅▅▅▅▆▇▇▇▇▇▇▇▇▇▇▇█▇▇▇▇▇▇
ACC2,▁▅▆▅▆▆▆▇▇▇▇▇█████████████
ARI,▁▄▅▅▅▅▆▇▇▇▇▇██▇██████████


In [None]:
# Train TripletVaDE three times on the CPU with identical hyperparameters,
# logging each repetition to the same W&B project and group.
hparams = {'lr': 2e-3, 'lr_gmm': 2e-3, 'batch_size': 2**8, 'pretrain_epochs': 20, 'triplet_loss_margin': 0.5, 'triplet_loss_alpha': 100., 'warmup_epochs':10, 'triplet_loss_alpha_kl': 1.}
for i in range(3):
    # All hyperparameters are forwarded to the model as keyword arguments.
    triplets_model = triplet_vade.TripletVaDE(n_neurons=[784, 512, 512, 2048, 10], device='cpu', **hparams)
    # A fresh logger per repetition; wandb.finish() below closes the run before
    # the next repetition starts.
    logger =  pl.loggers.wandb.WandbLogger(project='VaDE Triplets', group='test_triplet_loss')

    callbacks = [ClusteringEvaluationCallback(on_start=True), pl.callbacks.LearningRateMonitor()]
    trainer = pl.Trainer(gpus=0, callbacks=callbacks,
                        logger=logger, log_every_n_steps=10, progress_bar_refresh_rate=10, max_epochs=50)
    try:
        logger.log_hyperparams(hparams)
        trainer.fit(triplets_model)
    finally:
        # Always close the W&B run, even when this repetition fails, so the
        # run is not left open.
        wandb.finish()
