In [None]:
import numpy as np
import visualization.plots 
import datasets.control
import torch
import torch.optim as optim
from torchvision import datasets, transforms
from datasets.mnist1d import load_MNIST1D
from datasets.mnist import load_MNIST

import os
import matplotlib.pyplot as plt

In [None]:
from datasets.control import get_mixture_distribution

# Mixture specification: every row couples a mixing weight with one component,
# written as (distribution name, first parameter vector, second parameter vector).
# The per-row comments give the intended meaning of the two vectors; how
# get_mixture_distribution interprets them is defined in datasets.control.
_weighted_components = [
    # uniform between [-10, -5] and [5, 10]
    (0.1, ("uniform", np.array([-10.0, -5.0]), np.array([5.0, 10.0]))),
    # normal with mean [10, 5] and std [2, 12]
    (0.2, ("normal", np.array([10.0, 5.0]), np.array([2.0, 12.0]))),
    # normal centered at 0 with unit variance in both dims
    (0.2, ("normal", np.array([0.0, 0.0]), np.array([1.0, 1.0]))),
    # normal with mean [3, 6] and std [4, 5]
    (0.1, ("normal", np.array([3.0, 6.0]), np.array([4.0, 5.0]))),
    # laplace centered at 4 with scale 1 in both dims
    (0.1, ("laplace", np.array([4.0, 4.0]), np.array([1.0, 1.0]))),
    # normal centered at -5 with std 3 in both dims
    (0.2, ("normal", np.array([-5.0, -5.0]), np.array([3.0, 3.0]))),
    # exponential with scale=1 in both dims, then shifted by -5
    # (first vector is the scale, second vector is the shift)
    (0.1, ("exponential", np.array([1.0, 1.0]), np.array([-5.0, -5.0]))),
]

# Split the paired rows back into the two parallel lists the sampler takes.
mixture_list = [component for _, component in _weighted_components]
weights = [weight for weight, _ in _weighted_components]

# Draw the 2-D sample set used by the plotting cell below.
data = get_mixture_distribution(mixture_list, weights, size=(100000, 2))

In [None]:
# Visualize the mixture samples drawn into `data` with the project's plotting helper.
# Presumably renders a 3-D kernel density estimate, judging by the helper's name —
# confirm against visualization.plots.
visualization.plots.plot_3d_kde(data)

# Train GMMN

## Parameters

In [None]:
# --- Reproducibility ---------------------------------------------------------
# Seed NumPy's legacy global RNG and PyTorch's global RNG so that a
# Restart-&-Run-All of the training cells starts from the same random state.
# NOTE(review): whether the project's loaders/trainers draw from these global
# generators is not visible from this notebook — confirm.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# --- Output locations --------------------------------------------------------
model_dir = "./model_weights"

# exist_ok=True makes the cell safely re-runnable and avoids the
# check-then-create race of `if not os.path.exists(...): os.mkdir(...)`.
os.makedirs(model_dir, exist_ok=True)

ENCODER_SAVE_PATH = f"{model_dir}/autoencoder.pth"
GMMN_SAVE_PATH = f"{model_dir}/gmmn.pth"

# --- Data --------------------------------------------------------------------
BATCH_SIZE = 1000 # taken from original paper
trainloader, testloader = load_MNIST1D(batch_size=BATCH_SIZE)
# Alternative dataset (swap with the line above to train on 2-D MNIST instead):
# trainloader, testloader = load_MNIST(batch_size = BATCH_SIZE, size=7, flatten=True)

# --- Model / training sizes --------------------------------------------------
# Input width is read from axis 2 of the first training batch.
# Assumes batches are laid out as (batch, 1, features) — TODO confirm in the loader.
N_INP = next(iter(trainloader))[0].shape[2]
NOISE_SIZE = 10
ENCODED_SIZE = N_INP // 2  # latent code is half the input width
N_ENCODER_EPOCHS = 2000
N_GEN_EPOCHS = 2000

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Train Autoencoder


In [None]:
from models.gmmn.train_autoencoder import train_autoencoder
from visualization.loss import plot_loss

# Stage 1: train the autoencoder on the training loader.
# The helper returns the trained model together with its loss history.
# NOTE(review): ENCODER_SAVE_PATH is presumably where the weights get written —
# confirm in models.gmmn.train_autoencoder.
autoencoder, losses_autoencoder = train_autoencoder(
    trainloader,
    N_INP,
    ENCODED_SIZE,
    N_ENCODER_EPOCHS,
    device,
    ENCODER_SAVE_PATH,
)

# Plot the recorded autoencoder losses.
plot_loss(
    losses_autoencoder,
    title="Autoencoder Loss",
)

## Continue with GMMN


In [None]:
from models.gmmn.train_gmmn import train_gmmn
from visualization import loss  # not used in this cell; kept so the `loss` name stays bound
# Import plot_loss here as well, so this cell does not silently depend on the
# import performed in the autoencoder cell.
from visualization.loss import plot_loss

# Stage 2: train the GMMN generator using the autoencoder trained above.
# The helper returns the trained network together with its loss history.
# NOTE(review): GMMN_SAVE_PATH is presumably where the weights get written —
# confirm in models.gmmn.train_gmmn.
gmm_net, losses_gmmn = train_gmmn(
    trainloader,
    autoencoder,
    ENCODED_SIZE,
    NOISE_SIZE,
    BATCH_SIZE,
    N_GEN_EPOCHS,
    device,
    GMMN_SAVE_PATH,
)

# Plot the recorded GMMN losses.
plot_loss(losses_gmmn, title="GMMN Loss")

## Sample Visualizations

In [None]:
# Compare one batch of real training samples with samples drawn from the trained GMMN.
from models.gmmn.gmmn import generate_gmmn_samples
from visualization.visualize_1d_data import visualize_mnist1d

# Take the first batch from the training loader as the "real" reference.
samples, labels = next(iter(trainloader))
visualize_mnist1d(samples, labels, title="Real")
# Last argument is presumably the number of samples to generate (5 here) — TODO confirm.
gen_samples = generate_gmmn_samples(gmm_net, autoencoder, NOISE_SIZE, 5)
# NOTE(review): `labels` still holds the labels of the real batch above; the generated
# samples have no labels of their own, so any per-sample label shown in the
# "Generated" figure may be misleading — confirm how visualize_mnist1d uses labels.
visualize_mnist1d(gen_samples, labels, title="Generated")

## Bootstrapping Hypothesis Test

In [None]:
from utilities.bootstrapping_test import bootstrap_hypothesis_test

# Collect every training batch into a single array of real samples.
# The comprehension keeps its loop targets local, so the notebook-level `data`
# (mixture samples) and `labels` (batch shown in the visualization cell) are
# no longer overwritten when this cell runs.
# .cpu() moves each batch to host memory in case the loader yields GPU tensors.
real_batches = [batch.cpu() for batch, _ in trainloader]

# Concatenate along the batch axis, convert to NumPy and drop singleton axes.
original_data = np.squeeze(torch.cat(real_batches, dim=0).numpy())

# The test calls `generating_function(*gen_args)` to obtain model samples
# (presumably — confirm in utilities.bootstrapping_test).
generating_function = generate_gmmn_samples
# Last element is presumably the number of samples to generate per call — TODO confirm.
gen_args = (gmm_net, autoencoder, NOISE_SIZE, 20000)
alpha = 0.05           # significance level passed to the test
num_iterations = 1000  # iteration count passed to the test

bootstrap_hypothesis_test(original_data, generating_function, gen_args, alpha, num_iterations)