# Entropy vs Accuracy

Here we study how starting with a maximum entropy dataset, and therefore, maximising the entropy transfer to the model, results in better trained models

In [1]:
import znrnd

import numpy as np
import optax
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from neural_tangents import stax

import matplotlib.pyplot as plt
import copy



### Trivial test

In [2]:
data_generator = znrnd.data.MNISTGenerator(ds_size=1000)

Metal device set to: Apple M1


2022-05-13 14:28:50.427227: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [3]:
model = stax.serial(
    stax.Conv(32, (3, 3)),
    stax.Relu(),
    stax.AvgPool(window_shape=(2, 2), strides=(2, 2)),
    stax.Conv(64, (3, 3)),
    stax.Relu(),
    stax.AvgPool(window_shape=(2, 2), strides=(2, 2)),
    stax.Flatten(),
    stax.Dense(256),
    stax.Relu(),
    stax.Dense(10)
)

In [4]:
production_model = znrnd.models.NTModel(
        nt_module=model,
        optimizer=optax.adam(learning_rate=0.001),
        loss_fn=znrnd.loss_functions.CrossEntropyLoss(classes=10, apply_softmax=False),
        input_shape=(1, 28, 28, 1),
        training_threshold=0.001
    )

In [5]:
test_ds = {
    "inputs": data_generator.ds_test["image"],
    "targets": data_generator.ds_test["label"]
}

In [16]:
maximum_agent = znrnd.agents.ApproximateMaximumEntropy(
    target_network=production_model, 
    data_generator=data_generator,
    samples=30
)

In [17]:

random_agent = znrnd.agents.RandomAgent(
    data_generator=data_generator
)

In [18]:
max_ds = maximum_agent.build_dataset(target_size=30)

In [19]:
ntk = production_model.compute_ntk(x_i=max_ds, normalize=False)

In [20]:
max_entropy = znrnd.analysis.EntropyAnalysis(matrix=ntk["empirical"]).compute_von_neumann_entropy()

In [21]:
max_entropy

DeviceArray(0.5621587-0.j, dtype=complex64)

In [12]:
random_ds = random_agent.build_dataset(target_size=30)

In [13]:
ntk = production_model.compute_ntk(x_i=random_ds, normalize=False)

In [14]:
random_entropy = znrnd.analysis.EntropyAnalysis(matrix=ntk["empirical"]).compute_von_neumann_entropy()

In [15]:
random_entropy

DeviceArray(0.5275276-0.j, dtype=complex64)

In [16]:
max_ds = {
    "inputs": np.take(data_generator.ds_train["image"], maximum_agent.target_indices, axis=0),
    "targets": np.take(data_generator.ds_train["label"], maximum_agent.target_indices, axis=0)
}
random_ds = {
    "inputs": np.take(data_generator.ds_train["image"], random_agent.target_indices, axis=0),
    "targets": np.take(data_generator.ds_train["label"], random_agent.target_indices, axis=0)
}

In [17]:
max_final_entropy = []
random_final_entropy = []
max_losses = []
random_losses = []
max_accuracy = []
random_accuracy = []

for _ in range(5):
    max_model = znrnd.models.NTModel(
        nt_module=model,
        optimizer=optax.adam(learning_rate=0.001),
        loss_fn=znrnd.loss_functions.CrossEntropyLoss(classes=10, apply_softmax=False),
        input_shape=(1, 28, 28, 1),
        training_threshold=0.001
    )
    random_model = znrnd.models.NTModel(
        nt_module=model,
        optimizer=optax.adam(learning_rate=0.001),
        loss_fn=znrnd.loss_functions.CrossEntropyLoss(classes=10, apply_softmax=False),
        input_shape=(1, 28, 28, 1),
        training_threshold=0.001
    )
    max_loss, max_acc, _ = max_model.train_model(
        train_ds=max_ds, test_ds=test_ds, batch_size=10, epochs=500
    )
    random_loss, random_acc = random_model.train_model(
        train_ds=random_ds, test_ds=test_ds, batch_size=10, epochs=500
    )
    final_max_ntk = max_model.compute_ntk(max_ds["inputs"], normalize=False)
    max_final_entropy.append(znrnd.analysis.EntropyAnalysis(
        final_max_ntk["empirical"]
    ).compute_von_neumann_entropy())
    final_random_ntk = random_model.compute_ntk(random_ds["inputs"], normalize=False)
    random_final_entropy.append(znrnd.analysis.EntropyAnalysis(
        final_random_ntk["empirical"]
    ).compute_von_neumann_entropy())
    
    max_losses.append(max_loss)
    max_accuracy.append(max_acc)
    random_losses.append(random_loss)
    random_accuracy.append(random_acc)

Epoch: 500: 100%|██████████████| 500/500 [01:29<00:00,  5.58batch/s, accuracy=0.486, test_loss=7.63]


ValueError: too many values to unpack (expected 2)

### Analysis

In [None]:
max_delta_s = max_entropy - np.array(max_final_entropy)

In [None]:
max_delta_s_error = np.std(max_delta_s) / np.sqrt(5)
max_delta_s_ave = np.real(np.mean(max_delta_s))

In [None]:
random_delta_s = random_entropy - np.array(random_final_entropy)

In [None]:
random_delta_s_error = np.std(random_delta_s) / np.sqrt(5)
random_delta_s_ave = np.real(np.mean(random_delta_s))

In [None]:
max_min_loss = np.min(max_losses, axis=0).mean()
random_min_loss = np.min(random_losses, axis=0).mean()

In [None]:
plt.errorbar(
    [max_min_loss, random_min_loss], 
    [max_delta_s_ave, random_delta_s_ave], 
    yerr=[float(max_delta_s_error), float(random_delta_s_error)],
    marker='o'
)
plt.show()

In [18]:
start_entropy = []
final_entropy = []
final_entropy_error = []
losses = {}
accuracy = {}
train_metrics = {}

for i in range(30):
    random_ds = random_agent.build_dataset(target_size=30)
    ntk = production_model.compute_ntk(x_i=random_ds, normalize=False)
    start_entropy.append(
        znrnd.analysis.EntropyAnalysis(
            matrix=ntk["empirical"]
        ).compute_von_neumann_entropy()
    )
    random_ds = {
    "inputs": np.take(data_generator.ds_train["image"], random_agent.target_indices, axis=0),
    "targets": np.take(data_generator.ds_train["label"], random_agent.target_indices, axis=0)
}
    entropy = []
    losses[i] = []
    accuracy[i] = []
    train_metrics[i] = []
    for _ in range(5):
        random_model = znrnd.models.NTModel(
            nt_module=model,
            optimizer=optax.adam(learning_rate=0.001),
            loss_fn=znrnd.loss_functions.CrossEntropyLoss(classes=10, apply_softmax=False),
            input_shape=(1, 28, 28, 1),
            training_threshold=0.001
        )
        random_loss, random_acc, training_metrics = random_model.train_model(
            train_ds=random_ds, test_ds=test_ds, batch_size=10, epochs=500
        )

        final_random_ntk = random_model.compute_ntk(random_ds["inputs"], normalize=False)
        entropy.append(znrnd.analysis.EntropyAnalysis(
            final_random_ntk["empirical"]
        ).compute_von_neumann_entropy())

        losses[i].append(random_loss)
        accuracy[i].append(random_acc)
        train_metrics[i].append(training_metrics)
        
    final_entropy.append(np.mean(entropy))
    final_entropy.append(np.std(entropy) / np.sqrt(5))

Epoch: 500: 100%|██████████████| 500/500 [01:15<00:00,  6.64batch/s, accuracy=0.544, test_loss=3.39]
Epoch: 500: 100%|██████████████| 500/500 [01:14<00:00,  6.67batch/s, accuracy=0.582, test_loss=2.74]
Epoch: 500: 100%|██████████████| 500/500 [01:15<00:00,  6.64batch/s, accuracy=0.578, test_loss=2.86]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.49batch/s, accuracy=0.571, test_loss=3.16]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.50batch/s, accuracy=0.588, test_loss=3.21]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.52batch/s, accuracy=0.568, test_loss=2.41]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.57batch/s, accuracy=0.582, test_loss=2.35]
Epoch: 500: 100%|██████████████| 500/500 [01:21<00:00,  6.13batch/s, accuracy=0.562, test_loss=2.47]
Epoch: 500: 100%|██████████████| 500/500 [01:17<00:00,  6.48batch/s, accuracy=0.561, test_loss=2.36]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.54batch/s, accuracy=0.571, test_l

Epoch: 500: 100%|██████████████| 500/500 [01:17<00:00,  6.47batch/s, accuracy=0.532, test_loss=4.59]
Epoch: 500: 100%|███████████████| 500/500 [01:16<00:00,  6.54batch/s, accuracy=0.53, test_loss=4.94]
Epoch: 500: 100%|██████████████| 500/500 [01:17<00:00,  6.48batch/s, accuracy=0.519, test_loss=5.18]
Epoch: 500: 100%|██████████████| 500/500 [01:17<00:00,  6.46batch/s, accuracy=0.527, test_loss=5.53]
Epoch: 500: 100%|██████████████| 500/500 [01:17<00:00,  6.45batch/s, accuracy=0.631, test_loss=1.99]
Epoch: 500: 100%|██████████████| 500/500 [01:18<00:00,  6.37batch/s, accuracy=0.634, test_loss=1.87]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.50batch/s, accuracy=0.622, test_loss=2.15]
Epoch: 500: 100%|███████████████| 500/500 [01:16<00:00,  6.51batch/s, accuracy=0.63, test_loss=1.68]
Epoch: 500: 100%|███████████████| 500/500 [01:16<00:00,  6.57batch/s, accuracy=0.629, test_loss=1.8]
Epoch: 500: 100%|██████████████| 500/500 [01:16<00:00,  6.56batch/s, accuracy=0.646, test_l

In [22]:
np.real(final_entropy)[::2]

array([0.34193692, 0.38839158, 0.3253614 , 0.38360772, 0.38047135,
       0.36951584, 0.38306445, 0.3529695 , 0.35951358, 0.38687006,
       0.32582137, 0.3844153 , 0.39197475, 0.39165777, 0.35401005,
       0.36168128, 0.30850658, 0.33451253, 0.37175766, 0.35899752,
       0.3832767 , 0.31578174, 0.37504977, 0.35646746, 0.3734667 ,
       0.35389709, 0.34924886, 0.35546026, 0.36773968, 0.37952518])

In [23]:
np.real(start_entropy)

array([0.492567  , 0.5426477 , 0.4761879 , 0.5166925 , 0.51928097,
       0.49038827, 0.5158705 , 0.48129734, 0.50130594, 0.53730965,
       0.482004  , 0.5239529 , 0.56094676, 0.51805216, 0.47833666,
       0.51683843, 0.48890436, 0.5154919 , 0.506537  , 0.5080511 ,
       0.52154684, 0.47180566, 0.54042846, 0.5274191 , 0.5196627 ,
       0.49716073, 0.48876482, 0.5090897 , 0.50207394, 0.5271589 ],
      dtype=float32)

In [24]:
delta_s = np.real(start_entropy) - np.real(final_entropy)[::2]

In [26]:
delta_s

array([0.15063009, 0.15425614, 0.15082651, 0.1330848 , 0.13880962,
       0.12087244, 0.13280606, 0.12832785, 0.14179236, 0.15043959,
       0.15618262, 0.1395376 , 0.16897202, 0.12639439, 0.12432662,
       0.15515715, 0.18039778, 0.18097937, 0.13477936, 0.14905357,
       0.13827014, 0.15602392, 0.16537869, 0.17095163, 0.14619598,
       0.14326364, 0.13951597, 0.15362945, 0.13433427, 0.14763373])

In [42]:
loss_arr = [min(np.mean(losses[item], axis=0)) for item in losses]

In [22]:
plt.plot(np.real(final_entropy)[::2], loss_arr, '.')

NameError: name 'final_entropy' is not defined