# Circuit learning module: Lambeq's QuantumTrainer

This module performs the optimization with Lambeq's native optimizer. Because the circuits are constructed with Lambeq and DisCoPy, this optimizer is the natural choice.

In [1]:
import json
import os
import glob
import warnings
from pathlib import Path
import numpy as np
import pickle

from discopy.utils import loads
from pytket.extensions.qiskit import AerBackend
from lambeq import TketModel
from lambeq import QuantumTrainer, SPSAOptimizer
from lambeq import Dataset

this_folder = os.path.abspath(os.getcwd())
warnings.filterwarnings('ignore')
os.environ['TOKENIZERS_PARALLELISM'] = 'true'

BATCH_SIZE = 60
EPOCHS = 1000
SEED = 2

  from tqdm.autonotebook import tqdm


## Read circuit data

We read the circuits from the pickled files.

In [2]:
training_circuits_paths = glob.glob(this_folder + "//simplified-JOB-diagrams//circuits//training//[0-9]*.p")
test_circuits_paths = glob.glob(this_folder + "//simplified-JOB-diagrams//circuits//test//[0-9]*.p")

def read_diagrams(circuit_paths):
    circuits = {}
    for serialized_diagram in circuit_paths:
        base_name = Path(serialized_diagram).stem
        f = open(serialized_diagram, "rb")
        diagram = pickle.load(f)
        circuits[base_name] = diagram
    return circuits


training_circuits = read_diagrams(training_circuits_paths)
test_circuits = read_diagrams(test_circuits_paths)

## Read training and test data

In [3]:
training_data, test_data = None, None
with open(this_folder + "//data//training_data.json", "r") as inputfile:
    training_data = json.load(inputfile)['training_data']
with open(this_folder + "//data//test_data.json", "r") as inputfile:
    test_data = json.load(inputfile)['test_data']
    

def time_to_states(data):
    labeled_data = {}
    for elem in data:
        if elem["time"] < 2001:
            labeled_data[elem["name"]] = [1,0] # corresponds to |0>
        else:
            labeled_data[elem["name"]] = [0,1] # corresponds to |1>
    return labeled_data


training_data_labels = time_to_states(training_data)
test_data_labels = time_to_states(test_data)

## Lambeq optimizer

In [4]:
all_circuits = list(training_circuits.values()) + list(test_circuits.values())

#print(all_circuits[0].free_symbols)

print("Total number of circuits: ", len(all_circuits))
print("Total number of variables: ", len([sym for circuit in all_circuits for sym in circuit.free_symbols]))

backend = AerBackend()
backend_config = {
    'backend': backend,
    'compilation': backend.default_compilation_pass(2),
    'shots': 8192
}

model = TketModel.from_diagrams(all_circuits, backend_config=backend_config)

Total number of circuits:  35
Total number of variables:  1158


## Loss function and evaluation

In [5]:
loss = lambda y_hat, y: -np.sum(y * np.log(y_hat)) / len(y)  # binary cross-entropy loss
acc = lambda y_hat, y: np.sum(np.round(y_hat) == y) / len(y) / 2  # half due to double-counting
eval_metrics = {"acc": acc}

## Trainer

In [6]:
trainer = QuantumTrainer(
    model,
    loss_function=loss,
    epochs=EPOCHS,
    optimizer=SPSAOptimizer,
    optim_hyperparams={'a': 0.05, 'c': 0.06, 'A':0.01*EPOCHS},
    evaluate_functions=eval_metrics,
    evaluate_on_train=True,
    verbose = 'text',
    seed=SEED
)

In [None]:
def randint(low=-1 << 31, high=(1 << 31)-1):
    return np.random.randint(low, high, dtype = 'int32')

print(randint())

## Training dataset

In [None]:
train_dataset = Dataset(
            list(training_circuits.values()),
            list(training_data_labels.values()),
            batch_size=BATCH_SIZE)

val_dataset = Dataset(list(test_circuits.values()), list(test_data_labels.values()), shuffle=False)

trainer.fit(train_dataset, val_dataset, evaluation_step=1, logging_step=100)

Epoch 1:     train/loss: 2.4855   valid/loss: 2.0349   train/acc: 0.4400   valid/acc: 0.5000
