# Circuit learning module: Lambeq's QuantumTrainer

This module performs the optimization with Lambeq's native optimizer. Because the circuits are constructed with Lambeq and DisCoPy, this optimizer is the natural choice. The code is based on the workflow presented in https://github.com/CQCL/lambeq/blob/main/docs/examples/quantum_pipeline.ipynb.

In [1]:
import warnings
warnings.filterwarnings('ignore')

import json
import os
import glob
from pathlib import Path
import numpy as np
import pickle

from discopy.utils import loads
from pytket.extensions.qiskit import AerBackend
from lambeq import TketModel, NumpyModel
from lambeq import QuantumTrainer, SPSAOptimizer
from lambeq import Dataset

this_folder = os.path.abspath(os.getcwd())
os.environ['TOKENIZERS_PARALLELISM'] = 'true'
#os.environ["JAX_PLATFORMS"] = "cpu"

BATCH_SIZE = 32
EPOCHS = 100
SEED = 0

## Read circuit data

We read the circuits from the pickled files.

In [2]:
training_circuits_paths = glob.glob(this_folder + "//simplified-JOB-diagrams//circuits//binary_classification//training//[0-9]*.p")
#validation_circuits_paths = glob.glob(this_folder + "//simplified-JOB-diagrams//circuits//binary_classification//validation//[0-9]*.p")
test_circuits_paths = glob.glob(this_folder + "//simplified-JOB-diagrams//circuits//binary_classification//test//[0-9]*.p")

def read_diagrams(circuit_paths):
    circuits = {}
    for serialized_diagram in circuit_paths:
        base_name = Path(serialized_diagram).stem
        f = open(serialized_diagram, "rb")
        diagram = pickle.load(f)
        circuits[base_name] = diagram
    return circuits


training_circuits = read_diagrams(training_circuits_paths[:100])
#for key in training_circuits:
#    print("training query: ", key)
test_circuits = read_diagrams(test_circuits_paths) #+ [test_circuits_paths[2]] + test_circuits_paths[8:])
#test_circuits = read_diagrams([test_circuits_paths[2]])
#for key in test_circuits:
#    print("test query: ", key)

## Read training and test data

In [3]:
training_data, test_data = None, None
with open(this_folder + "//data//training_data.json", "r") as inputfile:
    training_data = json.load(inputfile)['training_data']
with open(this_folder + "//data//test_data.json", "r") as inputfile:
    test_data = json.load(inputfile)['test_data']
    

def time_to_states(data, circuits):
    labeled_data = {}
    for elem in data:
        if elem["name"] in circuits.keys():
            if elem["time"] < 5000:
                labeled_data[elem["name"]] = [1,0] # corresponds to |0>
            else:
                labeled_data[elem["name"]] = [0,1] # corresponds to |1>
    return labeled_data


training_data_labels = time_to_states(training_data, training_circuits)
test_data_labels = time_to_states(test_data, test_circuits)

#for key in training_data_labels:
#    print("training: ", key)
#for key in test_data_labels:
#    print("test ", key)

## Lambeq optimizer

In [4]:
#all_circuits = list(training_circuits.values()) + list(test_circuits.values())

training_circuits_l = []
test_circuits_l = []
training_data_labels_l = []
test_data_labels_l = []

# Organize circuits and labels in correct order into two lists which will be input for training the model
for key in training_data_labels:
    training_circuits_l.append(training_circuits[key])
    training_data_labels_l.append(training_data_labels[key])

for key in test_data_labels:
    test_circuits_l.append(test_circuits[key])
    test_data_labels_l.append(test_data_labels[key])

all_circuits = training_circuits_l + test_circuits_l

train_syms = set([sym for circuit in training_circuits.values() for sym in circuit.free_symbols])
test_syms = set([sym for circuit in test_circuits.values() for sym in circuit.free_symbols])

print("Test circuits need to share training circuits' parameters. The parameters that are not covered: ", test_syms.difference(train_syms))

print("Total number of circuits: ", len(all_circuits))
print("Total number of variables: ", len(train_syms))

backend = AerBackend()
backend_config = {
    'backend': backend,
    'compilation': backend.default_compilation_pass(2),
    'shots': 32768
}

Test circuits need to share training circuits' parameters. The parameters that are not covered:  {'mini biography'__n.l_2, 'top 250 rank'__n.l_2, 'character-name-in-title'__n.l_2, '[us]'__n.l_2, 'character-name-in-title'__n.l_1, 'character-name-in-title'__n.l_0, country_code__n.l_0, country_code__n.l_1, 'Volker Boehm'__n.l_1, '%Film%'__n.l_2, '%Film%'__n.l_0, 'Volker Boehm'__n.l_2, '%sequel%'__n.l_1, '%Film%'__n.l_1, 'mini biography'__n.l_0, 'bottom 10 rank'__n.l_1, 'bottom 10 rank'__n.l_0, '%sequel%'__n.l_2, '%sequel%'__n.l_0, 'Volker Boehm'__n.l_0, 'mini biography'__n.l_1, 'top 250 rank'__n.l_1, '[us]'__n.l_1, '[us]'__n.l_0, country_code__n.l_2, 'bottom 10 rank'__n.l_2, 'top 250 rank'__n.l_0}
Total number of circuits:  290
Total number of variables:  207


## Model

Select the used model between `TketModel` or `NumpyModel`. `NumpyModel` can use JAX which speeds up the training.

In [5]:
#model = TketModel.from_diagrams(all_circuits, backend_config=backend_config)
model = NumpyModel.from_diagrams(all_circuits, use_jit=True)
model.initialise_weights()

## Loss function and evaluation

In [6]:
def acc(y_hat, y):
    #print("y_hat: ", y_hat)
    #print("y: ", y)
    return (np.sum(np.round(y_hat) == y) / len(y)) / 2

loss = lambda y_hat, y: -np.sum(y * np.log(y_hat)) / len(y)  # binary cross-entropy loss
#acc = lambda y_hat, y: np.sum(np.round(y_hat) == y) / len(y) / 2  # half due to double-counting

eval_metrics = {"acc": acc}

## Trainer

In [7]:
trainer = QuantumTrainer(
    model,
    loss_function=loss,
    epochs=EPOCHS,
    optimizer=SPSAOptimizer,
    optim_hyperparams={'a': 1, 'c': 0.1, 'A':0.01*EPOCHS},
    evaluate_functions=eval_metrics,
    evaluate_on_train=True,
    verbose = 'text',
    seed=SEED
)

## Training dataset

In [8]:
train_dataset = Dataset(training_circuits_l, training_data_labels_l)

#test_dataset = Dataset(test_circuits_l, test_data_labels_l, shuffle=False)

trainer.fit(train_dataset, evaluation_step=1, logging_step=1)

Epoch 1:    train/loss: 0.6051   valid/loss: -----   train/acc: 0.7826   valid/acc: -----
Epoch 2:    train/loss: 0.7127   valid/loss: -----   train/acc: 0.2174   valid/acc: -----
Epoch 3:    train/loss: 0.6745   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 4:    train/loss: 0.6818   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 5:    train/loss: 0.6810   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 6:    train/loss: 0.6764   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 7:    train/loss: 0.6530   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 8:    train/loss: 0.5914   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 9:    train/loss: 0.6719   valid/loss: -----   train/acc: 0.7826   valid/acc: -----
Epoch 10:   train/loss: 0.7095   valid/loss: -----   train/acc: 0.2174   valid/acc: -----
Epoch 11:   train/loss: 0.6771   valid/loss: -----   train/acc: 0.7283   valid/acc: -----
Epoch 12: 