## Task 4: Transforming diagrams into quantum circuits <br>
The diagrams are first simplified using the `remove_cups` rewriter. <br>
The reduced diagrams are then converted into quantum circuits using the `IQPAnsatz` class. <br>

In [1]:
import warnings
# Silence every warning category for the rest of the session.
warnings.filterwarnings("ignore")

import os
# Turn off tokenizer parallelism through its environment variable
# (presumably read by the tokenizer used inside the parser — TODO confirm).
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [2]:
import numpy as np

# Batch size handed to the training Dataset.
BATCH_SIZE = 30
# Number of training epochs; also scales the SPSA 'A' hyperparameter.
EPOCHS = 120
# NOTE(review): SEED is not referenced anywhere in the visible code
# (the trainer is built with seed=0) — confirm whether it should be used.
SEED = 2

In [3]:
def read_data(filename):
    """Load labelled sentence pairs from a comma-separated text file.

    Every non-blank line is expected to have the form
    ``sentence1,sentence2,label`` where ``label`` parses as an integer.
    Blank or whitespace-only lines are skipped instead of crashing the load.

    Args:
        filename: Path of the file to read.

    Returns:
        A tuple ``(labels, sentences1, sentences2)`` of three parallel lists:
        the integer labels, the first-field strings and the second-field
        strings, in file order.

    Raises:
        IndexError: If a non-blank line has fewer than three fields.
        ValueError: If the third field is not an integer.
    """
    labels, sentences1, sentences2 = [], [], []
    with open(filename) as f:
        for line in f:
            # A stray empty line (typically at the end of the file) holds no
            # record; indexing its fields would raise IndexError.
            if not line.strip():
                continue
            fields = line.split(",")
            # Parse the label first so a malformed line appends nothing.
            label = int(fields[2])
            labels.append(label)
            sentences1.append(fields[0])
            sentences2.append(fields[1])
    return labels, sentences1, sentences2

# Load the whole data set as three parallel lists.
all_labels, all_sentences1, all_sentences2 = read_data('MC1.TXT')
# NOTE(review): despite its name, all_data pairs only the first 80 records
# (the same range as the training split) — confirm the intended range.
all_data = list(zip(all_sentences1[0:80], all_sentences2[0:80]))

# Records 0-79 form the training split, records 80-99 the test split.
train_labels, train_data1, train_data2 = all_labels[0: 80], all_sentences1[0: 80], all_sentences2[0: 80]
test_labels, test_data1, test_data2 = all_labels[80: 100], all_sentences1[80: 100], all_sentences2[80: 100]
# test_data1/test_data2 are already the 20-element test slices; slicing them
# again with [80:100] always produced empty lists, so pair them directly.
test_data = list(zip(test_data1, test_data2))

## Allowing only sentence (S) categories at the root of the parse

In [None]:
from lambeq import BobcatParser

# Parser restricted to the listed sentence-type root categories.
parser = BobcatParser(verbose='text', root_cats=['S[dcl]', 'S[wq]', 'S[q]', 'S[qem]'])

# Only the first sentence of each pair is converted to a diagram here;
# train_data2/test_data2 are not parsed in this cell.
raw_train_sentences1_diagrams = parser.sentences2diagrams(train_data1)
raw_test_sentences1_diagrams = parser.sentences2diagrams(test_data1)

In [None]:
from lambeq import remove_cups

# Apply remove_cups to every parsed diagram of both splits before the
# diagrams are turned into circuits.
train_diagrams = list(map(remove_cups, raw_train_sentences1_diagrams))
test_diagrams = list(map(remove_cups, raw_test_sentences1_diagrams))

# Draw the first rewritten training diagram as a visual check.
train_diagrams[0].draw()

In [None]:
from lambeq import AtomicType, IQPAnsatz

# IQP ansatz mapping both the noun and the sentence type to the value 1,
# with a single layer and three parameters per single-qubit box.
ansatz = IQPAnsatz(
    {AtomicType.NOUN: 1, AtomicType.SENTENCE: 1},
    n_layers=1,
    n_single_qubit_params=3,
)

# Convert every rewritten diagram of each split into a circuit.
train_s1_circuits = list(map(ansatz, train_diagrams))
test_s1_circuits = list(map(ansatz, test_diagrams))

# Training circuits followed by test circuits, in one list.
all_circuits = [*train_s1_circuits, *test_s1_circuits]

In [None]:
from pytket.extensions.qiskit import AerBackend
from lambeq import NumpyModel

backend = AerBackend()
# Settings for a shot-based backend run.
# NOTE(review): backend_config is not passed to the NumpyModel built below
# and is not used anywhere else in the visible code — confirm it is needed.
backend_config = {
    'backend': backend,
    'compilation': backend.default_compilation_pass(2),
    'shots': 8192
}
# The model is built from the training AND test circuits together
# (presumably so every symbol in either split gets a weight — TODO confirm).
model = NumpyModel.from_diagrams(all_circuits, use_jit=True)

In [None]:
def loss(y_hat, y):
    """Return the mean binary cross-entropy of the predictions.

    Args:
        y_hat: Two-dimensional array of model outputs; column 1 is read as
            the predicted probability of label 1 (one row per sample is
            assumed — confirm against the model's output shape).
        y: Sequence of integer labels (0 or 1), one per row of ``y_hat``.

    Returns:
        The cross-entropy averaged over ``len(y)`` samples.
    """
    y = np.asarray(y)
    # Keep probabilities strictly inside (0, 1) so neither log() diverges.
    eps = 1e-9
    p_one = np.clip(y_hat[:, 1], eps, 1 - eps)
    # Both terms are needed: without the (1 - y) term, label-0 samples
    # contribute nothing and always predicting 1 minimises the loss.
    return -np.sum(y * np.log(p_one) + (1 - y) * np.log(1 - p_one)) / len(y)


def acc(y_hat, y):
    """Return the fraction of samples whose rounded prediction equals the label.

    Args:
        y_hat: Two-dimensional array of model outputs; column 1 is rounded to
            obtain the predicted label.
        y: Sequence of integer labels (0 or 1), one per row of ``y_hat``.

    Returns:
        Accuracy in the range [0, 1].
    """
    y = np.asarray(y)
    # One column is compared against scalar labels, so each sample is counted
    # exactly once; no halving for double-counting is required.
    return np.sum(np.round(y_hat[:, 1]) == y) / len(y)


In [None]:
from lambeq import QuantumTrainer, SPSAOptimizer

# Trainer that optimises `model` with SPSA, using `loss` as the objective and
# reporting `acc` under the key 'acc'.
trainer = QuantumTrainer(
    model,
    loss_function=loss,
    epochs=EPOCHS,
    optimizer=SPSAOptimizer,
    # SPSA hyperparameters; 'A' is tied to the number of epochs.
    optim_hyperparams={'a': 0.05, 'c': 0.06, 'A':0.01*EPOCHS},
    evaluate_functions={'acc': acc},
    evaluate_on_train=True,
    verbose = 'text',
    # NOTE(review): a literal 0 is used although the notebook defines SEED —
    # confirm which value is intended.
    seed=0
)

In [None]:
from lambeq import Dataset

# Training data: first-sentence circuits with their labels, in batches of
# BATCH_SIZE.
train_dataset = Dataset(
            train_s1_circuits,
            train_labels,
            batch_size=BATCH_SIZE)

# Validation data: the held-out circuits, with shuffling disabled.
val_dataset = Dataset(test_s1_circuits, test_labels, shuffle=False)
# Debug output: shows the class of the dataset object.
print(type(train_dataset))

In [None]:
# Train on train_dataset and evaluate on val_dataset; logging_step=12
# presumably controls how often progress is reported — confirm against the
# installed lambeq version.
trainer.fit(train_dataset, val_dataset, logging_step=12)

In [None]:
import matplotlib.pyplot as plt

# 2x2 grid: loss on the top row, accuracy on the bottom row; training set in
# the left column, development set in the right column.
fig, ((ax_tl, ax_tr), (ax_bl, ax_br)) = plt.subplots(2, 2, sharex=True, sharey='row', figsize=(10, 6))
ax_tl.set_title('Training set')
ax_tr.set_title('Development set')
ax_bl.set_xlabel('Iterations')
ax_br.set_xlabel('Iterations')
ax_bl.set_ylabel('Accuracy')
ax_tl.set_ylabel('Loss')

# Take one colour per curve from the active matplotlib colour cycle.
colours = iter(plt.rcParams['axes.prop_cycle'].by_key()['color'])
ax_tl.plot(trainer.train_epoch_costs, color=next(colours))
ax_bl.plot(trainer.train_results['acc'], color=next(colours))
ax_tr.plot(trainer.val_costs, color=next(colours))
ax_br.plot(trainer.val_results['acc'], color=next(colours))

# The held-out circuits are stored in test_s1_circuits; the previously used
# name test_circuits is never defined and raised NameError.
test_acc = acc(model(test_s1_circuits), test_labels)
print('Test accuracy:', test_acc)