# Variational Shadow Quantum Learning

The method of VSQL can be shown in the diagram:

![](https://github.com/tzajack/Quantum-ML-Computing-Learning/blob/main/Variational%20Shadow%20Quantum%20Learning/fig1.png?raw=true)

One can easily see that instead of using a unitary gate which has quantum gates on every qubit in the circuit, VSQL use a unitray gate with less number of qubits and move the unitray gate along all the qubits.

One interesting analogy can be made with CNN. The unitary gate $U(\theta)$ is the kernel in CNN. Number of qubit used in $U(\theta)$ is the size of the kernel.

## Import Packages

In [1]:
import os
os.environ['PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION'] = 'python'
import warnings
warnings.filterwarnings("ignore")

In [2]:
import time
import numpy as np
import paddle
import paddle.nn.functional as F
from paddle.vision.datasets import MNIST
import paddle_quantum
from paddle_quantum.ansatz import Circuit

  from collections import namedtuple, Mapping
  from collections import Mapping, MutableMapping
  from scipy.optimize.optimize import OptimizeResult
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  if data.dtype == np.object:


In [3]:
def data_loading(n_train=1000, n_test=100):
    # We use the MNIST provided by paddle
    train_dataset = MNIST(mode='train')
    test_dataset = MNIST(mode='test')
    # Select data points from category 0 and 1
    train_dataset = np.array([i for i in train_dataset if i[1][0] == 0 or i[1][0] == 1], dtype=object)
    test_dataset = np.array([i for i in test_dataset if i[1][0] == 0 or i[1][0] == 1], dtype=object)
    np.random.shuffle(train_dataset)
    np.random.shuffle(test_dataset)
    # Separate images and labels
    train_images = train_dataset[:, 0][:n_train]
    train_labels = train_dataset[:, 1][:n_train].astype('int64')
    test_images = test_dataset[:, 0][:n_test]
    test_labels = test_dataset[:, 1][:n_test].astype('int64')
    # Normalize data and pad them with zeros
    x_train = norm_img(train_images)
    x_test = norm_img(test_images)
    # Transform integer labels into one-hot vectors
    train_targets = np.array(train_labels).reshape(-1)
    y_train = paddle.to_tensor(np.eye(2)[train_targets])
    test_targets = np.array(test_labels).reshape(-1)
    y_test = paddle.to_tensor(np.eye(2)[test_targets])

    return x_train, y_train, x_test, y_test

The structure of $U(\theta)$ is:
![](https://qml.baidu.com/static/464ec9f483f300e02d4e45131f92a694/fcda8/vsql-fig-2-local.png)

The construction is the same to the one in Quantum Classifier.

In [4]:
# Construct the shadow circuit U(theta)
def U_theta(n, n_qsc, depth):
    # Initialize the circuit
    cir = Circuit(n)
    # Add layers of rotation gates
    for i in range(n_qsc):
        cir.rx(qubits_idx=i)
        cir.ry(qubits_idx=i)
        cir.rx(qubits_idx=i)
        
    # Add D layers of Ry and CNOT after each pair
    for repeat in range(1, depth + 1):
        for i in range(n_qsc - 1):
            cir.cnot([i, i + 1])
        cir.cnot([n_qsc - 1, 0])
        for i in range(n_qsc):
            cir.ry(qubits_idx=i)

    return cir

Before complete the sliding of $U(\theta)$, first see how sublayers() works.

In [5]:
tmpcir = Circuit(4)
for i in range(2):
    tmpcir.rx(qubits_idx=i)
    tmpcir.ry(qubits_idx=i)
    tmpcir.rx(qubits_idx=i)
tmpcir.rx(qubits_idx=2)
print(tmpcir)
for sublayer in tmpcir.sublayers():
    print(sublayer.qubits_idx)

--Rx(0.239)----Ry(5.652)----Rx(0.521)--
                                       
--Rx(0.006)----Ry(5.197)----Rx(4.125)--
                                       
--Rx(5.493)----------------------------
                                       
---------------------------------------
                                       
[0]
[0]
[0]
[1]
[1]
[1]
[2]


We can see that sublayers() is a iterable variable that scans every qubit of the circuit, if there is no gate on a certain qubit, it will ignore this qubit(the qubit is not deleted, we just do not count the gate on it).

We can implment sliding by changing the qubits_idx of each sublayer

In [6]:
def slide_circuit(cir, distance):
    for sublayer in cir.sublayers():
        qubits_idx = np.array(sublayer.qubits_idx)
        qubits_idx = qubits_idx + distance
        sublayer.qubits_idx = qubits_idx.tolist()

Try to slide the tmpcir downwards to test the function

In [7]:
slide_circuit(tmpcir,1)
print(tmpcir)

---------------------------------------
                                       
--Rx(0.239)----Ry(5.652)----Rx(0.521)--
                                       
--Rx(0.006)----Ry(5.197)----Rx(4.125)--
                                       
--Rx(5.493)----------------------------
                                       


In [8]:
def observable(n_start, n_qsc):
    pauli_str = ','.join('x' + str(i) for i in range(n_start, n_start + n_qsc))
    hamiltonian = paddle_quantum.Hamiltonian([[1.0, pauli_str]])

    return hamiltonian

Noted that the network structure is different from the one in tutorial, here we try to use the approach of the paper

[1] The paper use a $1$ dimension output linear layer and use a sigmoid map to the label.

[2] The paper use MSE loss instead of cross-entropy loss in the tutorial

In [9]:
class Net(paddle.nn.Layer):
    def __init__(self,
                 n, 
                 n_qsc, 
                 depth 
                ):
        super(Net, self).__init__()
        self.n = n
        self.n_qsc = n_qsc
        self.depth = depth
        self.cir = U_theta(self.n, n_qsc=self.n_qsc, depth=self.depth)
        self.fc = paddle.nn.Linear(n - n_qsc + 1, 1,
                                   weight_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Normal()),
                                   bias_attr=paddle.ParamAttr(initializer=paddle.nn.initializer.Normal()))


    def forward(self, batch_in, label):
        
        dim = len(batch_in)
        features = []
        for state in batch_in:
            _state = paddle_quantum.State(state)
            f_i = []
            for st in range(self.n - self.n_qsc + 1):
                ob = observable(st, n_qsc=self.n_qsc)
                
                #Slide the circuit
                slide_circuit(self.cir, 1 if st != 0 else 0)
                expecval = paddle_quantum.loss.ExpecVal(ob)
                out_state = self.cir(_state)
                
                #Calculate O_i for each slide
                f_ij = expecval(out_state)
                f_i.append(f_ij)
                
            # Slide back to the original position
            slide_circuit(self.cir, -st)
            f_i = paddle.concat(f_i)
            features.append(f_i) 
        features = paddle.stack(features)
        
        #Feed O_i to fully connect NN
        output = self.fc(features)
        #Run Sigmoid to the linear layer [1]
        output = F.sigmoid(output)
       # print('output',output)
        #print('label',label)
        #MSE loss [2]
        lb = paddle.to_tensor(label)
        loss = paddle.mean((output - lb)**2) 
        
        #Calculate accuracy
        is_correct = 0
        is_correct = (paddle.abs(output - lb) < 0.5).nonzero().shape[0]
        acc = is_correct / dim

        return loss, acc

Normal train() function

In [10]:
def train(N, n_qsc, D, EPOCH, LR, BATCH, x_train,y_train,x_test,y_test):
    N_train = len(x_train)
    net = Net(N, n_qsc, depth=D)

    opt = paddle.optimizer.Adam(learning_rate=LR, parameters=net.parameters())

   
    for ep in range(EPOCH):
        for itr in range(N_train // BATCH):
            l = itr*BATCH
            r = min( (itr + 1) * BATCH , N_train )
            loss, batch_acc = net(x_train[l:r],y_train[l:r])
            
            
            loss.backward()
            opt.minimize(loss)
            opt.clear_grad()
            
            
        loss_useless, test_acc = net(x_test,y_test)
        print("epoch:%3d" % ep,  
            "  loss: %.4f" % loss.numpy(),
            "  test acc: %.4f" % test_acc)

For convenience, the MNIST data takes too long to train, so we use the titanic data again

In [11]:
import pandas as pd
train_data = pd.read_csv('train.csv')
train_data.fillna(train_data['Age'].mean(),inplace = True)
train_label = train_data['Survived']
train_data = train_data.drop(['Survived','PassengerId'],axis = 1)
normdata=(train_data-train_data.mean())/train_data.std()
trainX = normdata.to_numpy()
trainY = train_label.to_numpy()
trainY = trainY.reshape(-1,1)
train_x = trainX
train_y = trainY
train_x.shape,train_y.shape


test_data = pd.read_csv('test.csv')
test_data.fillna(test_data['Age'].mean(),inplace = True)
test_data.fillna(test_data['Fare'].mean(),inplace = True)
test_label = test_data['Survived']
test_data = test_data.drop('Survived',axis = 1)
test_data = test_data.drop('PassengerId',axis = 1)

normdata=(test_data-test_data.mean())/test_data.std()
testX = normdata.to_numpy()
testY = test_label.to_numpy()
testY = testY.reshape(-1,1)
test_x = testX
test_y = testY
test_x.shape,test_y.shape

((418, 5), (418, 1))

Encode the data with amplitude encoding first

In [14]:
from paddle_quantum.dataset import *

N=5
n_qsc=3
D=1
EPOCH=3
LR=0.1
BATCH=64
encoding = 'amplitude_encoding'
quantum_train_x= SimpleDataset(5).encode(train_x, encoding, N)
quantum_test_x= SimpleDataset(5).encode(test_x, encoding, N)
quantum_train_x = paddle.to_tensor(quantum_train_x)
quantum_test_x = paddle.to_tensor(quantum_test_x)

Train the model and see the results

In [15]:
time_st = time.time()
train(N, n_qsc, D, EPOCH, LR, BATCH, quantum_train_x, train_y, quantum_test_x, test_y)
print("time used:", time.time() - time_st)

epoch:  0   loss: 0.2412   test acc: 0.6196
epoch:  1   loss: 0.2204   test acc: 0.6411
epoch:  2   loss: 0.2075   test acc: 0.6388
time used: 209.0915026664734
