In [20]:
from model import GPT, GPTConfig
import torch
import torch.nn as nn
import numpy as np
from torch.nn import functional as F
from Pennylane.Circuit_generate import gate_generate, QuantumEmbedding
from dataload import get_data
from torch.utils.data import DataLoader, TensorDataset
import pennylane as qml
import matplotlib.pyplot as plt

In [21]:
# Prefer the GPU when CUDA is available; otherwise everything stays on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# get_data returns the four arrays unpacked below; X_train is 2-D and its
# shape is unpacked into n and p.
X_train, X_eval, y_train, y_eval = get_data(name='iris')
n, p = X_train.shape

# When qubit matching is used, set num_qubits = p; otherwise the value is the
# user's choice.
num_qubits = 4
max_gate = 16  # scenario length
op_pool_size = 4 * p * num_qubits

# Convert the arrays to tensors created directly on `device`:
# float32 features, int32 labels.
X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.int32, device=device)

X_eval_tensor = torch.tensor(X_eval, dtype=torch.float32, device=device)
y_eval_tensor = torch.tensor(y_eval, dtype=torch.int32, device=device)

# Wrap the training tensors in a dataset and serve them in shuffled
# mini-batches of 16.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

In [22]:
## Define quantum embedding circuit
dev = qml.device('default.qubit', wires=num_qubits)

@qml.qnode(dev, interface='torch')
def quantum_circuit(inputs):
    """Run the token-defined embedding circuit on one data sample.

    Args:
        inputs: 1-D tensor holding the token sequence followed by the data
            features, as assembled by ``GNQE.Qembedding``. The token part has
            ``max_gate + 1`` entries: the initial start token plus the
            ``max_gate`` tokens sampled by the transformer.

    Returns:
        The state of the device after ``QuantumEmbedding`` has been applied
        (``qml.state()``).
    """
    # Derive the split point from max_gate instead of hard-coding 17, so that
    # changing the sequence length does not silently mis-split the input.
    n_tokens = max_gate + 1
    tokens = inputs[:n_tokens]
    data = inputs[n_tokens:]
    QuantumEmbedding(tokens, n_qubits = num_qubits, data = data, matching=False)
    return qml.state()

In [23]:
## Transformer model
class Transformer(GPT):
    """GPT subclass with a logits-only forward pass and a sampler that also
    returns the accumulated logits of the tokens it sampled."""

    def forward(self, idx):
        """Return the ``lm_head`` output for every position of ``idx``.

        Args:
            idx: 2-D tensor of token indices, (batch, sequence length).

        Returns:
            The logits produced by ``lm_head`` after the embedding, dropout,
            transformer blocks and final layer norm.
        """
        _, seq_len = idx.size()
        positions = torch.arange(0, seq_len, dtype=torch.long, device=idx.device)

        # Sum of token and position embeddings, followed by dropout.
        hidden = self.transformer.drop(
            self.transformer.wte(idx) + self.transformer.wpe(positions)
        )

        for layer in self.transformer.h:
            hidden = layer(hidden)
        return self.lm_head(self.transformer.ln_f(hidden))

    def generate(self, n_sequences, max_new_tokens, temperature=1., device="cpu"):
        """Sample ``max_new_tokens`` tokens for each of ``n_sequences`` sequences.

        Every sequence starts from token 0, and token 0 is never sampled
        afterwards because its logit is forced to -inf at each step.

        Args:
            n_sequences: number of sequences sampled in parallel.
            max_new_tokens: number of sampling steps.
            temperature: divisor applied to the logits before the softmax.
            device: device on which the running tensors are created.

        Returns:
            A pair ``(idx, total_logits)``: the token sequences including the
            leading 0, of shape (n_sequences, max_new_tokens + 1), and the sum
            of the unscaled logits of the sampled tokens, of shape
            (n_sequences, 1).
        """
        sequences = torch.zeros(size=(n_sequences, 1), dtype=int, device=device)
        logit_sum = torch.zeros(size=(n_sequences, 1), device=device)
        for _ in range(max_new_tokens):
            # Keep only the most recent block_size tokens as context.
            if sequences.size(1) <= self.config.block_size:
                context = sequences
            else:
                context = sequences[:, -self.config.block_size:]
            # Logits of the last position only.
            step_logits = self(context)[:, -1, :]
            # Give the first vocabulary entry zero probability.
            step_logits[:, 0] = -float("inf")
            # Temperature-scaled softmax, then draw one token per sequence.
            sampled = torch.multinomial(
                F.softmax(step_logits / temperature, dim=-1), num_samples=1
            )
            # Accumulate the (unscaled) logit of each sampled token.
            logit_sum += torch.gather(step_logits, index=sampled, dim=1)
            # Extend the running sequences with the new tokens.
            sequences = torch.cat((sequences, sampled), dim=1)
        return sequences, logit_sum

In [24]:
## Hybrid network
class GNQE(nn.Module):
    """Hybrid model: a GPT-style token sampler plus a weight-free quantum layer.

    The transformer samples a token sequence; the quantum layer evaluates
    ``quantum_circuit`` on that sequence concatenated with a data sample.
    """

    def __init__(self, vocab_size, block_size, dropout, n_layer, n_head, n_embd, bias):
        """Build the transformer from the given GPT hyper-parameters.

        All arguments are forwarded to ``GPTConfig``. ``bias`` is now forwarded
        too; it was previously ignored in favour of a hard-coded ``False``.
        """
        super(GNQE, self).__init__()
        self.transformer_decoder = Transformer((GPTConfig(
            vocab_size=vocab_size,
            block_size=block_size,
            dropout=dropout,
            n_layer = n_layer,
            n_head = n_head,
            n_embd = n_embd,
            bias=bias)
            ))

        # The circuit has no trainable weights, hence the empty weight_shapes.
        self.quantum_layer = qml.qnn.TorchLayer(quantum_circuit,
                                                weight_shapes={})

    def gpt_forward(self, idx):
        """Return the transformer logits for the token indices ``idx``."""
        # Call the module itself rather than .forward so that nn.Module hooks run.
        logit = self.transformer_decoder(idx)
        return logit

    def token_generate(self, max_new_tokens, temperature, device):
        """Sample a single token sequence.

        Returns:
            ``(tokens, logit)`` as produced by ``Transformer.generate`` with
            ``n_sequences=1``.
        """
        tokens, logit = self.transformer_decoder.generate(n_sequences = 1, max_new_tokens = max_new_tokens,temperature=temperature, device=device)
        return tokens, logit

    def Qembedding(self, tokens, data, device):
        """Evaluate the quantum layer on ``tokens`` followed by ``data``.

        The two tensors are concatenated along dim 0, moved to ``device`` and
        passed to the quantum layer; its output is returned.
        """
        state = self.quantum_layer(torch.cat((tokens, data), dim=0).to(device))
        return state

In [25]:
# Build the hybrid model and place it on the device selected earlier, so the
# notebook also runs on machines without CUDA (previously hard-coded 'cuda').
model = GNQE(
    vocab_size = op_pool_size + 1,
    block_size = max_gate,
    dropout = 0.2,
    n_layer = 6,
    n_head = 6,
    n_embd = 216,
    bias=False
).to(device)
TEMPERATURE = 10  # initial sampling temperature used during training
r = 0.99          # TEMPERATURE is multiplied by this factor after every epoch
opt = torch.optim.AdamW(model.parameters())
n_epochs = 1000
# Per-batch training histories and per-evaluation histories.
loss_history = []
train_fidloss_history = []
ev_fidloss_history = []
ev_loss_history = []

number of parameters: 3.38M


In [26]:
def _embed_states(token_seq, samples):
    """Embed every sample with the circuit described by ``token_seq``.

    Args:
        token_seq: 1-D token tensor passed to ``model.Qembedding``.
        samples: tensor whose rows are the individual data samples.

    Returns:
        The stacked state vectors, one row per sample, each divided by its norm.
    """
    embedded = torch.stack(
        [model.Qembedding(token_seq, sample, device=device) for sample in samples]
    )
    return embedded / torch.norm(embedded, dim=1, keepdim=True)


def _fidelity_loss(states, labels):
    """Return -5 * mean over pairs i < j of label_i * label_j * |<s_i|s_j>|^2.

    Args:
        states: normalized state vectors, one per row.
        labels: tensor of labels, flattened to one label per state.
    """
    # Pairwise inner products and the corresponding fidelities.
    inner_products = torch.matmul(torch.conj(states), states.T)
    fidelity_matrix = torch.abs(inner_products) ** 2

    # Product of labels for every pair, weighted by the pair's fidelity.
    flat_labels = labels.view(-1)
    label_products = torch.outer(flat_labels, flat_labels).to(fidelity_matrix.device)
    loss_matrix = torch.mul(label_products, fidelity_matrix)

    # Strict upper triangle (i < j); indices are created on the matrix's device.
    n_states = states.size(0)
    rows, cols = torch.triu_indices(
        n_states, n_states, offset=1, device=loss_matrix.device
    )
    return -5 * torch.mean(loss_matrix[rows, cols])


for epoch in range(n_epochs):
    # Training pass over the mini-batches.
    for X_batch, y_batch in train_loader:
        batch_size = X_batch.size(0)
        if batch_size < 2:
            # A single sample has no i < j pair: the mean over an empty
            # selection is NaN and would corrupt the weights on opt.step().
            continue
        opt.zero_grad()

        # One token sequence is sampled per batch and shared by all samples.
        token, logit = model.token_generate(max_new_tokens = max_gate, temperature = TEMPERATURE, device = device)
        states = _embed_states(token.squeeze(), X_batch)  # (batch_size, state_vector_size)
        loss_values = _fidelity_loss(states, y_batch)

        # Total loss: squared gap between exp(fidelity loss) and exp(-logit).
        loss = (torch.exp(loss_values) - torch.exp(-logit.squeeze()))**2
        train_fidloss_history.append(loss_values.item())
        loss_history.append(loss.item())
        # Backpropagation and optimizer step.
        loss.backward()
        opt.step()

    TEMPERATURE = TEMPERATURE * r

    if (epoch+1) % 5 == 0 :
        model.eval()
        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            ev_token, ev_logit = model.token_generate(max_new_tokens=max_gate,temperature=.001 ,device = device)
            ev_states = _embed_states(ev_token.squeeze(), X_eval_tensor)
            ev_loss_values = _fidelity_loss(ev_states, y_eval_tensor)
            ev_loss = (torch.exp(ev_loss_values) - torch.exp(-ev_logit.squeeze()))**2

        ev_fidloss_history.append(ev_loss_values.item())
        ev_loss_history.append(ev_loss.item())

        print(f"Epoch : {epoch+1},  loss : {loss}, fidloss : {loss_values}")
        print(f"Evaluation loss : {ev_loss}")
        print(f"Evaluation fidloss : {ev_loss_values}")
        model.train()

Epoch : 5,  loss : 0.16705073416233063, fidloss : -0.19465282559394836
Evaluation loss : 1.6926844120025635
Evaluation fidloss : 0.2631579041481018
Epoch : 10,  loss : 0.32358479499816895, fidloss : -0.5599483847618103
Evaluation loss : 1.6926844120025635
Evaluation fidloss : 0.2631579041481018
Epoch : 15,  loss : 0.017713651061058044, fidloss : -0.5760005712509155
Evaluation loss : 1.6926844120025635
Evaluation fidloss : 0.2631579041481018
Epoch : 20,  loss : 0.5995155572891235, fidloss : -0.20577333867549896
Evaluation loss : 1.5612685680389404
Evaluation fidloss : 0.22274932265281677
Epoch : 25,  loss : 0.687972366809845, fidloss : -0.18699832260608673
Evaluation loss : 1.6926844120025635
Evaluation fidloss : 0.2631579041481018
Epoch : 30,  loss : 0.42819419503211975, fidloss : -0.424085408449173
Evaluation loss : 1.6926844120025635
Evaluation fidloss : 0.2631579041481018
Epoch : 35,  loss : 0.642946720123291, fidloss : -0.2208465039730072
Evaluation loss : 1.6926844120025635
Evalua

In [27]:
# Squared gap between exp(loss_values) and exp(-logit), recomputed from the
# values left over by the last training batch of the loop above.
(loss_values.exp() - (-logit.squeeze()).exp()).pow(2)

tensor(1.9477, device='cuda:0', grad_fn=<PowBackward0>)