<a href="https://colab.research.google.com/github/ynaowusu/protein-folding-quantum-algorithms/blob/main/proteinfolding.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install qiskit #install Qiskit into the notebook environment
!pip install matplotlib plotly #plotting libraries: we need 3D plots to show the lattice and any other 3D elements
!pip install numpy


Collecting qiskit
  Downloading qiskit-2.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting rustworkx>=0.15.0 (from qiskit)
  Downloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Collecting stevedore>=3.0.0 (from qiskit)
  Downloading stevedore-5.4.1-py3-none-any.whl.metadata (2.3 kB)
Collecting pbr>=2.0.0 (from stevedore>=3.0.0->qiskit)
  Downloading pbr-6.1.1-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading qiskit-2.1.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.5/7.5 MB[0m [31m78.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rustworkx-0.16.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m70.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading stevedore-5.4.1-py3-none-any.whl (49 kB)
[2K   [90m━━━━━━━━━━━━

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import random


In [4]:
"""
        Proteins fold in 3D space; here we work with a simplified lattice model with tetrahedral directions. There are two kinds of alternating sites in the protein chain: 'A' and 'B'.
        Every turn corresponds to a direction in 3D space, given by its x, y and z components.
        The vectors (e.g. [1, 1, 1]) are single lattice steps in different directions based on the protein structure model.
"""

class ProtienFoldVector:
    """Turn vectors of the tetrahedral lattice used by the folding model.

    The chain alternates between two kinds of lattice sites, "A" and "B".
    Each site type has four possible turns, indexed 0-3 (the value encoded
    by a pair of qubits).  ``lattice_vector[site][turn]`` is a one-element
    list holding the (x, y, z) step for that turn.
    """

    def __init__(self):
        # NOTE: the original spelled this method ``__init_``, so Python never
        # called it and instances had no ``lattice_vector`` attribute.
        self.lattice_vector = {
            "A": {
                0: [(1, 1, 1)],     # |00> first pair of qubits, represents x, y, z
                1: [(1, -1, -1)],   # |10>
                2: [(-1, 1, -1)],   # |01>
                # Was (-1, -1, -1): that is antiparallel to turn 0 and is also
                # a "B" direction.  The fourth tetrahedral direction of this
                # set (the four vectors sum to zero) is (-1, -1, 1).
                3: [(-1, -1, 1)],   # |11>
            },
            "B": {
                0: [(1, 1, -1)],    # |00>
                1: [(1, -1, 1)],    # |10>
                2: [(-1, 1, 1)],    # |01>
                3: [(-1, -1, -1)],  # |11>
            },
        }


    #vectors go here


In [5]:
#hamiltonian functions


# t_i and t_j are turn vectors.
# We decode the bit string and supply the turns for indices i and j directly.
# For each pair of beads i and j, T(t_i, t_j) returns 1 iff the turns t_i and t_j are along the same axis.
# We need this for the growth constraint function:
# it prevents the chain from growing towards unphysical geometries.



def T(t_i, t_j):
    """Return 1 if two turns lie along the same lattice axis, else 0.

    Two turns share an axis when their vectors are equal or exact opposites.

    Args:
        t_i: First turn vector (NumPy array, list or tuple of coordinates).
        t_j: Second turn vector (NumPy array, list or tuple of coordinates).

    Returns:
        int: 1 when ``t_i == t_j`` or ``t_i == -t_j`` element-wise, otherwise 0.
    """
    # Coerce to arrays first: unary minus is undefined for plain tuples/lists,
    # which is how the lattice vectors are stored.
    t_i = np.asarray(t_i)
    t_j = np.asarray(t_j)
    return int(np.array_equal(t_i, t_j) or np.array_equal(t_i, -t_j))

def test_T_function():
    """Sanity-check T on a few same-axis and different-axis turn pairs."""
    # Pairs that are identical or exact opposites: T must report 1.
    same_axis_pairs = [
        ([1, 1, 1], [1, 1, 1]),
        ([-1, -1, -1], [-1, -1, -1]),
        ([1, 1, 1], [-1, -1, -1]),
    ]
    # Pairs that point along different axes: T must report 0.
    different_axis_pairs = [
        ([1, -1, -1], [1, 1, -1]),
        ([-1, 1, -1], [-1, -1, -1]),
    ]

    for first, second in same_axis_pairs:
        assert T(np.array(first), np.array(second)) == 1

    for first, second in different_axis_pairs:
        assert T(np.array(first), np.array(second)) == 0

    print("all test_t_function tests passed")


test_T_function()

'''
Because some pieces are not implemented yet, growth_constraint_Hamiltonian is partly hard-coded.
The growth constraint eliminates sequences where the same axis
is chosen twice in a row, since this would make the chain fold back onto itself.
`turns` is a placeholder for now; it is supposed to be the chain, so we will decode the measured qubits to get the backbone turns
for the particular fold, and if any consecutive pair shares an axis, the penalty (30 here) is applied.
N is the length of the peptide chain, i.e. the number of amino-acid residues.

'''
# Candidate chain lengths; one is drawn at random and used as N.
amino_acid_sequence = [1, 2, 3, 4, 5, 6]
N = random.choice(amino_acid_sequence)
def growth_constraint_Hamiltonian(turns=None, n=None, penalty_weight=30):
    """Penalise consecutive turns that lie along the same lattice axis.

    Args:
        turns: Sequence of turn vectors for the fold.  Defaults to a
            hard-coded placeholder chain until bitstring decoding exists.
        n: Chain length (number of residues).  Defaults to the module-level
            ``N``.
        penalty_weight: Energy added for every offending consecutive pair.

    Returns:
        The accumulated penalty: ``penalty_weight`` times the number of
        indices ``i`` in ``range(3, n - 1)`` for which ``turns[i]`` and
        ``turns[i + 1]`` share an axis.
    """
    if turns is None:
        # Placeholder backbone; to be replaced by turns decoded from the
        # measured qubits.
        turns = [
            np.array([1, 1, 1]),
            np.array([1, 1, 1]),
            np.array([1, -1, -1]),
            np.array([-1, -1, 1]),
            np.array([1, 1, -1]),
            np.array([1, -1, -1]),
            np.array([-1, -1, 1]),
        ]
    if n is None:
        n = N

    # Never look past the last available turn: the original indexed
    # turns[i + 1] up to i = n - 2, which overruns the list for large n.
    stop = min(n - 1, len(turns) - 1)

    # NOTE(review): the start index 3 is kept from the original; confirm it
    # matches the intended (0- vs 1-based) turn numbering.
    H_gc = 0
    for i in range(3, stop):
        H_gc += penalty_weight * T(turns[i], turns[i + 1])
    return H_gc
growth_constraint_Hamiltonian()

all test_t_function tests passed


0

In [6]:
#the methods we create will be used to fold protein models with 6 to 8
#amino acids on a 3D lattice
# Chain lengths targeted by the folding experiments; pick one and show it.
amino_acid_sequence = [6, 7, 8]
N = random.choice(amino_acid_sequence)
print(N)

7


In [14]:
from qiskit import QuantumCircuit
from qiskit.primitives import StatevectorSampler
from scipy.optimize import minimize

class CVARVQE:
    """Variational quantum eigensolver that minimises a CVaR energy.

    Instead of averaging the energy of every sampled bitstring, only the
    lowest-energy ``alpha`` fraction of the samples is averaged (Conditional
    Value at Risk), so the optimiser is rewarded for producing a few very
    good folds.
    """

    def __init__(self, hamiltonian, alpha=0.1):
        """Store the energy model and the CVaR tail fraction.

        Args:
            hamiltonian: Object exposing ``total_qubits`` (circuit width) and
                ``total_hamiltonian(bitstring)`` (energy of one measured fold).
            alpha: Fraction of the lowest-energy samples that is averaged;
                0.1 keeps the best 10%.
        """
        self.hamiltonian = hamiltonian
        self.alpha = alpha
        self.n_qubits = hamiltonian.total_qubits

    def create_ansatz(self, params):
        """Build the parameterised trial circuit.

        Layout: Hadamard on every qubit, one RY layer, a linear chain of
        CNOTs, then a second RY layer.

        Args:
            params: Rotation angles; the first ``n_qubits`` entries feed the
                first RY layer and the next ``n_qubits`` the second, so at
                least ``2 * n_qubits`` values are required.

        Returns:
            The (unmeasured) ``QuantumCircuit``.
        """
        n = self.n_qubits
        qc = QuantumCircuit(n)

        # Start from an equal superposition of all bitstrings.
        for qubit in range(n):
            qc.h(qubit)

        # First trainable rotation layer.
        for qubit in range(n):
            qc.ry(params[qubit], qubit)

        # Entangle neighbours so that one turn can influence the next.
        for qubit in range(n - 1):
            qc.cx(qubit, qubit + 1)

        # Second trainable rotation layer, applied after the entanglement.
        for qubit in range(n):
            qc.ry(params[n + qubit], qubit)

        return qc

    def evaluate_energy(self, params, n_shots=2000):
        """Sample the ansatz and return its CVaR energy (lower is better).

        Args:
            params: Rotation angles passed to ``create_ansatz``.
            n_shots: Number of times the circuit is sampled.

        Returns:
            float: Mean energy of the best ``alpha`` fraction of the shots.
        """
        qc = self.create_ansatz(params)
        # measure_all() creates its own classical register (named "meas"),
        # so no extra ClassicalRegister has to be added beforehand.
        qc.measure_all()

        sampler = StatevectorSampler()
        job = sampler.run([qc], shots=n_shots)
        result = job.result()

        # Mapping of measured bitstring -> number of shots that produced it.
        counts = result[0].data.meas.get_counts()
        return self._cvar_from_counts(counts)

    def _cvar_from_counts(self, counts):
        """Average the lowest-energy ``alpha`` fraction of the sampled shots.

        Args:
            counts: Mapping of bitstring -> integer number of occurrences.

        Returns:
            float: The CVaR energy.

        Raises:
            ValueError: If ``counts`` contains no shots.
        """
        energies = []
        weights = []
        for bitstring, count in counts.items():
            if count <= 0:
                continue
            energies.append(self.hamiltonian.total_hamiltonian(bitstring))
            weights.append(count)

        if not energies:
            raise ValueError("no measurement outcomes to evaluate")

        energies = np.asarray(energies, dtype=float)
        weights = np.asarray(weights, dtype=int)

        # One entry per shot, in ascending energy order.
        order = np.argsort(energies)
        samples = np.repeat(energies[order], weights[order])

        # Always keep at least one sample, even for a tiny alpha.
        cutoff_idx = max(1, int(len(samples) * self.alpha))
        return float(np.mean(samples[:cutoff_idx]))

    def optimize(self, initial_params=None, maxiter=100):
        """Search for the angles that give the lowest CVaR energy.

        Args:
            initial_params: Starting angles. If omitted, ``2 * n_qubits``
                angles are drawn uniformly from [0, 2*pi).
            maxiter: Maximum number of COBYLA iterations.

        Returns:
            tuple: ``(best_params, best_energy)``.
        """
        if initial_params is None:
            initial_params = np.random.uniform(0, 2 * np.pi, 2 * self.n_qubits)

        # COBYLA is gradient-free, which suits a noisy, sampled objective.
        result = minimize(
            self.evaluate_energy,
            initial_params,
            method='COBYLA',
            options={'maxiter': maxiter},
        )

        return result.x, result.fun








