# Practical Quantum Computing Approach for Sustainable Workflow Optimization in Cloud Infrastructures

by [Valter Uotila](https://researchportal.helsinki.fi/en/persons/valter-johan-edvard-uotila), PhD student, [Unified Database Management Systems](https://www2.helsinki.fi/en/researchgroups/unified-database-management-systems-udbms/news), University of Helsinki

This is just a specified shortest path finding application applied to the problem presented in the [document](https://github.com/valterUo/Quantum-Computing-based-Optimization-for-Sustainable-Data-Workflows-in-Cloud/blob/main/Quantum_Computing__based_Optimization_for_Sustainable_Data_Workflows_in_Cloud.pdf) that comes along with this implementation.

Possible quantum software-harware combinations to solve the problem:

1. Amazon Braket: Ocean implementation of this code
2. D-wave's Leap Advantage: Ocean implementation of this code
3. IBM Quantum systems
    1. Simulator in cloud
    2. NISQ device in cloud
4. Local machine
    1. Ocean's imulated annealing
    2. Qiskit's local qasm simulator

## Part 1: Implementation with Ocean connecting to Amazon Braket and D-wave Leap quantum annealers

In [1]:
# Install a pip package in the current Jupyter kernel
#import sys
#!{sys.executable} -m pip install numpy
#!{sys.executable} -m pip install ocean_plugin

In [2]:
import dimod
from dimod.generators.constraints import combinations
from dwave.system import LeapHybridSampler
from hybrid.reference import KerberosSampler
from dwave.system.composites import EmbeddingComposite

from braket.aws import AwsDevice
from braket.ocean_plugin import BraketSampler, BraketDWaveSampler

import numpy as np
import json
import itertools
import os
import math
import random
import networkx as nx
import matplotlib.pyplot as plt

notebook_path = os.path.abspath("main.ipynb")

In [3]:
def append_linear_safe(variable, value, linear_dict):
    if variable in linear_dict.keys():
        linear_dict[variable] = linear_dict[variable] + value
    else:
        linear_dict[variable] = value

def append_quadratic_safe(variable, value, quadratic_dict):
    if variable in quadratic_dict.keys():
        quadratic_dict[variable] = quadratic_dict[variable] + value
    else:
        quadratic_dict[variable] = value

## Importing data

This demonstration implements three different sized data sets. Comment and uncomment the data sets you want to use.

In [4]:
# Demonstration 1

cloud_partners_data = "cloud_partners_small.json"
workload_data = "workload_small.json"
strength = 1500.0

# Demonstration 2

#cloud_partners_data = "cloud_partners_medium.json"
#workload_data = "workload_medium.json"
#strength = 40.0

# Demonstration 3

#cloud_partners_data = "cloud_partners_large.json"
#workload_data = "workload_large.json"
#strength = 40.0

In [5]:
cloud_partners_file_path = os.path.join(os.path.dirname(notebook_path), "data/single_round_data/cloud_partners/" + cloud_partners_data)
f = open(cloud_partners_file_path)
partners_root = json.load(f)
cloud_partners = partners_root["cloud_partners"]

workload_file_path = os.path.join(os.path.dirname(notebook_path), "data/single_round_data/workloads/" + workload_data)
f = open(workload_file_path)
workload_root = json.load(f)
workload = workload_root["workload"]

#print("Cloud partners: ", json.dumps(cloud_partners, indent=1))
#print("Workloads: ", json.dumps(workload, indent=1))

## Emission simulator

This section implements an emission simulator which simulates emission changes in data center operations. Note that it is relatively hard to get accurate data from individual data centers. This simulator is just for demonstration and it does not have an actual scientific background.

In [6]:
def emission_simulator(variable1, variable2, cloud_partners, workload):
    simulated_carbon_footprint = 1
    emission_factor = 1
    workload_type_in_process = None
    
    source_data_center_id = variable1[1]
    work_in_process = variable2[0]
    target_data_center_id = variable2[1]
    
    for work in workload:
        if work["work_id"] == int(work_in_process):
            emission_factor = work["emission_factor"]
            workload_type_in_process = work["work_type"]
    
    for partner in cloud_partners:
        for center in partner["data_centers"]:
            # Find correct target center
            if target_data_center_id == center["center_id"]:
                for workload_type in center["workload_dependent_emissions"]:
                    # Find correct workload type i.e. Big Data, IoT, ML, etc.
                    if workload_type_in_process == workload_type["workload_type"]:
                        center_emission_factor = workload_type["center_emission_factor"]
                        #print(center_emission_factor)
                        simulated_carbon_footprint = emission_factor*center_emission_factor
            
    return simulated_carbon_footprint

## Creating variables for the binary quadratic model

In the demo paper we defined variables to be $ x_{i,j} = (w_i, d_j) $.

In [7]:
#%%timeit
variables = dict()
workload_order = []

for work in workload:
    variables[str(work["work_id"])] = list()
    workload_order.append(str(work["work_id"]))
    for partner in cloud_partners:
        for center in partner["data_centers"]:
            # The each key in the variables dictionary corresponds to a level in a tree i.e. a time step in the workflow
            variables[str(work["work_id"])].append((str(work["work_id"]), center["center_id"]))
            
#print(json.dumps(variables, indent=1))

## Constructing constraints 

### Constraint 1

This constraint implements the requirement that for every work $ w_i $ we have exactly one variable $ x_{i,j} = (w_i, d_j) = 1$. In other words, this means that every work is executed exactly on a single data center.

In [8]:
def construct_bqm_constraint1(bqm, variables, strength):
    for work_id in variables:
        one_work_bqm = combinations(variables[work_id], 1, strength=strength)
        bqm.update(one_work_bqm)
    return bqm

### Constraint 2

This constraint implements the requirement that for every pair of variables $x_{i,j} = (w_i, d_j)$ and $x_{i+1,k} = (w_{i+1}, d_k)$ we associate the estimated emission coefficient $e(x_{i,j}, x_{i+1,k})$. This coefficient is calculated in emission_simulator function. Note that we need to calculate this only for those pairs, where the works $w_i$ and $w_{i+1}$ are consecutive works in the workload.

To evaluate the algorithm we store the tree in a networkx graph.

In [9]:
def construct_bqm_constraint2(bqm, variables, workload_order):
    vartype = dimod.BINARY
    A = 1
    linear = dict()
    quadratic = dict()
    offset = 0.0
    tree = nx.Graph()

    for work_id_current in range(len(workload_order) - 1):
        work_id_next = work_id_current + 1
        key_current = workload_order[work_id_current]
        key_next = workload_order[work_id_next]

        for work1 in variables[key_current]:
            for work2 in variables[key_next]:
                
                coeff = emission_simulator(work1, work2, cloud_partners, workload)
                
                append_quadratic_safe((work1, work2), coeff, quadratic)
                tree.add_edge(work1, work2, weight=coeff)

                #print("Works", work1, work2)
                #print("Coefficient", coeff)

    bqm_c2 = dimod.BinaryQuadraticModel(linear, quadratic, offset, vartype)
    bqm_c2.scale(A)
    bqm.update(bqm_c2)
    return bqm, tree

## Demonstrating algorithm

In [10]:
def compare_to_optimal(solution, tree, optimal_weight):
    current_total = 0
    try:
        for i in range(len(solution) - 1):
            edge_weight = tree.get_edge_data(solution[i], solution[i+1])
            current_total += edge_weight["weight"]
    except:
      print("The quantum result contains edges which are not in the tree.")
    return np.abs(optimal_weight - current_total)/optimal_weight

In [11]:
def print_solution(sample, tree, optimal_weight = -1):
    positive_solution = []
    for varname, value in sample.items():
        if value == 1:
            positive_solution.append(varname)
            print(varname, value)
    if optimal_weight != -1:
        print("Difference from the optimal ", compare_to_optimal(positive_solution, tree, optimal_weight))

### Wrapping up various methods to solve the QUBO

In [12]:
def solve_bqm_in_leap(bqm, sampler = "Kerberos"):
    bqm.normalize()
    if sampler == "Kerberos":
        kerberos_sampler = KerberosSampler().sample(bqm, max_iter=10, convergence=3, qpu_params={'label': 'Data workflow optimization'})
        sample = kerberos_sampler.first.sample
        # print timing info for the previous D-Wave job
        #print(kerberos_sampler.info['additionalMetadata']['dwaveMetadata']['timing'])
    elif sampler == "LeapHybrid":
        sampler = LeapHybridSampler()
        sampleset = sampler.sample(bqm)
        sample = sampleset.first.sample
        # print timing info for the previous D-Wave job
        print(sampleset.info['additionalMetadata']['dwaveMetadata']['timing'])
    return sample
    
    #print(sampleset)
    #print(best_solution)
    #sample = best_solution
    #energy = sampleset.first.energy

In [13]:
def solve_bqm_in_amazon_braket(bqm, system = "Advantage"):
    device = None
    num_reads = 1000
    if system == "Advantage":
        device = "arn:aws:braket:::device/qpu/d-wave/Advantage_system4"
    elif system == "2000Q":
        device = "arn:aws:braket:::device/qpu/d-wave/DW_2000Q_6"
    sampler = BraketDWaveSampler(device_arn = device)
    sampler = EmbeddingComposite(sampler)
    sampleset = sampler.sample(bqm, num_reads=num_reads)
    sample = sampleset.first.sample
    
    # print timing info for the previous D-Wave job
    print(sampleset.info['additionalMetadata']['dwaveMetadata']['timing'])
    
    return sample

In [14]:
def solve_with_simulated_annealing(bqm):
    sampler = dimod.SimulatedAnnealingSampler()
    sampleset = sampler.sample(bqm, num_reads=100)
    sample = sampleset.first.sample
    return sample

In [15]:
def solve_exactly(bqm):
    sampler = dimod.ExactSolver()
    sampleset = sampler.sample(bqm)
    sample = sampleset.first.sample
    return sample

In [16]:
def solve_with_networkx(tree, variables, start_work):
    possible_solutions = []
    best_solution = None
    min_weight = float('Inf')
    for source_var in variables[start_work]:
        for target_var in variables[str(len(variables) - 1)]:
            possible_solutions.append(nx.dijkstra_path(tree, source=source_var, target=target_var))
    for sol in possible_solutions:
        current_total = 0
        for i in range(len(sol) - 1):
            edge_weight = tree.get_edge_data(sol[i], sol[i+1])
            current_total += edge_weight["weight"]
        #print("Shortest path ", sol)
        #print("Current total ", current_total)
        if min_weight > current_total:
            min_weight = current_total
            best_solution = sol
    return best_solution, min_weight

## Run single time step

In [17]:
vartype = dimod.BINARY
bqm = dimod.BinaryQuadraticModel({}, {}, 0.0, vartype)

In [18]:
#%timeit construct_bqm_constraint1(bqm, variables, strength)
#%timeit construct_bqm_constraint2(bqm, variables, workload_order)

In [19]:
bqm = construct_bqm_constraint1(bqm, variables, strength)
bqm, tree = construct_bqm_constraint2(bqm, variables, workload_order)
# print(bqm)

#print("The problem is to find the minimum path from some of the nodes ('0', x) to some of the nodes ('5', y). The weight of the edges are defined by carbon footprint associated to the computation.")
#nx.draw(tree, with_labels = True)

#### Optimal and correct solution for evaluation

In [20]:
#%timeit solve_with_networkx(tree, variables, '0')

In [21]:
best_solution, optimal_weight = solve_with_networkx(tree, variables, '0')
print(best_solution, optimal_weight)

[('0', '00'), ('1', '00'), ('2', '10')] 108


The following results we obtain with annealing. Ideally we would be close to the results we obtain from the function solve_with_networkx.

In [22]:
#print("Solution with Amazon Braket")
#solution = solve_bqm_in_amazon_braket(bqm)
#print_solution(solution, tree, optimal_weight)

print("Solution with D-wave Leap")
solution = solve_bqm_in_leap(bqm)
print_solution(solution, tree, optimal_weight)

#print("Solution with simulated annealing")
#solve_with_simulated_annealing(bqm)

#print("Exact solution (takes time)")
#solve_exactly()

Solution with D-wave Leap
('0', '00') 1
('1', '00') 1
('2', '10') 1
Difference from the optimal  0.0


## Part 2: Transfering problem to Qiskit

In this part of the code I rely on the [Qiskit Tutorials](https://qiskit.org/documentation/optimization/tutorials/index.html). I want to learn to understand the connection between Ocean implementation and Qiskit. The formulation in Qiskit enables solving the problem using IBM Quantum systems. Although Amazon Braket does not implement the following kind of approach, it might be possible to translate the Qiskit into the equivalent Pennylane code and run it in Braket.

### Importing Qiskit and IBM Quantum Systems

In [23]:
from qiskit import IBMQ
from qiskit import BasicAer
from qiskit.utils import algorithm_globals, QuantumInstance
from qiskit.algorithms import QAOA, NumPyMinimumEigensolver
from qiskit_optimization.algorithms import (
    MinimumEigenOptimizer,
    RecursiveMinimumEigenOptimizer,
    SolutionSample,
    OptimizationResultStatus,
)
from qiskit_optimization import QuadraticProgram

provider = IBMQ.load_account()

We start by importing a smaller data sets. After testing with the bigger data sets, we noted that the processing takes very long. So for the demonstration it makes sense to run the code only with these smaller data sets.

In [24]:
cloud_partners_file_path = os.path.join(os.path.dirname(notebook_path), "data/cloud_partners_small.json")
f = open(cloud_partners_file_path)
partners_root = json.load(f)
cloud_partners_small = partners_root["cloud_partners"]

workload_name = "workload2.json"
workload_file_path = os.path.join(os.path.dirname(notebook_path), "data/workloads/" + workload_name)
f = open(workload_file_path)
workload_root = json.load(f)
workload_small = workload_root["workload"]

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\valte\\Desktop\\Quantum-Computing-based-Optimization-for-Sustainable-Data-Workflows-in-Cloud\\VLDB2022_demos\\data/cloud_partners_small.json'

### Transforming QUBO in Ocean to QUBO in Qiskit 

Constructing BQM in Ocean:

In [None]:
bqm = dimod.BinaryQuadraticModel({}, {}, 0.0, dimod.BINARY)
variables, work_order = construct_variables(workload_small, cloud_partners_small, 0)
bqm = construct_bqm_constraint1(bqm, variables)
bqm, tree = construct_bqm_constraint2(bqm, variables, work_order)

Solving the problem optimally using the classical algorithm:

In [None]:
best_solution, optimal_weight = solve_with_networkx(tree, variables, '0')
print("Best solution with networkx: ", optimal_weight)

Function for evaluating Qiskit result:

In [None]:
def evaluate_qiskit_solution(result, tree, optimal):
    #print(result.variables_dict)
    path = []
    for key in result.variables_dict:
        if result.variables_dict[key] == 1.0:
            path.append(eval(key))
    print("Difference (in [0,1]) between the optimal solution and the solution found with Qiskit:")
    print(compare_to_optimal(path, tree, optimal))

Transforming the QUBO in Qiskit. We use QAOA module in order to understand the details of the process better.

In [None]:
qubo = QuadraticProgram()
qubo_variables = []
for var in bqm.variables:
    qubo.binary_var(str(var))
    qubo_variables.append(str(var))

constant = bqm.offset
linear = []
quadratic = {}

for var in bqm.variables:
    linear.append(bqm.linear[var])
    
for key in bqm.quadratic:
    quadratic[(str(key[0]), str(key[1]))] = bqm.quadratic[key]

#print("Variables: ", qubo_variables)
#print("Offset ", constant)
#print("Linear ", linear)
#print("Quadratic ", quadratic)

qubo.minimize(constant = constant, linear=linear, quadratic=quadratic)

In [None]:
backend = BasicAer.get_backend("qasm_simulator")
#backend = provider.get_backend('ibmq_qasm_simulator')

algorithm_globals.random_seed = 10598
quantum_instance = QuantumInstance(
    backend = backend,
    seed_simulator=algorithm_globals.random_seed,
    seed_transpiler=algorithm_globals.random_seed,
)
qaoa_mes = QAOA(quantum_instance=quantum_instance)
exact_mes = NumPyMinimumEigensolver()

In [None]:
#qaoa = MinimumEigenOptimizer(qaoa_mes)  # using QAOA
#exact = MinimumEigenOptimizer(exact_mes)  # using the exact classical numpy minimum eigen solver

#qaoa_result = qaoa.solve(qubo)
#print(qaoa_result)
#print()
#evaluate_qiskit_solution(qaoa_result, tree, optimal_weight)

In [None]:
#rqaoa = RecursiveMinimumEigenOptimizer(qaoa, min_num_vars=1, min_num_vars_optimizer=exact)
#rqaoa_result = rqaoa.solve(qubo)
#print(rqaoa_result)
print()
#evaluate_qiskit_solution(rqaoa_result, tree, optimal_weight)