# Input data and parameters

In [1]:
import csv
import json
import math
import pickle
import pprint
import re
import time
import warnings

# Silence library warnings before the heavier third-party imports below.
warnings.filterwarnings('ignore')

import numpy as np
import requests  # HTTP client used by DA3PSolver.req / DA3PSolver.fin
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator

#pyqubo version==1.5.0
from pyqubo import Array
from pyqubo import Binary
#dwave-neal version==0.6.0
import neal


# SMILES of the functional groups; '*' marks the attachment point.
# The list position is the functional-group label used in the QUBO variables.
fgps = [
    '*',
    '*C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F',
    '*C=C',
    '*C',
    '*CC=C',
    '*OC(C)=O',
    '*OC',
    '*OCOC',
    '*OC(=O)OC',
    '*C(=O)OC',
    '*C#N',
    '*N=C=O',
    '*F',
    '*Cl',
    '*Br',
    '*c1ccccc1',
    '*c1cccs1',
    '*C(F)(F)F',
    '*[Si](C)(C)C',
    '*CCC',
    '*NC(C)=O',
    '*C#C',
    '*C=CC(=O)OC',
]

# SMILES of the skeletons; each one carries two '*' attachment points.
# The list position is the skeleton label used in the QUBO variables.
skeletons = [
    '*COC(*)=O',
    '*C1OC(=O)OC1*',
    '*C1OC(=O)C(*)OC1=O',
    '*C1OCC2(CO1)COC(*)OC2',
    '*C(*)(OC)OC',
    '*COC(=O)CCC(=O)OC*',
    '*COC(=O)CCCC(=O)OC*',
    '*COC(=O)OC*',
    '*C1COC(=O)C1*',
    '*C1CC(=O)OC1*',
    '*C1CC(*)C(=O)O1',
    '*C1OS(=O)OC1*',
    '*C1CCOS(=O)C1*',
    '*C1COS(=O)C(*)C1',
    '*C1CCC(*)S(=O)O1',
    '*C1COS(=O)CC1*',
    '*C1CCS(=O)OC1*',
    '*C1CCS(=O)(=O)OC1*',
    '*C1CC(*)OS(=O)(=O)C1',
    '*C1CCC(*)S(=O)(=O)O1',
    '*C1COS(=O)(=O)CC1*',
    '*C1COS(=O)(=O)C(*)C1',
    '*C1CCOS(=O)(=O)C1*',
    '*C(*)C',
    '*CC*',
    '*C=C*',
    '*C(*)=C',
    '*C1C(=O)OC(=O)C1*',
    '*C#C*',
    '*C1CC(=O)OC(=O)C1*',
    '*C1CC(*)C(=O)OC1=O',
    '*C1OS(=O)(=O)C(*)S(=O)(=O)O1',
]

#-----set of parameters for LUMO or chemical hardness  -------#
# Each of cycsk / chainsk holds two clusters of skeleton indices; every
# cluster has its own regression model (see regT). Enable exactly one of
# the two parameter sets below.

# Clustering by LUMO value:

#chainsk=[[0, 25, 26, 28], [4, 5, 6, 7, 23, 24]]
#cycsk=[[2, 29,27, 30,31],[1, 3, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]]
#task='LUMO'

# Clustering by chemical hardness:

cycsk = [
    [12, 13, 14, 15, 16],
    [1, 2, 3, 8, 9, 10, 11, 17, 18, 19, 20, 21, 22, 27, 29, 30, 31],
]
chainsk = [
    [0, 4, 5, 6, 7, 23, 24],
    [25, 26, 28],
]
task = 'chemical_hardness'

#-----------------------------#



# File holding the fitted regression coefficients for the selected task.
path = 'morgen_rad2_1024bits_'+task+'_coeffi.pkl'

# NOTE(review): pickle.load can execute arbitrary code from the file;
# only load coefficient files that come from a trusted source.
with open(path, 'rb') as f:
    loaded_dict = pickle.load(f)

# regT[i][j] is the linear model stored under key "reg<i><j>_Co_In":
# element 0 of that entry is the coefficient vector, element 1 the intercept.
# chemical_QUBO.obj uses regT[0] for the ring clusters (cycsk) and
# regT[1] for the chain clusters (chainsk); j is the cluster index.
regT = []
for i in range(2):
    models_for_type = []
    for j in range(2):
        key = "reg"+str(i)+str(j)+"_Co_In"
        models_for_type.append({
            "coeff": loaded_dict[key][0],
            "intercept": loaded_dict[key][1],
        })
    regT.append(models_for_type)

# Morgan fingerprint generator (radius 2, i.e. ECFP4-like) with `bits` bits.
bits = 1024
mfpgen = rdFingerprintGenerator.GetMorganGenerator(radius=2, fpSize=bits)

print("---finish loading data and parameters---")

---finish loading data and parameters---


# Functions 

In [2]:

  # code to glue skeleton and functional group together
def sk_fg_glue(i_skeleton,func_group1,func_group2):
    """Bond two functional groups onto a skeleton and return the RDKit molecule.

    Parameters
    ----------
    i_skeleton : str
        SMILES of the skeleton.
    func_group1, func_group2 : str
        SMILES of the groups for site 1 and site 2.

    Returns
    -------
    The molecule parsed from the cleaned-up SMILES (whatever
    ``Chem.MolFromSmiles`` returns for it).

    Notes
    -----
    The '*' atoms of the combined molecule are collected in atom order and
    bonded as 1st<->3rd and 2nd<->4th. This assumes the skeleton contributes
    the first two '*' atoms and each functional group exactly one -- fewer
    than four '*' atoms raises IndexError.
    """
    skeleton_mol, group1_mol, group2_mol = (
        Chem.MolFromSmiles(smi) for smi in (i_skeleton, func_group1, func_group2)
    )
    merged = Chem.CombineMols(Chem.CombineMols(skeleton_mol, group1_mol), group2_mol)

    # Atom indices of every '*' placeholder in the combined (still disconnected) molecule.
    dummy_idx = [atom.GetIdx() for atom in merged.GetAtoms() if atom.GetSymbol() == '*']

    # Connect the skeleton to the functional groups through the placeholders.
    editable = Chem.EditableMol(merged)
    editable.AddBond(dummy_idx[0], dummy_idx[2], order=Chem.rdchem.BondType.SINGLE)
    editable.AddBond(dummy_idx[1], dummy_idx[3], order=Chem.rdchem.BondType.SINGLE)
    glued_smiles = Chem.MolToSmiles(editable.GetMol())

    # Clean up the SMILES after gluing: drop bonded placeholder pairs and the
    # empty branches they leave behind.
    glued_smiles = glued_smiles.replace('**', '').replace('()', '')
    if '*(*' in glued_smiles:
        # A placeholder pair split by a branch opening needs the dedicated fixer.
        glued_smiles = smiles_fix(glued_smiles)

    return Chem.MolFromSmiles(glued_smiles)

def smiles_fix(smiles):
    """Repair a glued SMILES that still contains a ``*(*`` placeholder pair.

    Steps:
      1. The first ``*(*`` is replaced by ``(``.
      2. Every parenthesised branch matching one of the shapes ``(..)``,
         ``(..(..)..)`` or ``(..(..)..(..)..)`` (contents limited to ``\\w``
         characters) is temporarily replaced by ``&``.
      3. If the string now starts with ``&``, the marker is moved behind the
         first atom letter (plus an optional digit) so the SMILES does not
         start with a branch.
      4. Branches are put back in order; in branch ``i`` the character ``1``
         is replaced by ``2*i+4`` and ``2`` by ``2*i+5`` so ring-closure
         labels of different branches do not collide.

    Parameters
    ----------
    smiles : str
        SMILES string to repair.

    Returns
    -------
    str
        The repaired SMILES. An empty input yields an empty string.
    """
    # Raw strings keep the backslashes literal (non-raw '\(' is an invalid
    # escape sequence); compiling once avoids repeating the pattern.
    branch_re = re.compile(r'\(\w+\)|\(\w*\(\w+\)\w*\)|\(\w*\(\w+\)\w*\(\w+\)\w*\)')

    smiles = smiles.replace('*(*', '(', 1)

    # Pull the branches out left to right, leaving '&' markers behind.
    branches = []
    for _ in range(len(branch_re.findall(smiles))):
        branch = branch_re.search(smiles).group(0)
        branches.append(branch)
        smiles = smiles.replace(branch, '&', 1)

    # Deal with a SMILES that starts with a branch. startswith() is safe on
    # an empty string, unlike indexing smiles[0].
    if smiles.startswith('&'):
        smiles = re.sub(r'(&)([A-Za-z]\d?)', r'\2\1', smiles, count=1)

    # Re-insert the branches with renumbered ring-closure labels.
    for i, branch in enumerate(branches):
        renumbered = branch.replace('1', f'{i*2+4}').replace('2', f'{i*2+5}')
        smiles = smiles.replace('&', renumbered, 1)
    return smiles

def code_Generator_defined(sklt,fg1,fg2,bits):
    """Build the feature vector for one skeleton / functional-group combination.

    Three fingerprints are taken with the module-level ``mfpgen``: skeleton
    with only ``fg1`` attached, skeleton with only ``fg2`` attached, and the
    bare skeleton (both sites filled with ``"*"``). The feature vector is the
    skeleton fingerprint plus, for each site, the part of that site's
    fingerprint that is not already set in the skeleton fingerprint.

    Parameters
    ----------
    sklt : str
        Skeleton SMILES.
    fg1, fg2 : str
        Functional-group SMILES for site 1 and site 2.
    bits : int
        Not used by this function; the fingerprint size comes from ``mfpgen``.

    Returns
    -------
    list
        A one-element list holding the feature array.
    """
    def _fingerprint(group_site1, group_site2):
        # Fingerprint of the skeleton glued to the two given groups, as an array.
        return np.array(mfpgen.GetFingerprint(sk_fg_glue(sklt, group_site1, group_site2)))

    site1_fp = _fingerprint(fg1, "*")
    site2_fp = _fingerprint("*", fg2)
    skeleton_fp = _fingerprint("*", "*")

    # x - x*sk removes from x the entries that are also set in the skeleton.
    site1_only = site1_fp - site1_fp*skeleton_fp
    site2_only = site2_fp - site2_fp*skeleton_fp
    code = site1_only + site2_only + skeleton_fp

    return [code]


 #calculate the prediction results yred:prediction  
def ypred1(test,Reg_model):
    """Evaluate a linear model on a feature vector.

    Parameters
    ----------
    test : array-like
        Feature vector (or matrix) passed as the right operand of ``np.matmul``.
    Reg_model : dict
        Mapping with keys ``"coeff"`` and ``"intercept"``.

    Returns
    -------
    ``np.matmul(Reg_model["coeff"], test) + Reg_model["intercept"]``.
    """
    weights = Reg_model["coeff"]
    bias = Reg_model["intercept"]
    return np.matmul(weights, test) + bias

# QUBO model to .json format
class chemical_QUBO(object):
    """Builds the QUBO terms (objective plus one-hot constraints) as plain dicts.

    Binary variables are laid out in the order
    [skeletons, functional groups at site 1, functional groups at site 2]:

    * skeleton ``t``                     -> index ``t``
    * functional group ``j`` at site 1   -> index ``len(skeletons) + j``
    * functional group ``j`` at site 2   -> index ``len(skeletons) + len(fgps) + j``

    Every term is ``{'coefficient': value, 'polynomials': [variable indices]}``;
    an empty ``polynomials`` list denotes a constant.

    The class reads the module-level names ``skeletons``, ``fgps``, ``cycsk``,
    ``chainsk``, ``regT``, ``mfpgen`` and ``sk_fg_glue``.
    """

    def __init__(self,AMP):
        # Factor multiplied into every coefficient of the model.
        self.amp=AMP

    def _cluster_terms(self, clusters, models):
        """Return the objective terms for one skeleton family.

        ``clusters`` is a list of lists of skeleton indices and ``models[i]``
        is the linear model (``"coeff"``, ``"intercept"``) for ``clusters[i]``.
        """
        n_skeletons = len(skeletons)
        n_groups = len(fgps)
        terms = []

        for cluster_idx, cluster in enumerate(clusters):
            for sk_idx in cluster:
                model = models[cluster_idx]
                sk_smiles = skeletons[sk_idx]

                # Weight of the bare skeleton: linear term X_t.
                sk_fp = np.array(mfpgen.GetFingerprint(sk_fg_glue(sk_smiles, "*", "*")))
                sk_weight = np.matmul(model["coeff"], sk_fp) + model["intercept"]
                terms.append({'coefficient': sk_weight*self.amp, 'polynomials': [sk_idx]})

                for j, group in enumerate(fgps):
                    # Contribution of group j at site 1 beyond the skeleton
                    # itself: quadratic term X_t * X_0_j.
                    site1_fp = np.array(mfpgen.GetFingerprint(sk_fg_glue(sk_smiles, group, "*")))
                    site1_weight = np.matmul(model["coeff"], site1_fp - site1_fp*sk_fp)
                    terms.append({'coefficient': site1_weight*self.amp,
                                  'polynomials': [sk_idx, n_skeletons + j]})

                    # Same for group j at site 2: quadratic term X_t * X_1_j.
                    site2_fp = np.array(mfpgen.GetFingerprint(sk_fg_glue(sk_smiles, "*", group)))
                    site2_weight = np.matmul(model["coeff"], site2_fp - site2_fp*sk_fp)
                    terms.append({'coefficient': site2_weight*self.amp,
                                  'polynomials': [sk_idx, n_skeletons + n_groups + j]})
        return terms

    def obj(self):
        """Build the objective function of the QUBO model.

        Ring-type clusters (``cycsk``) use the models in ``regT[0]`` and
        chain-type clusters (``chainsk``) those in ``regT[1]``. All
        coefficients are scaled by this instance's ``amp`` (the previous code
        read the global ``q.amp``, which only worked when the instance
        happened to be bound to a global named ``q``).

        Returns
        -------
        dict
            ``{'binary_polynomial': {'terms': [...]}}``, or ``{}`` when no
            term was produced.
        """
        terms = self._cluster_terms(cycsk, regT[0])
        terms += self._cluster_terms(chainsk, regT[1])

        if not terms:
            return {}
        return {'binary_polynomial': {'terms': terms}}

    #constraint 1:each site can connect to only one functional group
    def one_way_1(self,p):
        """Penalty terms forcing exactly one functional group per site.

        For each of the two sites this is the expansion of
        ``p * amp * (1 - sum_j x_j)**2`` for binary ``x_j``:
        ``-1`` on each variable, ``+2`` on each pair and a constant ``+1``.

        Parameters
        ----------
        p : number
            Penalty coefficient.

        Returns
        -------
        dict
            ``{'terms': [...]}``; empty when ``fgps`` is empty.
        """
        n_skeletons = len(skeletons)
        n_groups = len(fgps)
        linear = float(-1*self.amp*p)
        pairwise = float(2*self.amp*p)
        constant = float(1*self.amp*p)

        terms = []
        for site in range(2):
            first_var = n_skeletons + n_groups*site
            for fg in range(n_groups):
                terms.append({"coefficient": linear, "polynomials": [first_var + fg]})
                for other in range(fg+1, n_groups):
                    terms.append({"coefficient": pairwise,
                                  "polynomials": [first_var + fg, first_var + other]})
            if n_groups:
                # One constant per site.
                terms.append({"coefficient": constant, "polynomials": []})
        return {'terms': terms}

    #constraint 2:each additive can choose only one skeleton
    def one_way_2(self,p):
        """Penalty terms forcing exactly one skeleton to be selected.

        Expansion of ``p * amp * (1 - sum_t x_t)**2`` over the skeleton
        variables, built the same way as in :meth:`one_way_1`.

        Parameters
        ----------
        p : number
            Penalty coefficient.

        Returns
        -------
        dict
            ``{'terms': [...]}``; empty when ``skeletons`` is empty.
        """
        n_skeletons = len(skeletons)
        linear = float(-1*self.amp*p)
        pairwise = float(2*self.amp*p)
        constant = float(1*self.amp*p)

        terms = []
        for i in range(n_skeletons):
            terms.append({"coefficient": linear, "polynomials": [i]})
            for j in range(i+1, n_skeletons):
                terms.append({"coefficient": pairwise, "polynomials": [i, j]})
        if n_skeletons:
            terms.append({"coefficient": constant, "polynomials": []})
        return {'terms': terms}
    
       

    
class DA3PSolver(object):
    """Small client for the asynchronous QUBO endpoints of the Digital Annealer REST API.

    Connection settings (``access_key``, ``rest_url``, ``version``,
    ``proxies``) may be assigned on the instance; any that are still ``None``
    when a request is made are read from the module-level variables of the
    same name. ``requests`` (imported at the top of the notebook) performs
    the HTTP calls.
    """

    #initial settings
    def __init__(self, time_limit_sec=10, target_energy=-0.07, num_output_solution=8):
        self.rest_url = None
        self.access_key = None
        self.version = None
        self.type_num = None
        self.proxies = None
        self.rest_headers = {'content-type': 'application/json'}
        # Solver parameters sent under the 'fujitsuDA3' key of the request.
        # num_output_solution used to be accepted but silently dropped.
        self.params = {
            'time_limit_sec': time_limit_sec,
            'target_energy': target_energy,
            'num_output_solution': num_output_solution,
        }

    def _prepare(self):
        """Resolve connection settings; return ``(base_url, headers)`` for a request."""
        if self.access_key is None:
            self.access_key = access_key
        if self.rest_url is None:
            self.rest_url = rest_url
        if self.version is None:
            self.version = version
        if self.proxies is None:
            self.proxies = proxies

        headers = dict(self.rest_headers)
        headers['X-Api-Key'] = self.access_key
        return self.rest_url + '/' + self.version, headers

    def check(self,bp):
        """Assemble the request body from this solver's parameters and a QUBO model.

        Parameters
        ----------
        bp : dict
            QUBO model, e.g. ``{'binary_polynomial': {'terms': [...]}}``.

        Returns
        -------
        dict
            ``{'fujitsuDA3': <copy of self.params>, **bp}``. The parameters are
            copied so the caller can tune the request without altering the solver.
        """
        request = {"fujitsuDA3": dict(self.params)}

        #----input QUBO model
        request.update(bp)

        return request

    def req(self,DAUinput):
        """Submit a QUBO job, wait for it, and fetch the result.

        Parameters
        ----------
        DAUinput : dict
            Request body as produced by :meth:`check`; must contain
            ``DAUinput['fujitsuDA3']['time_limit_sec']``.

        Returns
        -------
        list
            ``[{'job_id': <id>}, <decoded JSON of the result response>]``.

        Raises
        ------
        requests.HTTPError
            If the submission is answered with an error status.
        """
        base_url, headers = self._prepare()

        post_status = requests.post(base_url + '/async/qubo/solve', json.dumps(DAUinput),
                                    headers=headers, proxies=self.proxies)
        # Fail with the HTTP error instead of a KeyError on a missing 'job_id'.
        post_status.raise_for_status()
        jobid = post_status.json()['job_id']

        # Report the id before waiting so it is not lost if the wait is interrupted.
        print({'job_id':jobid})

        # Wait for the solver time limit plus a 5 second margin before fetching.
        time.sleep(DAUinput['fujitsuDA3']['time_limit_sec']+5)

        response = requests.get(base_url + '/async/jobs/result/' + jobid,
                                headers=headers, proxies=self.proxies)
        return [{'job_id':jobid},response.json()]

    def fin(self,jobid,ask):
        """Query or delete jobs on the server.

        Parameters
        ----------
        jobid : str
            Job identifier (not used when ``ask == 1``).
        ask : int
            ``0``: GET the result of ``jobid``; ``1``: GET the job list;
            ``2``: DELETE the result of ``jobid``.

        Returns
        -------
        The ``requests`` response object, or ``None`` for any other ``ask``.
        """
        base_url, headers = self._prepare()

        if ask==0:
            return requests.get(base_url + '/async/jobs/result/' + jobid,
                                headers=headers, proxies=self.proxies)
        if ask==1:
            return requests.get(base_url + '/async/jobs',
                                headers=headers, proxies=self.proxies)
        if ask==2:
            return requests.delete(base_url + '/async/jobs/result/' + jobid,
                                   headers=headers, proxies=self.proxies)
        return None

# QUBO model for DA

In [4]:
# Factor applied to every coefficient of the QUBO model.
AMPL = 1
q = chemical_QUBO(AMPL)

# Penalty coefficient, chosen as 1.5*M where M is the largest possible
# absolute value of the chemical property (0.15 corresponds to M ~ 0.1).
pen = 0.15

# Assemble the QUBO model: objective function followed by the constraints.
#   constraint 1: each site connects to exactly one functional group -> one_way_1()
#   constraint 2: each additive uses exactly one skeleton            -> one_way_2()
bp = q.obj()
bp['binary_polynomial']['terms'] = [
    *bp['binary_polynomial']['terms'],
    *q.one_way_2(pen)['terms'],
    *q.one_way_1(pen)['terms'],
]

print('#--finish constructing QUBO model---#')

#--finish constructing QUBO model---#


# Parameters for DA solver

In [5]:
# For more detail see website: https://portal.aispf.global.fujitsu.com/apidoc/da/jp/api-ref/da-qubo-v3c-en.html

# Connection settings read by DA3PSolver.
# '[API_access_key]' is a placeholder: supply your own key and do not commit it.
access_key = '[API_access_key]'
rest_url = 'https://api.aispf.global.fujitsu.com/da'
version = 'v3c'
proxies = {}

# Request body: solver parameters under 'fujitsuDA3' plus the QUBO model.
Check = DA3PSolver().check(bp)

Check['fujitsuDA3'].update({
    'time_limit_sec': 5,
    'num_output_solution': 64,
    'num_group': 16,
    'num_run': 16,
    'internal_penalty': 0,
    # For LUMO prediction use -0.07*AMPL instead.
    'target_energy': 0.12*AMPL,    # For chemical hardness prediction
})

#---check the parameters and QUBO----
pprint.pprint(Check)

{'binary_polynomial': {'terms': [{'coefficient': np.float64(0.15471797116580419),
                                  'polynomials': [12]},
                                 {'coefficient': np.float64(0.0),
                                  'polynomials': [12, 32]},
                                 {'coefficient': np.float64(0.0),
                                  'polynomials': [12, 55]},
                                 {'coefficient': np.float64(0.005844768908624561),
                                  'polynomials': [12, 33]},
                                 {'coefficient': np.float64(0.003536403332318113),
                                  'polynomials': [12, 56]},
                                 {'coefficient': np.float64(-0.002590982519820469),
                                  'polynomials': [12, 34]},
                                 {'coefficient': np.float64(-0.0032727740591172024),
                                  'polynomials': [12, 57]},
                                 {'

# Solve by DA

In [None]:
# Submit the request to the Digital Annealer and fetch the result.
# req() waits time_limit_sec + 5 seconds between submitting the job and
# requesting its result, and returns [{'job_id': ...}, <result JSON>].
# A valid access_key must be configured in the previous cell.
result=DA3PSolver().req(Check)
result

# Results from DA

In [10]:
#Here we demonstrate the LUMO and chemical hardness results from DA
#task1="lumo"
task1="chemical_hardness"

path1 = 'morgen_rad2_1024bits_'+task1+'_DA3sol.txt'

# Read the stored DA output; the context manager closes the file handle.
with open(path1) as f:
    output = json.load(f)

# Walk the solver progress and keep the entry whose energy has the smallest
# magnitude not exceeding the starting threshold.
# NOTE(review): 0.15 equals the penalty coefficient used when building the
# model -- presumably intentional; confirm.
E=0.15
t=0
for step in output['qubo_solution']['progress']:
    if abs(E)>=abs(step['energy']):
        E=step['energy']
        t=step['time']

# Indices of the active binary variables of the first solution at energy E.
# They are sorted explicitly (the previous `sorted(sol)` discarded its result)
# because the decoding below relies on the order
# [skeleton, group at site 1, group at site 2]. Only the first matching
# solution is used so that ties do not get concatenated into one list.
sol=[]
for candidate in output['qubo_solution']['solutions']:
    if candidate['energy']==E:
        sol = sorted(int(key) for key, value in candidate['configuration'].items() if value)
        break

if len(sol) != 3:
    raise ValueError("expected exactly 3 active variables (skeleton, site 1, site 2) "
                     "in the best solution, got "+str(sol))

# Convert global variable indices to positions in `skeletons` / `fgps`.
sol=[sol[0],sol[1]-len(skeletons),sol[2]-len(skeletons)-len(fgps)]

config=[skeletons[sol[0]],fgps[sol[1]],fgps[sol[2]]]

print("Best solution of DA:"+str(E))
print("Running time for Best solution:"+str(t))
print("----Configuration----")
print("Label:"+str(sol))
print("SMILES:"+str(config))


Best solution of DA:0.10340270447059444
Running time for Best solution:0.123
----Configuration----
Label:[28, 16, 16]
SMILES:['*C#C*', '*c1cccs1', '*c1cccs1']
