In [1]:
#!pwd

/Users/shihosato/src/github.com/twinkle13531/master_degree/202011/scripts/functions


In [2]:
import pandas as pd 
import numpy as np
from scipy.special import comb
import math
from neal import SimulatedAnnealingSampler
import itertools
import random
import matplotlib.pyplot as plt
from dwave.system.samplers import DWaveCliqueSampler
from dwave.system.composites import EmbeddingComposite
from dwave.system import ReverseBatchStatesComposite, ReverseAdvanceComposite
from dwave.embedding.chain_strength import uniform_torque_compensation
import dimod
import timeit

In [3]:
def calc_marginals(df):
    """Return the three marginal statistics of the outcome column ``Y``.

    Args:
        df: DataFrame containing the columns ``Y``, ``SEX`` and ``AOP``.

    Returns:
        numpy array ``[sum(Y), Y . SEX, Y . AOP]``: the total of ``Y``
        followed by the dot product of ``Y`` with each covariate column.
    """
    outcome = df['Y']
    # First entry is the plain total of Y; the rest are Y-weighted covariate sums.
    marginals = [sum(outcome)]
    for covariate in ('SEX', 'AOP'):
        marginals.append(np.dot(outcome, df[covariate]))
    return np.array(marginals)

In [4]:
def make_Hamiltonian(df):
    """Build the binary quadratic model that penalises deviation from the marginals of ``df``.

    With binary variables y_0..y_{N-1} (one per row of ``df``) and targets
    ``t = calc_marginals(df)``, the model encodes

        (sum_i y_i - t[0])**2
        + (sum_i SEX_i * y_i - t[1])**2
        + (sum_i AOP_i * y_i - t[2])**2

    expanded with ``y_i**2 == y_i``. The energy is therefore 0 exactly for
    assignments that reproduce all three marginals.

    Args:
        df: DataFrame with columns ``Y``, ``SEX`` and ``AOP``.

    Returns:
        ``dimod.BinaryQuadraticModel`` with BINARY vartype whose variables are
        the row positions ``0..len(df)-1``.
    """
    target_y, target_sex, target_aop = calc_marginals(df)
    n_rows = len(df)

    # Positional accessors, so the DataFrame's index labels never matter.
    sex = df['SEX'].iloc
    aop = df['AOP'].iloc

    # Linear (diagonal) terms: the coefficient of y_i from each squared constraint.
    linear = {}
    for i in range(n_rows):
        y_term = 1 - 2 * target_y
        sex_term = (sex[i] - 2 * target_sex) * sex[i]
        aop_term = (aop[i] - 2 * target_aop) * aop[i]
        linear[i] = y_term + sex_term + aop_term

    # Quadratic terms: the coefficient of y_i * y_j for every pair i < j.
    quadratic = {}
    for i in range(n_rows):
        for j in range(i + 1, n_rows):
            quadratic[(i, j)] = 2 + 2 * sex[i] * sex[j] + 2 * aop[i] * aop[j]

    # Constant offset: the squared targets.
    offset = target_y ** 2 + target_sex ** 2 + target_aop ** 2

    return dimod.BinaryQuadraticModel(linear, quadratic, offset, dimod.Vartype.BINARY)

In [5]:
def QA_ReverseAdvanceComposite(df, numreads, devi):
    """Reverse-anneal from the observed ``Y`` with several schedules and collect zero-energy samples.

    The BQM from ``make_Hamiltonian(df)`` is sampled through
    ``ReverseAdvanceComposite`` wrapped around a ``DWaveCliqueSampler``,
    starting from the assignment stored in ``df['Y']``. Every distinct
    returned sample whose energy equals 0 (i.e. that reproduces the marginals
    encoded by ``make_Hamiltonian``) is kept.

    Args:
        df: DataFrame with the columns used by ``make_Hamiltonian``.
        numreads: forwarded to the sampler as ``num_reads``.
        devi: time of the middle point of the third anneal schedule; the first
            two schedules use 5 and 10. All schedules end at time 20.

    Returns:
        Tuple ``(valid_y_list, valid_y_num)``: the distinct zero-energy
        assignments (each a list of ints ordered by row position of ``df``, in
        the order they were first seen) and their count.
    """
    bqm = make_Hamiltonian(df)

    # No API token is embedded here: the D-Wave cloud client resolves
    # credentials from its configuration file or the DWAVE_API_TOKEN
    # environment variable. The token that was previously committed in this
    # cell should be revoked.
    dw_sampler = DWaveCliqueSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
    )

    sampler_reverse = ReverseAdvanceComposite(dw_sampler)
    init_samples = dict(enumerate(df['Y'].values.tolist()))

    res = sampler_reverse.sample(
        bqm,
        anneal_schedules=[[[0.0, 1.0], [t, 0.5], [20, 1.0]] for t in (5, 10, devi)],
        initial_state=init_samples,
        num_reads=numreads,
        reinitialize_state=True,
    )

    n_vars = len(df)
    seen = set()
    valid_y_list = []
    # Read each sample by variable label rather than by column position, so the
    # result is aligned with the rows of df whatever order the sampleset uses
    # for its variables. sorted_by=None keeps the record order.
    for datum in res.data(fields=['sample', 'energy'], sorted_by=None):
        # NOTE(review): exact comparison is kept from the original; it relies
        # on the energies being computed without rounding error.
        if datum.energy != 0.:
            continue
        candidate_y = [int(datum.sample[v]) for v in range(n_vars)]
        key = tuple(candidate_y)
        if key not in seen:
            seen.add(key)
            valid_y_list.append(candidate_y)

    return valid_y_list, len(valid_y_list)

In [6]:
def QA_ReverseBatchStatesComposite(df, numreads, time_time):
    """Reverse-anneal from the observed ``Y`` with one schedule and collect zero-energy samples.

    The BQM from ``make_Hamiltonian(df)`` is sampled through
    ``ReverseBatchStatesComposite`` wrapped around a ``DWaveCliqueSampler``,
    starting from the assignment stored in ``df['Y']``. Every distinct
    returned sample whose energy equals 0 (i.e. that reproduces the marginals
    encoded by ``make_Hamiltonian``) is kept.

    Args:
        df: DataFrame with the columns used by ``make_Hamiltonian``.
        numreads: forwarded to the sampler as ``num_reads``.
        time_time: time of the middle point of the anneal schedule, which
            starts at 0.0 and ends at 20.

    Returns:
        Tuple ``(valid_y_list, valid_y_num)``: the distinct zero-energy
        assignments (each a list of ints ordered by row position of ``df``, in
        the order they were first seen) and their count.
    """
    bqm = make_Hamiltonian(df)

    # No API token is embedded here: the D-Wave cloud client resolves
    # credentials from its configuration file or the DWAVE_API_TOKEN
    # environment variable. The token that was previously committed in this
    # cell should be revoked.
    dw_sampler = DWaveCliqueSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
    )

    sampler_reverse = ReverseBatchStatesComposite(dw_sampler)
    init_samples = dict(enumerate(df['Y'].values.tolist()))

    # ReverseBatchStatesComposite.sample takes its starting states under the
    # keyword `initial_states` (plural, a collection of samples); the singular
    # `initial_state` is the child sampler's parameter and is set by the
    # composite itself for each batch entry.
    # NOTE(review): when fewer states than num_reads are supplied, how the
    # remaining reads are initialised depends on the composite's
    # `initial_states_generator` setting - confirm the default is what is wanted.
    res = sampler_reverse.sample(
        bqm,
        initial_states=[init_samples],
        anneal_schedule=[[0.0, 1.0], [time_time, 0.5], [20, 1.0]],
        num_reads=numreads,
        reinitialize_state=True,
    )

    n_vars = len(df)
    seen = set()
    valid_y_list = []
    # Read each sample by variable label rather than by column position, so the
    # result is aligned with the rows of df whatever order the sampleset uses
    # for its variables. sorted_by=None keeps the record order.
    for datum in res.data(fields=['sample', 'energy'], sorted_by=None):
        # NOTE(review): exact comparison is kept from the original; it relies
        # on the energies being computed without rounding error.
        if datum.energy != 0.:
            continue
        candidate_y = [int(datum.sample[v]) for v in range(n_vars)]
        key = tuple(candidate_y)
        if key not in seen:
            seen.add(key)
            valid_y_list.append(candidate_y)

    return valid_y_list, len(valid_y_list)

In [7]:
def time_measurement(df, annealing_time=20):
    """Accumulate Hamiltonian build time plus a fixed annealing time per iteration.

    Runs ``sum(df['LI']) + 1`` iterations. Each one times a single
    ``make_Hamiltonian(df)`` call with ``timeit`` and adds ``annealing_time``
    to the measured duration.

    Args:
        df: DataFrame with an ``LI`` column plus the columns used by
            ``make_Hamiltonian``.
        annealing_time: constant added per iteration (default 20, the value
            that was previously hard-coded).

    Returns:
        The accumulated total over all iterations.
    """
    # NOTE(review): timeit reports seconds, while 20 matches the end point of
    # the anneal schedules used elsewhere in this notebook - confirm both
    # quantities are meant to be in the same unit before interpreting the sum.
    sum_time = 0
    timer_globals = {"make_Hamiltonian": make_Hamiltonian, "df": df}
    for _ in range(0, sum(df['LI']) + 1):
        # make_Hamiltonian takes only `df`; the former second argument made
        # the timed statement raise TypeError.
        elapsed = timeit.repeat("make_Hamiltonian(df)", number=1, repeat=1, globals=timer_globals)
        sum_time += elapsed[0] + annealing_time
    return sum_time

In [8]:
#==========
# Test code
#==========
def test_find_valid_y():
    """Check that ``find_valid_y`` reports at least one solution at the true ``t1``.

    Loads ``ost6.csv``, computes ``t1 = sum(Y * LI)`` from the observed data,
    runs ``find_valid_y`` with 20 reads, prints its result and asserts that the
    count stored under the true ``t1`` is positive.
    """
    # NOTE(review): find_valid_y is not defined in the visible part of this
    # notebook - confirm it is provided elsewhere before running this test.
    cohort = pd.read_csv('../../input/ost6.csv', sep=',', index_col=0)
    true_t1 = sum(cohort['Y'] * cohort['LI'])
    outcome = find_valid_y(cohort,  num_reads=20)
    valid_y_list, valid_y_num = outcome
    print(valid_y_list, valid_y_num)
    # valid_y_num is indexed by t1 here, so it must be a mapping or sequence.
    assert valid_y_num[true_t1] > 0

In [9]:
def test_validity():
    """Check that a hand-picked alternative ``Y`` keeps two of the marginals of ``ost6.csv``.

    The file is loaded twice; in the second copy ``Y`` is replaced by
    ``[1, 0, 0, 0, 0, 1]``. Both marginal vectors are printed, then entries 0
    and 2 (the ``Y`` total and the ``Y . AOP`` product) are asserted equal.
    Entry 1 (``Y . SEX``) is not compared.
    """
    csv_path = '../../input/ost6.csv'
    observed = pd.read_csv(csv_path, sep=',', index_col=0)
    altered = pd.read_csv(csv_path, sep=',', index_col=0)
    # The replacement vector has six entries, so the file must have six rows.
    altered['Y'] = np.array([1, 0, 0, 0, 0, 1])

    marginals_observed = calc_marginals(observed)
    marginals_altered = calc_marginals(altered)
    print(marginals_observed)
    print(marginals_altered)

    compared = [0, 2]
    assert (marginals_observed[compared] == marginals_altered[compared]).all()