In [1]:
import pandas as pd 
import numpy as np
from scipy.special import comb
import math
from neal import SimulatedAnnealingSampler
import itertools
import random
import matplotlib.pyplot as plt
from dwave.system.samplers import DWaveSampler
from dwave.system.composites import AutoEmbeddingComposite
from dwave.embedding.chain_strength import uniform_torque_compensation
import dimod
import timeit
import time

In [2]:
def calc_marginals(df):
    """Return the three marginal statistics of the response column ``Y``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``'Y'``, ``'SEX'`` and ``'AOP'``.

    Returns
    -------
    numpy.ndarray
        ``[sum(Y), Y . SEX, Y . AOP]`` in that order (indices 0, 1, 2).
    """
    response = df['Y']
    total_y = sum(response)                      # index 0
    y_dot_sex = np.dot(response, df['SEX'])      # index 1
    y_dot_aop = np.dot(response, df['AOP'])      # index 2
    return np.array([total_y, y_dot_sex, y_dot_aop])

In [3]:
def make_Hamiltonian__(df):
    """Build the binary quadratic model that constrains only the Y and AOP marginals.

    The energy is ``(sum_i y_i - t0)**2 + (sum_i AOP_i * y_i - t2)**2`` expanded
    over binary variables ``y_i`` (using ``y_i**2 == y_i``), where ``t0`` and
    ``t2`` are entries 0 and 2 of ``calc_marginals(df)``.  The SEX constraint is
    deliberately left out of this variant.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns read by ``calc_marginals`` plus ``'AOP'``.

    Returns
    -------
    dimod.BinaryQuadraticModel
        BINARY-vartype model with integer variable labels ``0..len(df)-1``.
    """
    t_list = calc_marginals(df)
    N = len(df)
    dup_list = [(i, i) for i in range(N)]
    comb_list = [(i, j) for i in range(N) for j in range(i+1, N)]

    lin_Y = [1-2*t_list[0] for (i, _) in dup_list]  # diagonal (same-variable) terms
    quad_Y = [2 for (i, j) in comb_list]            # cross terms between distinct variables
    num_Y = t_list[0]**2                            # constant term (target squared)

    # The SEX term is intentionally disabled here.  The original code kept it
    # inside a string literal, which left ``num_SEX`` undefined and made the
    # offset computation below raise NameError.

    AOP = df['AOP'].iloc
    lin_AOP = [(AOP[i] - 2 * t_list[2]) * AOP[i] for (i, _) in dup_list]
    quad_AOP = [2*AOP[i] * AOP[j] for (i, j) in comb_list]
    num_AOP = t_list[2]**2

    # linear biases
    lin_list = [sum(lin) for lin in zip(lin_Y, lin_AOP)]
    lin = {i: lin_list[i] for (i, _) in dup_list}

    # quadratic biases
    quad_values = [sum(quad) for quad in zip(quad_Y, quad_AOP)]
    quad = {ij: quad_values[n] for (n, ij) in enumerate(comb_list)}

    # constant offset: only the terms that are actually part of this model
    num = num_Y + num_AOP

    return dimod.BinaryQuadraticModel(lin, quad, num, dimod.Vartype.BINARY)

In [4]:
def make_Hamiltonian(df):
    """Build the binary quadratic model constraining the Y, SEX and AOP marginals.

    The energy is the sum of three squared residuals,
    ``(sum_i w_i * y_i - t)**2`` with weights ``1``, ``SEX_i`` and ``AOP_i`` and
    targets taken from ``calc_marginals(df)``, expanded over binary ``y_i``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns read by ``calc_marginals`` plus ``'SEX'`` and ``'AOP'``.

    Returns
    -------
    dimod.BinaryQuadraticModel
        BINARY-vartype model with integer variable labels ``0..len(df)-1``.
    """
    targets = calc_marginals(df)
    size = len(df)
    sex = df['SEX'].iloc
    aop = df['AOP'].iloc

    # Linear biases: one entry per variable, summing the three constraint parts.
    linear = {}
    for i in range(size):
        y_part = 1 - 2*targets[0]
        sex_part = (sex[i] - 2 * targets[1]) * sex[i]
        aop_part = (aop[i] - 2 * targets[2]) * aop[i]
        linear[i] = sum((y_part, sex_part, aop_part))

    # Quadratic biases: one entry per unordered pair (i, j) with i < j.
    quadratic = {}
    for i in range(size):
        for j in range(i + 1, size):
            sex_pair = 2*sex[i] * sex[j]
            aop_pair = 2*aop[i] * aop[j]
            quadratic[(i, j)] = sum((2, sex_pair, aop_pair))

    # Constant offset: squared targets of the three constraints.
    offset = targets[0]**2 + targets[1]**2 + targets[2]**2

    return dimod.BinaryQuadraticModel(linear, quadratic, offset, dimod.Vartype.BINARY)

In [5]:
def find_valid_y_0(df, num_reads):
    """Sample the BQM from ``make_Hamiltonian(df)`` on the QPU and keep zero-energy rows.

    This variant is verbose: it prints the whole sample set and every record row.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num)``: the sample vectors (as lists) of every
        record row whose energy equals 0, and how many such rows there were.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    bqm = make_Hamiltonian(df)
    res = qa_sampler.sample(bqm, num_reads=num_reads)

    valid_y_list = []
    print(res)
    for y_info in list(res.record):
        print(y_info)
        # Record rows are indexed positionally: [0] sample vector, [1] energy.
        if y_info[1] == 0.0:
            valid_y_list.append(list(y_info[0]))

    return valid_y_list, len(valid_y_list)

In [6]:
def find_valid_y_1(df, num_reads, chain_break_fraction):
    """Sample the BQM from ``make_Hamiltonian(df)`` on the QPU and keep zero-energy rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num)``: the sample vectors (as lists) of every
        record row whose energy equals 0, and how many such rows there were.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    bqm = make_Hamiltonian(df)
    res = qa_sampler.sample(bqm, chain_break_fraction=chain_break_fraction, num_reads=num_reads)

    # Record rows are indexed positionally: [0] sample vector, [1] energy.
    valid_y_list = [list(y_info[0]) for y_info in list(res.record) if y_info[1] == 0.0]

    return valid_y_list, len(valid_y_list)

In [None]:
# Module-level sampler.  find_valid_y_01 uses the global ``qa_sampler`` defined
# here, so this cell must be executed before that function is called.
# SECURITY: the API token is no longer hardcoded.  The D-Wave client resolves
# it from its configuration file or the DWAVE_API_TOKEN environment variable.
dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver = 'DW_2000Q_6',
        retry_interval=5
    )
# Wrap the QPU sampler so BQMs are embedded automatically.
qa_sampler = AutoEmbeddingComposite(dw_sampler)

In [7]:
def find_valid_y_01(df, num_reads):
    """Sample with a fixed chain strength of 9 and collect the zero-energy samples.

    Uses the module-level ``qa_sampler`` rather than building its own sampler.
    Prints ``energy num_occurrences chain_break_fraction`` for every row of the
    result, then prints every sample while scanning for zero energy.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num)``: value lists of the samples whose energy
        equals 0, and how many there were.
    """
    model = make_Hamiltonian(df)

    res = qa_sampler.sample(model, chain_strength = 9, chain_break_fraction=True, num_reads=num_reads)

    # First pass: diagnostic dump of the per-row statistics.
    for row in res.data(['energy', 'num_occurrences', 'chain_break_fraction']):
        print(row[0], row[1], row[2])

    # Second pass: show each sample and keep those with exactly zero energy.
    hits = []
    for sample, energy in res.data(['sample', 'energy']):
        print(sample)
        if energy != 0.0:
            continue
        hits.append(list(sample.values()))

    return hits, len(hits)
                                                                                                                                                                                                                                               
    

In [9]:
def find_valid_y_2(df, num_reads, chain_strength, chain_break_fraction):
    """Sample the BQM with an explicit chain strength and keep zero-energy rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    chain_strength
        Forwarded unchanged to the sampler's ``chain_strength`` argument.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num)``: the sample vectors (as lists) of every
        record row whose energy equals 0, and how many such rows there were.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    bqm = make_Hamiltonian(df)
    res = qa_sampler.sample(
        bqm, chain_strength=chain_strength,
        chain_break_fraction=chain_break_fraction, num_reads=num_reads)

    # Record rows are indexed positionally: [0] sample vector, [1] energy.
    valid_y_list = [list(y_info[0]) for y_info in list(res.record) if y_info[1] == 0.0]

    return valid_y_list, len(valid_y_list)

In [10]:
def find_valid_y_3(df, num_reads, max_chain_strength, chain_break_fraction):
    """Sweep chain strengths and collect the distinct zero-energy samples, with timing.

    One sampling call is made for each ``chain_strength`` in
    ``range(0, max_chain_strength)``; zero-energy sample vectors are
    de-duplicated across the whole sweep.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num, sum_time)``: distinct zero-energy sample
        vectors (as lists), their count, and the accumulated time in seconds.
        ``sum_time`` is the setup time (sampler construction and BQM build) plus,
        per iteration, the time spent scanning the records and a fixed ``20e-6``.
    """
    time_0 = time.time()
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    valid_y_list = []

    bqm = make_Hamiltonian(df)
    sum_time = time.time() - time_0

    for chain_strength in range(0, max_chain_strength):
        res = qa_sampler.sample(
            bqm, chain_strength=chain_strength,
            chain_break_fraction=chain_break_fraction, num_reads=num_reads)

        time_2 = time.time()
        for y_info in list(res.record):
            # Record rows are indexed positionally: [0] sample vector, [1] energy.
            if y_info[1] == 0.:
                valid_y = list(y_info[0])
                # Keep only vectors not seen before (also covers the empty list).
                if valid_y not in valid_y_list:
                    valid_y_list.append(valid_y)
        time_3 = time.time()
        # NOTE(review): 20e-6 is presumably a 20 microsecond anneal time added
        # per sampling call -- confirm against the solver settings.
        sum_time += time_3 - time_2 + 20*10**-6

    return valid_y_list, len(valid_y_list), sum_time

In [11]:
def find_valid_y_4(df, num_reads, max_chain_strength, chain_break_fraction):
    """Sweep chain strengths and collect distinct zero-energy samples per chain strength.

    One sampling call is made for each ``chain_strength`` in
    ``range(0, max_chain_strength)`` and each sample set is printed.
    De-duplication is done within a chain strength, not across the sweep.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num)``: two dicts keyed by chain strength,
        holding the distinct zero-energy sample vectors and their count.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    valid_y_list = {}
    valid_y_num = {}

    bqm = make_Hamiltonian(df)

    for chain_strength in range(0, max_chain_strength):
        res = qa_sampler.sample(
            bqm, chain_strength=chain_strength,
            chain_break_fraction=chain_break_fraction, num_reads=num_reads)
        print(res)

        found = []
        for y_info in list(res.record):
            # Record rows are indexed positionally: [0] sample vector, [1] energy.
            if y_info[1] == 0.0:
                valid_y = list(y_info[0])
                # The original also tested ``len(valid_y_list) == 0`` on the
                # outer dict, which could never be true here; the membership
                # test alone gives the same result.
                if valid_y not in found:
                    found.append(valid_y)

        valid_y_list[chain_strength] = found
        valid_y_num[chain_strength] = len(found)

    return valid_y_list, valid_y_num

In [12]:
def y_num_hist1(df, num_reads, chain_break_fraction, path):
    """Sample once and plot a histogram of ``t1 = LI . y`` over the zero-energy rows.

    NOTE(review): this notebook defines ``y_num_hist1`` again in a later cell;
    after a full run the later definition is the one in effect.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``; must also provide the ``'LI'`` column.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num, hist_dic)``: zero-energy sample vectors
        (not de-duplicated), their count, and a dict mapping each ``t1`` value
        to the number of rows that produced it.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    valid_y_list = []
    hist_dic = {}
    LI = list(df['LI'])

    bqm = make_Hamiltonian(df)
    res = qa_sampler.sample(
        bqm,
        chain_break_fraction=chain_break_fraction,
        num_reads=num_reads
    )

    for y_info in list(res.record):
        # Record rows are indexed positionally: [0] sample vector, [1] energy.
        if y_info[1] == 0.:
            valid_y = list(y_info[0])
            valid_y_list.append(valid_y)
            t1 = int(np.dot(LI, valid_y))
            hist_dic[t1] = hist_dic.get(t1, 0) + 1

    plt.xlabel('value of t1')
    plt.ylabel('number of samples')
    # Materialise the dict views so matplotlib receives plain sequences.
    plt.bar(list(hist_dic.keys()), list(hist_dic.values()))
    plt.savefig(path)
    plt.show()

    return valid_y_list, len(valid_y_list), hist_dic

In [13]:
def y_num_hist3(df, num_reads, max_chain_strength, chain_break_fraction, path):
    """Sweep chain strengths and plot a histogram of ``t1 = LI . y`` over zero-energy rows.

    One sampling call is made for each ``chain_strength`` in
    ``range(0, max_chain_strength)``; zero-energy rows from all calls are
    pooled without de-duplication.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``; must also provide the ``'LI'`` column.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num, hist_dic)``: pooled zero-energy sample
        vectors, their count, and a dict mapping each ``t1`` value to the
        number of rows that produced it.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    valid_y_list = []
    hist_dic = {}
    LI = list(df['LI'])

    bqm = make_Hamiltonian(df)
    for chain_strength in range(0, max_chain_strength):
        res = qa_sampler.sample(
            bqm, chain_strength=chain_strength,
            chain_break_fraction=chain_break_fraction, num_reads=num_reads
        )

        for y_info in list(res.record):
            # Record rows are indexed positionally: [0] sample vector, [1] energy.
            if y_info[1] == 0.:
                valid_y = list(y_info[0])
                valid_y_list.append(valid_y)
                t1 = int(np.dot(LI, valid_y))
                hist_dic[t1] = hist_dic.get(t1, 0) + 1

    plt.xlabel('value of t1')
    plt.ylabel('number of samples')
    # Materialise the dict views so matplotlib receives plain sequences.
    plt.bar(list(hist_dic.keys()), list(hist_dic.values()))
    plt.savefig(path)
    plt.show()

    return valid_y_list, len(valid_y_list), hist_dic

In [14]:
def y_num_hist1(df, num_reads, chain_break_fraction, path):
    """Sample once and plot a histogram of ``t1 = LI . y`` over the zero-energy rows.

    NOTE(review): an earlier cell of this notebook also defines
    ``y_num_hist1``; this later definition overrides it. One of the two
    should be removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``; must also provide the ``'LI'`` column.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num, hist_dic)``: zero-energy sample vectors
        (not de-duplicated), their count, and a dict mapping each ``t1`` value
        to the number of rows that produced it.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    valid_y_list = []
    hist_dic = {}
    LI = list(df['LI'])

    bqm = make_Hamiltonian(df)
    res = qa_sampler.sample(
        bqm,
        chain_break_fraction=chain_break_fraction,
        num_reads=num_reads
    )

    for y_info in list(res.record):
        # Record rows are indexed positionally: [0] sample vector, [1] energy.
        if y_info[1] == 0.:
            valid_y = list(y_info[0])
            valid_y_list.append(valid_y)
            t1 = int(np.dot(LI, valid_y))
            hist_dic[t1] = hist_dic.get(t1, 0) + 1

    plt.xlabel('value of t1')
    plt.ylabel('number of samples')
    # Materialise the dict views so matplotlib receives plain sequences.
    plt.bar(list(hist_dic.keys()), list(hist_dic.values()))
    plt.savefig(path)
    plt.show()

    return valid_y_list, len(valid_y_list), hist_dic

In [15]:
def time_num_y3(df, num_reads, max_chain_strength, chain_break_fraction, path):
    """Sweep chain strengths and plot the number of distinct hits against elapsed time.

    One sampling call is made for each ``chain_strength`` in
    ``range(0, max_chain_strength)``.  Each time a zero-energy sample vector
    not seen before is found, the wall-clock time since the start of the
    function is recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num_list, time_list)``: the distinct
        zero-energy vectors, the running hit counts ``[1, 2, ..., n]``, and
        the elapsed seconds at which each hit was found.
    """
    time_list = []
    time_0 = time.time()

    valid_y_list = []
    bqm = make_Hamiltonian(df)

    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    for chain_strength in range(0, max_chain_strength):
        res = qa_sampler.sample(
            bqm, chain_strength=chain_strength,
            chain_break_fraction=chain_break_fraction, num_reads=num_reads)

        for y_info in list(res.record):
            # Record rows are indexed positionally: [0] sample vector, [1] energy.
            if y_info[1] == 0.:
                valid_y = list(y_info[0])
                # Membership test also covers the empty-list case, so the two
                # duplicated branches of the original collapse into one.
                if valid_y not in valid_y_list:
                    valid_y_list.append(valid_y)
                    time_list.append(time.time() - time_0)

    valid_y_num_list = list(range(1, len(valid_y_list) + 1))

    plt.xlabel('time')
    plt.ylabel('number of hits')
    plt.plot(time_list, valid_y_num_list)
    plt.savefig(path)
    plt.show()

    return valid_y_list, valid_y_num_list, time_list

In [16]:
def time_num_y1(df, num_reads, max_chain_strength, chain_break_fraction, path):
    """Sample once and plot the number of distinct hits against elapsed time.

    Each time a zero-energy sample vector not seen before is found, the
    wall-clock time since the start of the function is recorded.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength
        Not used by this function; kept so the signature matches
        ``time_num_y3``.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_list, valid_y_num_list, time_list)``: the distinct
        zero-energy vectors, the running hit counts ``[1, 2, ..., n]``, and
        the elapsed seconds at which each hit was found.
    """
    time_list = []
    time_0 = time.time()

    valid_y_list = []
    bqm = make_Hamiltonian(df)

    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    res = qa_sampler.sample(
        bqm,
        chain_break_fraction=chain_break_fraction,
        num_reads=num_reads)

    for y_info in list(res.record):
        # Record rows are indexed positionally: [0] sample vector, [1] energy.
        if y_info[1] == 0.:
            valid_y = list(y_info[0])
            # Membership test also covers the empty-list case, so the two
            # duplicated branches of the original collapse into one.
            if valid_y not in valid_y_list:
                valid_y_list.append(valid_y)
                time_list.append(time.time() - time_0)

    valid_y_num_list = list(range(1, len(valid_y_list) + 1))

    plt.xlabel('time')
    plt.ylabel('number of hits')
    plt.plot(time_list, valid_y_num_list)
    plt.savefig(path)
    plt.show()

    return valid_y_list, valid_y_num_list, time_list

In [17]:
def p_value_transition(df, num_reads, max_chain_strength, chain_break_fraction, path) :
    """Sweep chain strengths and plot the running p-value against the number of hits.

    One sampling call is made for each ``chain_strength`` in
    ``range(0, max_chain_strength)``.  For every zero-energy sample vector not
    seen before, the running ratio
    ``(#hits with LI . y == observed t1) / (#hits)`` is recorded, where the
    observed ``t1`` is ``int(Y . LI)`` computed from ``df``.

    The original only stored the ratio when the new hit matched ``t1``, so
    the curve skipped every non-matching hit.  It is now stored for every
    new hit.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian``; must also provide ``'Y'`` and ``'LI'``.
    num_reads : int
        Forwarded to the sampler as ``num_reads``.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.
    chain_break_fraction
        Forwarded unchanged to the sampler's ``chain_break_fraction`` argument.
    path
        Destination passed to ``plt.savefig``.

    Returns
    -------
    tuple
        ``(valid_y_num, valid_y_list, p_dic)``: number of distinct hits, the
        hits themselves, and a dict mapping hit count to the running ratio.
    """
    # SECURITY: the API token is no longer hardcoded.  The D-Wave client
    # resolves it from its configuration file or the DWAVE_API_TOKEN
    # environment variable.
    dw_sampler = DWaveSampler(
        endpoint="https://cloud.dwavesys.com/sapi",
        solver='DW_2000Q_6',
        retry_interval=5
    )
    # Wrap the QPU sampler so the BQM is embedded automatically.
    qa_sampler = AutoEmbeddingComposite(dw_sampler)

    LI = list(df['LI'])  # hoisted: does not change inside the loops
    t1 = int(np.dot(df['Y'], df['LI']))
    t1_y = 0
    p_dic = {}
    valid_y_num = 0
    valid_y_list = []
    bqm = make_Hamiltonian(df)

    for chain_strength in range(0, max_chain_strength):
        res = qa_sampler.sample(
            bqm, chain_strength=chain_strength,
            chain_break_fraction=chain_break_fraction, num_reads=num_reads)

        for y_info in list(res.record):
            # Record rows are indexed positionally: [0] sample vector, [1] energy.
            if y_info[1] == 0.:
                valid_y = list(y_info[0])
                if valid_y not in valid_y_list:
                    valid_y_num += 1
                    valid_y_list.append(valid_y)
                    if int(np.dot(valid_y, LI)) == t1:
                        t1_y += 1
                    # Record the running ratio for every new hit, matching or not.
                    p_dic[valid_y_num] = t1_y/valid_y_num

    plt.xlabel('number of hits')
    plt.ylabel('p value')
    # Materialise the dict views so matplotlib receives plain sequences.
    plt.plot(list(p_dic.keys()), list(p_dic.values()))
    plt.savefig(path)
    plt.show()

    return valid_y_num, valid_y_list, p_dic

In [18]:
def test_validity():
    """Check that a hand-picked alternative Y keeps marginals 0 and 2 for ost16.

    Loads ``../../input/ost16.csv`` twice, overwrites ``Y`` in the second copy
    with a fixed 16-element vector, prints both marginal vectors, and asserts
    that entries 0 and 2 of ``calc_marginals`` agree.

    NOTE(review): a later cell defines ``test_validity`` again (for ost6);
    after a full run that later definition replaces this one.
    """
    csv_path = '../../input/ost16.csv'
    original = pd.read_csv(csv_path, sep=',',index_col=0)
    altered = pd.read_csv(csv_path, sep=',',index_col=0)

    altered['Y'] = np.array([0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1])

    marginals_original = calc_marginals(original)
    marginals_altered = calc_marginals(altered)
    print(marginals_original)
    print(marginals_altered)

    # Only the Y total (index 0) and the Y.AOP product (index 2) are compared.
    checked = [0, 2]
    assert np.all(marginals_original[checked] == marginals_altered[checked])

In [19]:
def time_measurement(df, max_chain_strength):
    """Estimate the total time of a chain-strength sweep plus one BQM build.

    Parameters
    ----------
    df : pandas.DataFrame
        Passed to ``make_Hamiltonian`` for the timed build.
    max_chain_strength : int
        Exclusive upper bound of the chain-strength sweep.

    Returns
    -------
    float
        ``sum(20 * k for k in range(max_chain_strength))`` plus the measured
        time (seconds) of a single ``make_Hamiltonian(df)`` call.
    """
    # NOTE(review): this adds 20 * chain_strength (unit unclear, presumably
    # microseconds of anneal time) to a value in seconds -- confirm the
    # intended formula and units.
    annealing_times = sum(20*chain_strength for chain_strength in range(0, max_chain_strength))
    # Time a callable so the *argument* ``df`` is used.  The original passed
    # the statement as a string with ``globals()``, which cannot see this
    # function's local ``df``.
    time_once = timeit.timeit(lambda: make_Hamiltonian(df), number=1)
    sum_time = annealing_times + time_once
    return sum_time

In [20]:
#==========
# Test code
#==========
def test_find_valid_y():
    """Smoke test: the observed ``t1`` must appear among the results for ost6.

    Loads ``../../input/ost6.csv``, computes ``t1 = sum(Y * LI)``, calls
    ``find_valid_y`` with 20 reads, prints the result, and asserts that the
    second returned value has a positive entry under the key ``t1``.

    NOTE(review): ``find_valid_y`` is not defined in the visible part of this
    notebook -- confirm which sampler function this test is meant to target.
    """
    data = pd.read_csv('../../input/ost6.csv', sep=',', index_col=0)
    observed_t1 = sum(data['Y'] * data['LI'])

    result = find_valid_y(data,  num_reads=20)
    valid_y_list, valid_y_num = result
    print(valid_y_list, valid_y_num)

    hits_for_observed = valid_y_num[observed_t1]
    assert hits_for_observed > 0

In [21]:
def test_validity():
    """Check that a hand-picked alternative Y keeps marginals 0 and 2 for ost6.

    Loads ``../../input/ost6.csv`` twice, overwrites ``Y`` in the second copy
    with a fixed 6-element vector, prints both marginal vectors, and asserts
    that entries 0 and 2 of ``calc_marginals`` agree.

    NOTE(review): an earlier cell also defines ``test_validity`` (for ost16);
    this definition overrides it.
    """
    csv_path = '../../input/ost6.csv'
    original = pd.read_csv(csv_path, sep=',',index_col=0)
    altered = pd.read_csv(csv_path, sep=',',index_col=0)

    altered['Y'] = np.array([1, 0, 0, 0, 0, 1])

    marginals_original = calc_marginals(original)
    marginals_altered = calc_marginals(altered)
    print(marginals_original)
    print(marginals_altered)

    # Only the Y total (index 0) and the Y.AOP product (index 2) are compared.
    checked = [0, 2]
    assert np.all(marginals_original[checked] == marginals_altered[checked])