# 不等式のQUBO変換を用いて基底エネルギー以下のサンプルを得る
- small data
- SA using PyQUBO

### import

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, f_regression
import random
from pyqubo import Array, OneHotEncInteger, solve_qubo

### 各変数

In [2]:
# X: feature table with 5 rows and 6 columns (A-F), filled with standard-normal draws.
matrix = np.random.randn(5, 6)
X_ori = pd.DataFrame(data=matrix, columns=['A', 'B', 'C', 'D', 'E', 'F'])
X_ori

Unnamed: 0,A,B,C,D,E,F
0,0.479386,-0.958332,-0.660104,-0.034278,1.09715,0.568359
1,1.041824,0.841728,-0.188132,0.520696,-0.119567,-0.292868
2,0.910752,-0.438261,1.334449,0.991943,-0.124563,-0.156023
3,-0.269907,-1.960551,-0.340431,0.898961,-0.168632,0.988616
4,1.76523,1.110544,-0.074837,0.02845,-0.227094,1.258755


In [3]:
# y: original binary label, one entry per row of X_ori.
ori_y = pd.Series(data=[1, 0, 1, 1, 0])
ori_y

0    1
1    0
2    1
3    1
4    0
dtype: int64

In [4]:
# Number of feature columns to keep; passed as k to SelectKBest below.
selected_col_num = 1

## 特徴量選択
 - select_cols, else_colsを決める

In [5]:
# Fit a univariate selector (f_regression scores) that keeps the top
# `selected_col_num` columns; `mask` flags the kept columns of X_ori.
selector = SelectKBest(f_regression, k=selected_col_num).fit(X_ori, ori_y)
mask = selector.get_support()

In [6]:
# Split the column labels of X_ori by the selector mask:
# flagged columns go to select_cols, the rest to else_cols.
select_cols = [label for label, kept in zip(X_ori.columns, mask) if kept]
else_cols = [label for label, kept in zip(X_ori.columns, mask) if not kept]

select_cols

['B']

## すべての不等式が成り立つ：条件
やりたいこと → ハミルトニアンを最小化する（xを最適化する）

In [7]:
from decimal import *

In [8]:
# Replace every element of X_ori with an integer.
def _round_half_up(value):
    """Round *value* to the nearest integer using Decimal's ROUND_HALF_UP mode."""
    as_decimal = Decimal(str(value))
    return int(as_decimal.quantize(Decimal('0'), rounding=ROUND_HALF_UP))


X = pd.DataFrame(
    {name: X_ori[name].map(_round_half_up) for name in X_ori.columns}
)

X

Unnamed: 0,A,B,C,D,E,F
0,0,-1,-1,0,1,1
1,1,1,0,1,0,0
2,1,0,1,1,0,0
3,0,-2,0,1,0,1
4,2,1,0,0,0,1


In [9]:
# Global extremes over every entry of X (reduce per column, then overall).
max_X_element = X.max().max()
min_X_element = X.min().min()

# Rows are samples, columns are features.
num_samples, num_cols = X.shape

In [10]:
# W: upper bound on a.y - b.y.
# It can be estimated from the largest and smallest elements of X alone:
# each of the num_samples terms contributes at most (max - min).
W = int((max_X_element - min_X_element) * num_samples)

In [11]:
# One binary variable per row of X (num_samples entries), labelled 'y'.
y = Array.create('y', shape=num_samples, vartype='BINARY')

In [12]:
# For every (selected column, non-selected column) pair, build the symbolic
# expression  y . selected_column  -  y . other_column.
m_list = [
    y.dot(Array(X[select_cols[s]].values.tolist()))
    - y.dot(Array(X[else_cols[r]].values.tolist()))
    for s in range(selected_col_num)
    for r in range(num_cols - selected_col_num)
]

In [13]:
m_list

[((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(0))+(Binary(y[3])*Num(-2))+(Binary(y[4])*Num(1))+(((Binary(y[0])*Num(0))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(1))+(Binary(y[3])*Num(0))+(Binary(y[4])*Num(2)))*Num(-1))),
 ((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(0))+(Binary(y[3])*Num(-2))+(Binary(y[4])*Num(1))+(((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(0))+(Binary(y[2])*Num(1))+(Binary(y[3])*Num(0))+(Binary(y[4])*Num(0)))*Num(-1))),
 ((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(0))+(Binary(y[3])*Num(-2))+(Binary(y[4])*Num(1))+(((Binary(y[0])*Num(0))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(1))+(Binary(y[3])*Num(1))+(Binary(y[4])*Num(0)))*Num(-1))),
 ((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(1))+(Binary(y[2])*Num(0))+(Binary(y[3])*Num(-2))+(Binary(y[4])*Num(1))+(((Binary(y[0])*Num(1))+(Binary(y[1])*Num(0))+(Binary(y[2])*Num(0))+(Binary(y[3])*Num(0))+(Binary(y[4])*Num(0)))*Num(-1))),
 ((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(1))+

In [14]:
# `a` is a one-hot encoded integer (label "x") built with bounds 1 and W.
# NOTE(review): a single `a` is shared by every term below, whereas H() further
# down builds a separate one-hot vector per inequality — confirm which is intended.
# NOTE(review): the `broken` output of this notebook shows 'x_const' violated with
# all x bits 0; presumably strength = 1 is too weak — TODO confirm.
a = OneHotEncInteger("x", 1, W, strength = 1)
# Sum of squared gaps between `a` and each expression in m_list.
H_sum = sum((a-m)**2 for m in m_list)
model = H_sum.compile()

In [15]:
# Convert the compiled model into QUBO coefficients plus a constant offset.
qubo, offset = model.to_qubo()

# Simulated annealing via PyQUBO
# NOTE(review): solve_qubo / decode_solution look like the older PyQUBO API —
# confirm the installed version still provides them.
raw_solution = solve_qubo(qubo)

# Decode the raw result into named variables, broken constraints and energy
decoded_solution, broken, energy = model.decode_solution(raw_solution, vartype="BINARY")
decoded_solution["y"] 

{0: 0, 1: 0, 2: 0, 3: 0, 4: 0}

In [16]:
decoded_solution

{'x': {0: 0,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  15: 0,
  16: 0,
  17: 0,
  18: 0,
  19: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0},
 'y': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0}}

In [17]:
broken

{'x_const': {'result': {'x[13]': 0,
   'x[6]': 0,
   'x[0]': 0,
   'x[7]': 0,
   'x[2]': 0,
   'x[4]': 0,
   'x[8]': 0,
   'x[11]': 0,
   'x[17]': 0,
   'x[1]': 0,
   'x[14]': 0,
   'x[12]': 0,
   'x[3]': 0,
   'x[5]': 0,
   'x[9]': 0,
   'x[10]': 0,
   'x[18]': 0,
   'x[16]': 0,
   'x[19]': 0,
   'x[15]': 0},
  'penalty': 1.0}}

In [18]:
energy 

15.0

#### 課題
すべての$x$を出力したいが、できていない

<br/>

### 元のyで求められるエネルギーとenergy を比較したい
- ori_H_sum : 元のyで求められるエネルギー

In [19]:
def make_ax_by(y):
    """Return the differences a.y - b.y for every column pairing.

    For each selected column ``a`` (from ``select_cols``) and each remaining
    column ``b`` (from ``else_cols``) of the module-level table ``X``, the
    entry is ``np.dot(X[a], y) - np.dot(X[b], y)``. Entries are ordered with
    the selected column as the outer index.

    Args:
        y: Vector with one value per row of ``X``.

    Returns:
        list: One difference per (selected, remaining) column pair.
    """
    return [
        np.dot(X[select_cols[s]], y) - np.dot(X[else_cols[r]], y)
        for s in range(selected_col_num)
        for r in range(num_cols - selected_col_num)
    ]

In [20]:
def H(ax_by, index_ax_by, width=None):
    """Evaluate the one-hot penalty energy for a single inequality term.

    The integer slack is one-hot encoded over the values ``1..width``:
    bit ``z[k]`` stands for the value ``k + 1``. The best encodable value is
    chosen for ``ax_by[index_ax_by]``: the value itself when it is positive,
    otherwise 1 (the smallest representable value, which gives the lowest
    energy for a non-positive target).

    The previous implementation set ``z[value]`` instead of ``z[value - 1]``
    for positive targets. That disagreed with the non-positive branch, where
    ``z[0]`` means 1, and it raised ``IndexError`` when ``value == width``.

    Args:
        ax_by: Sequence of integer differences a.y - b.y.
        index_ax_by: Position in ``ax_by`` of the term to evaluate.
        width: Number of one-hot bits. Defaults to the module-level ``W``.

    Returns:
        tuple: ``(energy, z_series)`` where
        ``energy = (1 - sum(z))**2 + (sum((k + 1) * z[k]) - target)**2`` and
        ``z_series`` is the one-hot vector as a ``pandas.Series``.

    Raises:
        IndexError: If the value to encode does not fit in ``width`` bits.
    """
    if width is None:
        width = W  # fall back to the notebook-level bound

    target = int(ax_by[index_ax_by])

    # Values below 1 cannot be represented; 1 is the closest encodable value.
    encoded = target if target > 0 else 1
    if encoded > width:
        raise IndexError(
            f"value {encoded} cannot be one-hot encoded with {width} bits"
        )

    z = [0] * width
    z[encoded - 1] = 1  # bit k represents the value k + 1

    sum_z = sum(z)
    sum_w_z = sum((k + 1) * z[k] for k in range(width))

    ans_h = (1 - sum_z) ** 2 + (sum_w_z - target) ** 2
    return ans_h, pd.Series(z)

In [21]:
# Energy obtained when the original labels ori_y are plugged in.
ax_by = make_ax_by(ori_y)

# Evaluate H once per term and split the (energy, z) pairs afterwards.
ori_results = [H(ax_by, term) for term in range(len(ax_by))]
ori_energies = [result[0] for result in ori_results]
ori_z_list = [result[1] for result in ori_results]

ori_H_sum = sum(ori_energies)
ori_z_df = pd.concat(ori_z_list, axis=1)

print(ori_H_sum)
print(ori_energies)
print(ori_z_df)

138
[25, 16, 36, 25, 36]
    0  1  2  3  4
0   1  1  1  1  1
1   0  0  0  0  0
2   0  0  0  0  0
3   0  0  0  0  0
4   0  0  0  0  0
5   0  0  0  0  0
6   0  0  0  0  0
7   0  0  0  0  0
8   0  0  0  0  0
9   0  0  0  0  0
10  0  0  0  0  0
11  0  0  0  0  0
12  0  0  0  0  0
13  0  0  0  0  0
14  0  0  0  0  0
15  0  0  0  0  0
16  0  0  0  0  0
17  0  0  0  0  0
18  0  0  0  0  0
19  0  0  0  0  0


In [22]:
# Report success when the SA energy does not exceed the energy of the original labels.
# NOTE(review): nothing is printed when the check fails, and `broken` is not
# consulted here — confirm that is intended.
if ori_H_sum >= energy:
    print('SAは成功していると考えられる')

SAは成功していると考えられる


## SAで出したyが全ての不等式を満たしているかどうか

In [25]:
# Hard-coded copy of the y values shown in the decoded SA output above.
# NOTE(review): a re-run of SA may yield a different y; consider building this
# from decoded_solution["y"] instead — TODO confirm.
sol_y =  pd.Series([0, 0, 0, 0, 0])

In [26]:
# a.y - b.y for every column pair, evaluated at the SA solution sol_y.
sol_ax_by = make_ax_by(sol_y)
sol_ax_by 

[0, 0, 0, 0, 0]

In [27]:
# Count the strictly positive differences; the message is printed only when
# every entry of sol_ax_by is > 0.
ok = sum(1 for diff in sol_ax_by if diff > 0)
if len(sol_ax_by) == ok:
    print('SAで出したyは全ての不等式を満たしている')