# 不等式のQUBO変換を用いて基底エネルギー以下のサンプルを得る
- small data
- SA using PyQUBO

### import

In [1]:
import pandas as pd
import numpy as np
from sklearn.feature_selection import SelectKBest, f_regression
import random
from pyqubo import Array, OneHotEncInteger, solve_qubo

### 各変数

In [2]:
# X: raw feature matrix; the first CSV column is used as the row index.
X_ori = pd.read_csv("../input/df1.csv", index_col=0)
X_ori

Unnamed: 0,A,B,C,D,E
0,-0.355886,0.74274,-1.179183,-0.129722,1.686046
1,2.24989,0.065733,-0.076507,-0.21311,1.962091
2,-2.172508,2.75988,0.39064,0.124017,-1.465474


In [3]:
# y: the original label vector, one entry per sample row.
original_labels = [1, 0, 1]
ori_y = pd.Series(original_labels)
ori_y

0    1
1    0
2    1
dtype: int64

In [4]:
# Number of feature columns to keep; passed to SelectKBest as k.
selected_col_num = 1

## 特徴量選択
 - select_cols, else_colsを決める

In [5]:
# Score every column against ori_y with f_regression and keep the best
# selected_col_num of them; fit() returns the fitted selector itself.
selector = SelectKBest(score_func=f_regression, k=selected_col_num).fit(X_ori, ori_y)
# Boolean array, one flag per column of X_ori: True where the column was kept.
mask = selector.get_support()

In [6]:
# Split the column names of X_ori by the selector's boolean support mask:
# flagged columns go to select_cols, every other column goes to else_cols.
select_cols = [col for col, keep in zip(X_ori.columns, mask) if keep]
else_cols = [col for col, keep in zip(X_ori.columns, mask) if not keep]

select_cols

['A']

## すべての不等式が成り立つ：条件
やりたいこと：ハミルトニアンを最小化する（xを最適化する）

In [7]:
from decimal import *

In [8]:
# Replace every element of X with an int, rounding halves away from zero
# (ROUND_HALF_UP) instead of Python's default round-half-to-even.
def _round_half_up(value):
    """Return *value* rounded to the nearest int, ties going away from zero."""
    return int(Decimal(str(value)).quantize(Decimal('0'), rounding=ROUND_HALF_UP))


X = pd.DataFrame(index=[])
for col in X_ori.columns:
    X[col] = X_ori[col].map(_round_half_up)

X

Unnamed: 0,A,B,C,D,E
0,0,1,-1,0,2
1,2,0,0,0,2
2,-2,3,0,0,-1


In [9]:
# Largest and smallest entry of the integer matrix: reduce each row first,
# then reduce the per-row results.
row_maxima = X.max(axis=1)
row_minima = X.min(axis=1)
max_X_element = row_maxima.max()
min_X_element = row_minima.min()

# Rows are samples, columns are features.
num_samples, num_cols = X.shape

In [10]:
# W: upper bound on a.y - b.y.
# Knowing the largest and smallest elements of X is enough to estimate it:
# each of the num_samples summands can contribute at most (max - min).
element_range = max_X_element - min_X_element
W = int(element_range * num_samples)

In [11]:
# One binary decision variable per sample row, labelled 'y'.
y = Array.create('y', shape=num_samples, vartype='BINARY')

In [12]:
# Build one symbolic term (selected column . y) - (other column . y) for every
# pairing of a selected column with a non-selected column.
m_list = []
for sel_col in select_cols:
    # The selected column's expression does not depend on the inner loop,
    # so build it once per selected column.
    sel_term = y.dot(Array(X[sel_col].values.tolist()))
    for else_col in else_cols:
        else_term = y.dot(Array(X[else_col].values.tolist()))
        m_list.append(sel_term - else_term)

In [13]:
# Display the symbolic inequality terms.
m_list

[((Binary(y[0])*Num(0))+(Binary(y[1])*Num(2))+(Binary(y[2])*Num(-2))+(((Binary(y[0])*Num(1))+(Binary(y[1])*Num(0))+(Binary(y[2])*Num(3)))*Num(-1))),
 ((Binary(y[0])*Num(0))+(Binary(y[1])*Num(2))+(Binary(y[2])*Num(-2))+(((Binary(y[0])*Num(-1))+(Binary(y[1])*Num(0))+(Binary(y[2])*Num(0)))*Num(-1))),
 ((Binary(y[0])*Num(0))+(Binary(y[1])*Num(2))+(Binary(y[2])*Num(-2))+(((Binary(y[0])*Num(0))+(Binary(y[1])*Num(0))+(Binary(y[2])*Num(0)))*Num(-1))),
 ((Binary(y[0])*Num(0))+(Binary(y[1])*Num(2))+(Binary(y[2])*Num(-2))+(((Binary(y[0])*Num(2))+(Binary(y[1])*Num(2))+(Binary(y[2])*Num(-1)))*Num(-1)))]

In [14]:
# Integer variable a ranging over 1..W, one-hot encoded under the label "x".
a = OneHotEncInteger("x", 1, W, strength=1)
# Hamiltonian: sum over all terms of the squared gap between a and the term.
squared_gaps = [(a - term) ** 2 for term in m_list]
H_sum = sum(squared_gaps)
model = H_sum.compile()

In [15]:
qubo, offset = model.to_qubo()

# Simulated annealing (SA) via PyQUBO
raw_solution = solve_qubo(qubo)

# Decode the raw sample back into the model's named variables
decoded_solution, broken, energy = model.decode_solution(raw_solution, vartype="BINARY")
decoded_solution["y"] 

{0: 0, 1: 1, 2: 0}

## 出力結果の記録
- {0: 0, 1: 0, 2: 0}
- {0: 0, 1: 0, 2: 1}-SA不成功

In [16]:
# Display the full decoded sample (both the "x" bits and the "y" bits).
decoded_solution

{'x': {0: 1,
  10: 0,
  11: 0,
  12: 0,
  13: 0,
  14: 0,
  1: 0,
  2: 0,
  3: 0,
  4: 0,
  5: 0,
  6: 0,
  7: 0,
  8: 0,
  9: 0},
 'y': {0: 0, 1: 1, 2: 0}}

In [17]:
# Display the second value returned by model.decode_solution.
broken

{}

In [18]:
# Display the energy returned by model.decode_solution for the SA sample.
energy

4.0

#### 課題
すべての$x$を出力したいが、できていない

<br/>

### 元のyで求められるエネルギーとenergy を比較したい
- ori_H_sum : 元のyで求められるエネルギー

In [19]:
def make_ax_by(y):
    """Evaluate every inequality term a.y - b.y for a concrete vector *y*.

    For each column ``a`` in the module-level ``select_cols`` and each column
    ``b`` in ``else_cols``, computes ``dot(X[a], y) - dot(X[b], y)`` using the
    module-level integer matrix ``X``.

    Args:
        y: Sequence of per-sample values, aligned with the rows of ``X``.

    Returns:
        A list of numeric differences, ordered selected-column-major.
    """
    ax_by = []
    for sel_col in select_cols:
        # Invariant across the inner loop, so compute it once.
        sel_dot = np.dot(X[sel_col], y)
        for else_col in else_cols:
            ax_by.append(sel_dot - np.dot(X[else_col], y))
    return ax_by

In [20]:
def H(ax_by, index_ax_by, w_max=None):
    """Return the lowest one-hot energy reachable for one inequality term.

    The term ``m = ax_by[index_ax_by]`` is compared with an integer encoded
    one-hot over the values ``1..w_max``: position ``i`` of ``z`` stands for
    the value ``i + 1``. The value closest to ``m`` is selected, so the result
    is ``(1 - sum(z))**2 + (sum((i + 1) * z[i]) - m)**2`` for that choice.

    Args:
        ax_by: Sequence of inequality-term values.
        index_ax_by: Position in *ax_by* of the term to evaluate.
        w_max: Largest encodable value. Defaults to the module-level ``W``.

    Returns:
        A tuple ``(energy, z_series)`` where ``energy`` is an int and
        ``z_series`` is a ``pd.Series`` of length ``w_max`` holding the one-hot
        vector.

    Raises:
        ValueError: If ``w_max`` is smaller than 1, so nothing can be encoded.
    """
    if w_max is None:
        w_max = W
    if w_max < 1:
        raise ValueError("w_max must be at least 1")

    each_axby = int(ax_by[index_ax_by])

    # Clamp to the encodable range: terms <= 0 fall back to the value 1 and
    # terms above w_max fall back to w_max, each being the closest choice.
    best_value = min(max(each_axby, 1), w_max)

    z = [0] * w_max
    z[best_value - 1] = 1  # position i encodes the value i + 1

    sum_z = sum(z)
    sum_w_z = sum((w + 1) * z[w] for w in range(w_max))
    z_series = pd.Series(z)

    ans_h = (1 - sum_z) ** 2 + (sum_w_z - each_axby) ** 2
    return ans_h, z_series

In [21]:
# Energy reached by the original labels ori_y (ori_H_sum), for comparison
# with the energy of the SA sample.
ax_by = make_ax_by(ori_y)

# Evaluate H once per term and reuse the (energy, one-hot series) pairs.
ori_results = [H(ax_by, index_ax_by) for index_ax_by in range(len(ax_by))]
ori_H_list = [term_energy for term_energy, _ in ori_results]
ori_z_list = [z_series for _, z_series in ori_results]

ori_H_sum = sum(ori_H_list)
ori_z_df = pd.concat(ori_z_list, axis=1)

print(ori_H_sum)
print(ori_H_list)
print(ori_z_df)

78
[49, 4, 9, 16]
    0  1  2  3
0   1  1  1  1
1   0  0  0  0
2   0  0  0  0
3   0  0  0  0
4   0  0  0  0
5   0  0  0  0
6   0  0  0  0
7   0  0  0  0
8   0  0  0  0
9   0  0  0  0
10  0  0  0  0
11  0  0  0  0
12  0  0  0  0
13  0  0  0  0
14  0  0  0  0


In [22]:
# SA counts as successful when its energy does not exceed the energy
# obtained from the original labels.
sa_succeeded = ori_H_sum >= energy
if sa_succeeded:
    print('SAは成功していると考えられる')

SAは成功していると考えられる


## SAで出したyが全ての不等式を満たしているかどうか

In [23]:
# Take y from the sample SA actually returned rather than a hard-coded copy,
# so the check below cannot go stale when a rerun yields a different sample.
sol_y = pd.Series([decoded_solution["y"][idx] for idx in range(num_samples)])

In [24]:
# Evaluate every inequality term a.y - b.y for the SA solution sol_y.
sol_ax_by = make_ax_by(sol_y)
sol_ax_by 

[2, 2, 2, 0]

In [25]:
# Count the terms that are >= 0 and report when every term qualifies.
# NOTE(review): H() reaches zero energy only for terms > 0, while this check
# also accepts 0 -- confirm which inequality is intended.
ok = sum(1 for term in sol_ax_by if term >= 0)
if ok == len(sol_ax_by):
    print('SAで出したyは全ての不等式を満たしている')

SAで出したyは全ての不等式を満たしている


## 結果
- `SAは成功していると考えられる` と `SAで出したyは全ての不等式を満たしている` の両方を満たさない結果が出ることもある