In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_boston

from zdistance.combos import VariableChooser
from zdistance.tools import z_distance, n_combos

In [2]:
# Load the Boston housing data and build one frame holding the feature
# columns plus the regression target, stored under the name PRICE.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn — confirm the pinned version.
boston = load_boston()
df = pd.DataFrame(
    boston.data,
    columns=boston.feature_names,
).assign(PRICE=boston.target)
df.shape

(506, 14)

In [3]:
# Peek at the first five rows to confirm the 13 feature columns and the
# appended PRICE target look as expected.
df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,PRICE
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


---

In [4]:
# Score one hand-picked set of five predictors (INDUS, NOX, AGE, TAX, RAD)
# against PRICE. The frame passed in includes the target column, which is
# named by the second argument.
# NOTE(review): the meaning and direction of the returned float are defined in
# zdistance.tools, not here — confirm before interpreting the value.
z_distance(df[['PRICE', 'INDUS', 'NOX', 'AGE', 'TAX', 'RAD']], 'PRICE')

1.1406268062748648

In [5]:
# Score a second hand-picked set (DIS, RM, PTRATIO, LSTAT, TAX) against PRICE
# for comparison; its recorded value (about 0.83) is smaller than the one for
# the INDUS/NOX/AGE/TAX/RAD set (about 1.14).
# NOTE(review): presumably a smaller value means a better predictor set —
# confirm in zdistance.tools.
z_distance(df[['PRICE', 'DIS', 'RM', 'PTRATIO', 'LSTAT', 'TAX']], 'PRICE')

0.8277173487342929

In [6]:
# Count candidate combinations: every column except the PRICE target is a
# candidate predictor, hence the "- 1" (13 candidates for this frame).
# NOTE(review): the recorded result 8100 equals the number of subsets of 13
# items with at least 3 members (2**13 - 1 - 13 - 78), so the second argument
# looks like a minimum combo size — confirm in zdistance.tools.
n_combos(len(df.columns) - 1, 3)

8100

---

In [7]:
# Wrap the full frame in a VariableChooser with PRICE as the target column.
# NOTE(review): constructor semantics live in zdistance.combos; presumably
# every non-target column becomes a candidate predictor — confirm.
vc = VariableChooser(df, 'PRICE')

In [23]:
# Run the variable search with default settings; the recorded result is the
# three-variable combo ('NOX', 'RM', 'PTRATIO').
# NOTE(review): the execution counter jumps from In [7] to In [23] at this
# cell, so other cells were run in between and later removed. Re-run on a
# fresh kernel (Restart & Run All) to confirm these outputs still reproduce.
vc.select_combo()

('NOX', 'RM', 'PTRATIO')

In [24]:
# Inspect what the search kept. In the recorded output each entry is a
# (combo, score) pair in ascending score order, with the selected combo first
# and an ('init', inf) placeholder last.
# NOTE(review): presumably lower is better and 'init' is the seed entry the
# search starts from — confirm in zdistance.combos.
vc.solutions

[(('NOX', 'RM', 'PTRATIO'), 0.6238119419314533),
 (('INDUS', 'RM', 'PTRATIO'), 0.6404706015401547),
 (('CRIM', 'RM', 'PTRATIO'), 0.6509830803637401),
 (('CRIM', 'INDUS', 'RM'), 0.6967152482489248),
 (('CRIM', 'ZN', 'RM'), 0.7105515037430197),
 (('CRIM', 'ZN', 'INDUS'), 0.8360954056489769),
 ('init', inf)]

In [25]:
# Repeat the search with len_penalty=True. The recorded selection grows from
# three variables to seven.
# NOTE(review): how the length penalty enters the score is not visible in this
# notebook — confirm in zdistance.combos before comparing scores across runs.
vc.select_combo(len_penalty=True)

('CRIM', 'ZN', 'INDUS', 'RM', 'PTRATIO', 'B', 'LSTAT')

In [26]:
# Inspect the kept solutions after the len_penalty run. The recorded list has
# ten (combo, score) entries in ascending score order and no ('init', inf)
# placeholder.
# NOTE(review): presumably the list is capped at ten entries and is rebuilt on
# every select_combo call — confirm in zdistance.combos.
vc.solutions

[(('CRIM', 'ZN', 'INDUS', 'RM', 'PTRATIO', 'B', 'LSTAT'), 0.8516304471644691),
 (('CRIM', 'INDUS', 'RM', 'PTRATIO', 'B', 'LSTAT'), 0.8599144540120842),
 (('CRIM', 'ZN', 'RM', 'TAX', 'PTRATIO', 'LSTAT'), 0.861060223522834),
 (('CRIM', 'ZN', 'INDUS', 'RM', 'PTRATIO', 'LSTAT'), 0.8615809706768566),
 (('CRIM', 'ZN', 'INDUS', 'RM', 'PTRATIO', 'B'), 0.8621542610047892),
 (('CRIM', 'INDUS', 'RM', 'PTRATIO', 'B'), 0.8671169574949262),
 (('CRIM', 'NOX', 'RM', 'PTRATIO'), 0.8741439579760025),
 (('CRIM', 'INDUS', 'RM', 'PTRATIO'), 0.8748363550900666),
 (('CRIM', 'ZN', 'RM', 'PTRATIO'), 0.9031066867475166),
 (('NOX', 'RM', 'PTRATIO'), 0.9058632239827353)]

In [27]:
# Ask the chooser how many combinations it counts. The recorded value (8100)
# matches the standalone n_combos(df.shape[1]-1, 3) call earlier.
# NOTE(review): presumably this reflects the chooser's default search bounds —
# confirm in zdistance.combos.
vc.n_combos()

8100

In [28]:
# Search over five named candidates only, with minimum=4 and maximum=4. The
# recorded result has exactly four variables, leaving out AGE.
# NOTE(review): presumably minimum/maximum bound the combo size and indep_vars
# restricts the candidate pool — confirm in zdistance.combos.
vc.select_combo(indep_vars=['NOX', 'RM', 'PTRATIO', 'ZN', 'AGE'], minimum=4, maximum=4)

('NOX', 'RM', 'PTRATIO', 'ZN')