In [None]:
import logging
import sys
import os
%cd ..
from bds.meel import approx_mc2, approx_mc2_core
from bds.rule import Rule
from bds.utils import bin_random , bin_array, randints, solutions_to_dict

#from bds.bb import BranchAndBoundNaive, BranchAndBoundV1, BranchAndBoundV0, BranchAndBoundV2
from logzero import logger
import gmpy2 as gmp
from gmpy2 import mpz , mpfr
from bds.sat.min_freq import construct_min_freq_program
from bds.sat.solver import construct_solver
from bds.gf2 import GF
from bds.rule import Rule
from bds.utils import mpz_set_bits
from tests.utils import assert_dict_allclose, assert_close_mpfr

#from bds.bounds_utils import find_equivalence_classes
import numpy as np 

from bds.cbb_v2 import * 

# Let DEBUG-level messages from the logzero logger through.
logger.setLevel(logging.DEBUG)

# Size argument handed to Rule.random(...) and bin_random(...) in the benchmark cells below.
num_pts = 500

# Forwarded to approx_mc2 (as `show_progress` / `rand_seed`) later in the notebook.
show_progres = True
rand_seed = 1234

In [None]:
from time import time

#num_rules_list = reversed([25, 50, 75, 100, 200])
ub = 0.7
lmbd = 0.1
res_rows = []
all_times_naive = []
all_times_V1 = []
all_times_V0 = []

for num_rules in [5]:
    # Random instance: `num_rules` random rules (ids start at 1) and a random label vector.
    rules = [Rule.random(rule_idx + 1, num_pts) for rule_idx in range(num_rules)]
    y = bin_random(num_pts)

    # Each variant is timed over construction + full enumeration of its solutions;
    # the conversion to a dict happens after the clock is stopped.
    finished = []
    for solver_cls, timings in (
        (BranchAndBoundNaive, all_times_naive),
        (BranchAndBoundV0, all_times_V0),
        (BranchAndBoundV1, all_times_V1),
    ):
        start_time = time()
        solver = solver_cls(rules, ub=ub, y=y, lmbd=lmbd)
        feasible_solutions = list(solver.run(return_objective=True))
        timings.append(time() - start_time)
        finished.append((solver, solutions_to_dict(feasible_solutions)))

    # Keep the names that later cells inspect.
    (bbNaive, actualNaive), (bbV0, actual_V0), (bbV1, actual_V1) = finished
    

In [None]:
# NOTE(review): hard-coded absolute path to one user's checkout; os.chdir will fail on any
# machine where this directory does not exist.
os.chdir("/u/50/ciaperm1/unix/Desktop/sampling-rashomon-decision-set-code")

In [None]:
import matplotlib.pyplot as plt

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']

# x positions (number of rules) for the runtime curves.
# NOTE(review): hard-coded; matplotlib requires one x value per entry of the timing
# lists, so this must be kept in sync with the `num_rules` values of the timing loop.
num_rules_axis = [10, 25, 40]

# (timings, legend label, marker, colour) for each curve.
runtime_series = [
    (all_times_naive, "HLB", "o", colors[0]),
    (all_times_V0, "RSSB", "^", colors[1]),
    (all_times_V1, "RSSB + EPLB", "s", colors[2]),
]

# savefig does not create missing directories.
os.makedirs("plots", exist_ok=True)

# The same figure is produced twice: first with a logarithmic y axis, then linear.
for use_log_scale, out_stem in (
    (True, "plots/log_first_comparison_bounds"),
    (False, "plots/first_comparison_bounds"),
):
    fig, ax = plt.subplots(1, 1, figsize=(6.38, 3.7))
    for timings, label, marker, color in runtime_series:
        ax.plot(
            num_rules_axis,
            timings,
            label=label,
            marker=marker,
            linewidth=1.5,
            color=color,
            markersize=12,
            markeredgewidth=0.5,
            markeredgecolor="grey",
            alpha=0.5,
        )
    if use_log_scale:
        ax.set_yscale("log")
    ax.set_title("RSSB VS HLB VS RSSB + EPLB")
    ax.set_xlabel("# Rules")
    ax.legend()
    ax.grid()
    fig.tight_layout()
    fig.savefig(out_stem + ".pdf")
    plt.show()

#### Check same results 

In [None]:
from time import time

#num_rules_list = reversed([25, 50, 75, 100, 200])
ub = 0.7
lmbd = 0.1
res_rows = []
all_times_naive = []
all_times_V1 = []
all_times_V0 = []

for num_rules in [10]:
    # Random instance: `num_rules` random rules (ids start at 1) and a random label vector.
    rules = [Rule.random(rule_idx + 1, num_pts) for rule_idx in range(num_rules)]
    y = bin_random(num_pts)

    # Time construction + enumeration for each variant; every variant after the first
    # is checked against the first one's solutions before the next variant starts.
    finished = []
    for solver_cls, timings in (
        (BranchAndBoundNaive, all_times_naive),
        (BranchAndBoundV0, all_times_V0),
        (BranchAndBoundV1, all_times_V1),
    ):
        start_time = time()
        solver = solver_cls(rules, ub=ub, y=y, lmbd=lmbd)
        feasible_solutions = list(solver.run(return_objective=True))
        timings.append(time() - start_time)
        solutions = solutions_to_dict(feasible_solutions)
        if finished:
            assert_dict_allclose(finished[0][1], solutions)
        finished.append((solver, solutions))

    # Keep the names that later cells inspect.
    (bbNaive, actualNaive), (bbV0, actual_V0), (bbV1, actual_V1) = finished

In [None]:
actualNaive


### Try real-world data

In [1]:
from sklearn import datasets 
import logging
import sys
import os
%cd ..
from bds.meel import approx_mc2, approx_mc2_core
from bds.rule import Rule
from bds.utils import bin_random , bin_array, randints, solutions_to_dict

#from bds.bb import BranchAndBoundNaive, BranchAndBoundV1, BranchAndBoundV0, BranchAndBoundV2
from logzero import logger
import gmpy2 as gmp
from gmpy2 import mpz , mpfr
from bds.sat.min_freq import construct_min_freq_program
from bds.sat.solver import construct_solver
from bds.gf2 import GF
from bds.rule import Rule
from bds.utils import mpz_set_bits
from tests.utils import assert_dict_allclose, assert_close_mpfr

#from bds.bounds_utils import find_equivalence_classes
import numpy as np 

#from bds.cbb_v2 import * 

# Let DEBUG-level messages from the logzero logger through.
logger.setLevel(logging.DEBUG)

num_pts = 500

# Forwarded to approx_mc2 (as `show_progress` / `rand_seed`) later in the notebook.
show_progres = True
rand_seed = 1234

/u/50/ciaperm1/unix/sampling-rashomon-decision-set-code


In [2]:
import pandas as pd

In [3]:
# Load the binarised COMPAS test split with pandas.
# NOTE(review): the preview below shows an `Unnamed: 0` column that looks like a serialised
# row index, as well as the `recidivate-within-two-years:1` column. Both are carried into
# X (and therefore into the mined itemsets) by the following cells. Consider
# `index_col=0` and dropping the target column — confirm against the data file.
data = pd.read_csv('data/compas_test-binary.csv')

# Preview the first rows.
data.head()

Unnamed: 0,sex:Male,age:18-20,age:21-22,age:23-25,age:26-45,age:>45,juvenile-felonies:>0,juvenile-misdemeanors:>0,juvenile-crimes:>0,priors:2-3,priors:=0,priors:=1,priors:>3,current-charge-degree:Misdemeanor,recidivate-within-two-years:1
0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,1,0,0,0,0,0,0,0,1,1,1
2,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1
3,1,0,0,1,0,0,0,0,0,1,0,0,0,0,1
4,1,0,0,0,1,0,0,0,0,0,1,0,0,1,0


In [4]:
data.shape

(721, 15)

In [5]:
X = data.to_numpy()

In [6]:
X.shape

(721, 15)

In [7]:
# Read the label file: every line is split on single spaces, the first field is
# discarded and the remaining fields are parsed as ints (one list per line).
# The context manager makes sure the file handle is closed again.
labels = []
with open("data/compas_test.label", "r") as f:
    for row in f:
        labels.append(list(map(int, row.split(" ")[1:])))

In [8]:
y = np.array(labels[1], dtype = bool) 

In [9]:
assert len(y) == X.shape[0]

In [10]:
# Turn every row of X into a "transaction": the set of column indices whose entry is truthy.
X_bag = [{col for col, value in enumerate(row) if value} for row in X]

In [11]:
from bds.fpgrowth import preprocess_transaction_list, build_fptree, fpgrowth_on_tree

In [12]:
lmbd = 0.1
# Support threshold passed to the FP-growth helpers below; scaled by the number of rows of X.
min_support = lmbd * X.shape[0] / 2

In [13]:
# Prepare the transactions for FP-growth, build the FP-tree, then mine it.
ordered_input_data = preprocess_transaction_list(
    X_bag, min_support
)
tree = build_fptree(ordered_input_data)
# Collect whatever fpgrowth_on_tree yields into a set.
frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))

In [14]:
len(frequent_itemsets)

83

In [15]:
from bds.rule import Rule
from time import time

def compute_truthtable(X, itemset):
    """Return the truth table of `itemset` over the rows of `X` as an mpz bit mask.

    Bit ``i`` of the result is set iff row ``i`` of ``X`` has a truthy entry in
    every column listed in ``itemset``. An empty itemset covers every row.

    Membership is decided by truthiness rather than by comparing the row sum with
    ``len(itemset)``: the sum test is only right when every selected entry is
    exactly 0 or 1 and silently gives wrong answers for any other value.

    Parameters
    ----------
    X : 2-D array-like, indexed as X[row, column].
    itemset : iterable of column indices.

    Returns
    -------
    mpz with one bit set per covered row index.
    """
    columns = list(itemset)
    # One vectorised pass instead of a Python loop over rows.
    covered = np.asarray(X)[:, columns].astype(bool).all(axis=1)
    # Plain Python ints, in ascending order.
    support_list = np.flatnonzero(covered).tolist()
    return mpz_set_bits(mpz(), support_list)
    

In [16]:
# One Rule per frequent itemset. Ids start at 1 while the names are numbered from 0;
# each rule carries the truth table of its itemset over X.
rules = [
    Rule(
        id=position + 1,
        name="rule_" + str(position),
        cardinality=len(itemset),
        truthtable=compute_truthtable(X, itemset),
    )
    for position, itemset in enumerate(frequent_itemsets)
]
    

In [17]:
import pickle

# Persist the mined rules; the file name carries the number of rules.
# The context manager closes (and thereby flushes) the file handle deterministically.
with open("rules_compas_" + str(len(rules)), "wb") as rules_file:
    pickle.dump(rules, rules_file)

In [18]:
from bds.bbSampleTree import BranchAndBoundNaive
import time 
import random

In [19]:
lmbd = 0.1
ub = 0.5
# BranchAndBoundNaive is the class imported from bds.bbSampleTree in the previous cell.
bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=4, k=150) 
# Materialise everything runST() yields and display it.
ll = list(bb.runST())
ll

[D 230702 23:43:46 bbSampleTree:60] calling BranchAndBoundNaive with ub=0.5, lmbd=0.1


[({0, 12, 40, 44, 46},
  (Node(rule_id=46, lower_bound=0.4943134535367545, objective=0.4943134535367545),
   mpz(3125306672532590281242601330083320625139072548849475009812709706423058279332502941362627212235171556652063788448342441743418153884534911089387409341473586327218915720661265948417115949989929421486246416609999020563473)),
  0.4943134535367545),
 ({0, 12, 40, 65, 68},
  (Node(rule_id=65, lower_bound=0.4388349514563107, objective=0.4388349514563107),
   mpz(3211499608801524945890741444623303939446306800928117630649172097681368798371474586664965383039343198783509965207847258372215735609721925869834424573208620501319560568415214220708061823377636416469135306435910444719185)),
  0.4388349514563107),
 ({0, 12, 40, 67},
  (Node(rule_id=67, lower_bound=0.32635228848821085, objective=0.32635228848821085),
   mpz(3211499608801524946040243498781548032370740712103763744946161245990531171152132185665757108798383234191138904203513130624796117092307105583196452246849769684379327246988419517

In [20]:
from bds.bbSampleTreeApproxCounting import BranchAndBoundNaive
import time 
import random

In [21]:
lmbd = 0.1
ub = 0.5
# Same class name as before, but now bound to the bds.bbSampleTreeApproxCounting import
# from the previous cell.
bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=4, k=150) 
# The return value of runST() is kept as-is here (not wrapped in list()).
Z = bb.runST()


[D 230702 23:43:46 bbSampleTreeApproxCounting:60] calling BranchAndBoundNaive with ub=0.5, lmbd=0.1


In [22]:
Z

162.0

In [23]:
import pandas as pd
import ray
import logging
from bds.meel import approx_mc2, approx_mc2_core
from bds.bb import get_ground_truth_count
from bds.rule import Rule
from bds.utils import bin_random, randints
from logzero import logger

import numpy as np
from contexttimer import Timer

# Only WARN and above from here on.
logger.setLevel(logging.WARN)

num_pts = 1000

# Arguments forwarded to approx_mc2 / get_ground_truth_count in the following cells.
ub = 0.8
lmbd = .1
delta = .8
eps = .8
show_progres = True
rand_seed = 1234

# Start a local Ray instance limited to 4 CPUs.
ray.init(num_cpus=4)

2023-07-02 23:43:51,960	INFO worker.py:1636 -- Started a local Ray instance.


0,1
Python version:,3.9.16
Ray version:,2.5.0


In [24]:
# NOTE(review): in the recorded run this call never completed — Ray repeatedly killed the
# workers for memory pressure and an OutOfMemoryError was raised (see the output below).
test_cnt = approx_mc2(
                rules, y, lmbd=lmbd, ub=ub,
                delta=delta, eps=eps, rand_seed=rand_seed, show_progress=show_progres,
                parallel=True, log_level=logging.WARN
            )

  0%|                                                     | 0/4 [00:00<?, ?it/s]

[2m[36m(approx_mc2_core_wrapper pid=531589)[0m rand_seed: 822569775
[2m[36m(approx_mc2_core_wrapper pid=531590)[0m rand_seed: 524453158[32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


[2m[33m(raylet)[0m [2023-07-02 23:44:51,872 E 531537 531537] (raylet) node_manager.cc:3069: 2 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 38bc205c356c3acba8e94637f15569795aceec977ed48ec6b4f44555, IP: 192.168.0.103) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.103`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


[2m[36m(approx_mc2_core_wrapper pid=531587)[0m rand_seed: 1365105717
[2m[36m(approx_mc2_core_wrapper pid=531944)[0m rand_seed: 2137449171
[2m[36m(approx_mc2_core_wrapper pid=532013)[0m rand_seed: 524453158
[2m[36m(approx_mc2_core_wrapper pid=532094)[0m rand_seed: 1365105717
[2m[36m(approx_mc2_core_wrapper pid=532178)[0m rand_seed: 2137449171


[2m[33m(raylet)[0m [2023-07-02 23:45:51,873 E 531537 531537] (raylet) node_manager.cc:3069: 4 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 38bc205c356c3acba8e94637f15569795aceec977ed48ec6b4f44555, IP: 192.168.0.103) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.103`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


[2m[36m(approx_mc2_core_wrapper pid=532239)[0m rand_seed: 524453158
[2m[36m(approx_mc2_core_wrapper pid=532309)[0m rand_seed: 1365105717
[2m[36m(approx_mc2_core_wrapper pid=532374)[0m rand_seed: 2137449171


[2m[33m(raylet)[0m [2023-07-02 23:46:51,874 E 531537 531537] (raylet) node_manager.cc:3069: 3 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 38bc205c356c3acba8e94637f15569795aceec977ed48ec6b4f44555, IP: 192.168.0.103) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.103`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


[2m[36m(approx_mc2_core_wrapper pid=532483)[0m rand_seed: 524453158
[2m[36m(approx_mc2_core_wrapper pid=532548)[0m rand_seed: 1365105717
[2m[36m(approx_mc2_core_wrapper pid=532609)[0m rand_seed: 2137449171
[2m[36m(approx_mc2_core_wrapper pid=532675)[0m rand_seed: 524453158


  0%|                                                     | 0/4 [03:46<?, ?it/s]


OutOfMemoryError: Task was killed due to the node running low on memory.
Memory on the node (IP: 192.168.0.103, ID: 38bc205c356c3acba8e94637f15569795aceec977ed48ec6b4f44555) where the task (task ID: 7299104621e99824e642104d65b6e40581c0534801000000, name=approx_mc2_core_wrapper, pid=531589, memory used=4.52GB) was running was 14.68GB / 15.43GB (0.951097), which exceeds the memory usage threshold of 0.95. Ray killed this worker (ID: 43c6a704808b9f473b2aea886a3ee8db12bfd6642378a9443087a307) because it was the most recently scheduled task; to see more information about memory usage on this node, use `ray logs raylet.out -ip 192.168.0.103`. To see the logs of the worker, use `ray logs worker-43c6a704808b9f473b2aea886a3ee8db12bfd6642378a9443087a307*out -ip 192.168.0.103. Top 10 memory users:
PID	MEM(GB)	COMMAND
531589	4.52	ray::approx_mc2_core_wrapper
77440	1.45	/usr/lib/libreoffice/program/soffice.bin --impress file:///u/50/ciaperm1/unix/Documents/thesisDef_v2...
4253	0.73	/u/50/ciaperm1/unix/anaconda3/envs/mynewenvnewest/bin/python /u/50/ciaperm1/unix/anaconda3/envs/myne...
3636	0.61	/usr/bin/gnome-software --gapplication-service
48783	0.47	/opt/google/chrome/chrome --type=renderer --field-trial-handle=3761225392334852051,77675432944395349...
51334	0.43	/usr/bin/nautilus --gapplication-service
115185	0.31	/u/50/ciaperm1/unix/anaconda3/bin/python -m pyls --host 127.0.0.1 --port 2088 --tcp --check-parent-p...
115119	0.30	/u/50/ciaperm1/unix/anaconda3/bin/python /u/50/ciaperm1/unix/anaconda3/bin/spyder --new
2583	0.24	/usr/bin/gnome-shell
531400	0.24	/u/50/ciaperm1/unix/anaconda3/envs/mynewenvnewest/bin/python -m ipykernel_launcher -f /u/50/ciaperm1...
Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.

[2m[36m(approx_mc2_core_wrapper pid=532799)[0m rand_seed: 2137449171


[2m[33m(raylet)[0m [2023-07-02 23:47:51,875 E 531537 531537] (raylet) node_manager.cc:3069: 7 Workers (tasks / actors) killed due to memory pressure (OOM), 0 Workers crashed due to other reasons at node (ID: 38bc205c356c3acba8e94637f15569795aceec977ed48ec6b4f44555, IP: 192.168.0.103) over the last time period. To see more information about the Workers killed on this node, use `ray logs raylet.out -ip 192.168.0.103`
[2m[33m(raylet)[0m 
[2m[33m(raylet)[0m Refer to the documentation on how to address the out of memory issue: https://docs.ray.io/en/latest/ray-core/scheduling/ray-oom-prevention.html. Consider provisioning more memory on this node or reducing task parallelism by requesting more CPUs per task. To adjust the kill threshold, set the environment variable `RAY_memory_usage_threshold` when starting Ray. To disable worker killing, set the environment variable `RAY_memory_monitor_refresh_ms` to zero.


[2m[36m(approx_mc2_core_wrapper pid=532798)[0m rand_seed: 524453158


In [25]:
ref_count = get_ground_truth_count(rules, y, lmbd, ub)

[2m[36m(approx_mc2_core_wrapper pid=532918)[0m rand_seed: 1365105717


KeyboardInterrupt: 

In [None]:
len(rules)

In [None]:
bb.reset_tree()

In [None]:
bb.tree.root

In [None]:
pseudosolutions = [bb.tree.root] # empty -- 

In [None]:
sampled_items = random.sample(pseudosolutions, 1)

In [None]:
sampled_items

In [None]:
not_captured_root = bb._not_captured_by_default_rule()



In [None]:
pseudosolutions = [(bb.tree._root, not_captured_root)] # empty -
pseudosolutions     

In [None]:
out = list(bb._loop( *pseudosolutions[0] , 1 , return_objective=False))

In [None]:
set(out)

In [None]:
input_set = {'0', '4-57'}

# Split every entry on '-' and parse each piece as an int, keeping the pieces of one
# entry next to each other.
output_list = []
for item in input_set:
    output_list.extend(int(part) for part in item.split('-'))
print(output_list)

In [None]:
import gmpy2 as gmp

In [None]:
a1 = np.array([0,0,1,1,0,1]) 
a2 = np.array([0,1,0,1,0,1]) 
# Notebook-local definition; it rebinds the name imported from bds.utils earlier.
def mpz_set_bits(n: mpz, bits: np.ndarray) -> mpz:
    """Return `n` with every position listed in `bits` switched on.

    Each index is cast to a plain int before it is handed to gmpy2; the result of
    every `bit_set` call is carried forward and finally returned.
    """
    result = n
    for position in bits:
        result = gmp.bit_set(result, int(position))
    return result


In [None]:
a1_mpz = mpz_set_bits( mpz(), a1.nonzero()[0])
a2_mpz = mpz_set_bits( mpz(), a2.nonzero()[0])

In [None]:
# `a3` is otherwise only assigned further down in the notebook, so on a fresh kernel this
# cell would raise NameError; define it here (same value) before it is used.
a3 = np.array([1,0,0,0,0,1])
a3_mpz = mpz_set_bits( mpz(), a3.nonzero()[0]) 

In [None]:
a3_mpz

In [None]:
# NOTE: despite the "And" in the name, `|` is a bitwise OR, so this holds the union of
# the bits set in a1_mpz and a2_mpz.
logicalAnda = a1_mpz | a2_mpz

In [None]:
logicalAnda

In [None]:
def mpz2bag(n: mpz):
    """Return the set of bit positions that are 1 in `n`.

    `gmp.bit_scan1(n, start)` gives the position of the first 1-bit at or after
    `start`, or None when there is none. Each new scan starts right after the bit
    that was just found, so the number of scans is proportional to the number of
    set bits rather than to the position of the highest one.
    """
    bag = set()
    next_bit = gmp.bit_scan1(n, 0)
    while next_bit is not None:
        bag.add(next_bit)
        # Resume strictly after the bit just recorded.
        next_bit = gmp.bit_scan1(n, next_bit + 1)

    return bag

mpz2bag(a1_mpz)

In [None]:
a1, a2

In [None]:
mpz2bag(logicalAnda)

In [None]:
def compute_union(vectors):
    """Combine all entries of `vectors` with `|`, left to right, and return the result.

    The first entry seeds the accumulation, so an empty `vectors` raises IndexError.
    A fresh value is bound on every step; the inputs are not updated in place.
    """
    union = vectors[0]
    for position in range(1, len(vectors)):
        union = union | vectors[position]
    return union

a3 = np.array([1,0,0,0,0,1])

In [None]:
out = compute_union([a1, a2, a3]) 
out

In [None]:
out = compute_union([a1_mpz, a2_mpz, a3_mpz]) 
out

In [None]:
mpz2bag(out)

In [None]:
# Support thresholds swept below.
min_supports = [30, 40, 50, 60]

# Build the FP-tree once, with the smallest threshold of the sweep. Using the global
# `min_support` here would make the cell depend on kernel state: the loop below rebinds
# that name, so after one run it is left at 60 and a re-run would preprocess with 60.
# (Presumably preprocess_transaction_list filters by this threshold — confirm.)
ordered_input_data = preprocess_transaction_list(
    X_bag, min(min_supports)
)
tree = build_fptree(ordered_input_data)

all_times_naive = []
all_times = []
all_times_approx_mc2 = []

eps = 0.75
delta = 0.75

for min_support in min_supports:
    frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))

    # One Rule per frequent itemset (ids start at 1, names are numbered from 0),
    # each carrying the truth table of its itemset over X.
    rules = []
    for i, itemset in enumerate(frequent_itemsets):
        tt = compute_truthtable(X, itemset)
        this_Rule = Rule(id=i + 1, name="rule_" + str(i), cardinality=len(itemset), truthtable=tt)
        rules.append(this_Rule)

    # Number of rules obtained for this threshold.
    print(len(rules))

In [None]:
from time import perf_counter

from bds.random_hash import generate_h_and_alpha

# Support thresholds swept below.
min_supports = [30, 40, 50, 60]

# Build the FP-tree once, with the smallest threshold of the sweep. Using the global
# `min_support` here would make the cell depend on kernel state: the loop below rebinds
# that name, so after one run it is left at 60 and a re-run would preprocess with 60.
# (Presumably preprocess_transaction_list filters by this threshold — confirm.)
ordered_input_data = preprocess_transaction_list(
    X_bag, min(min_supports)
)
tree = build_fptree(ordered_input_data)

# Wall-clock seconds per threshold, one list per method.
all_times_naive = []
all_times = []
all_times_approx_mc2 = []

eps = 0.75
delta = 0.75

for min_support in min_supports:
    frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))

    # One Rule per frequent itemset (ids start at 1, names are numbered from 0).
    rules = []
    for i, itemset in enumerate(frequent_itemsets):
        tt = compute_truthtable(X, itemset)
        this_Rule = Rule(id=i + 1, name="rule_" + str(i), cardinality=len(itemset), truthtable=tt)
        rules.append(this_Rule)

    print(len(rules))

    # Persist this rule set; the context manager closes the file handle.
    with open("rules_compas_" + str(len(rules)), "wb") as rules_file:
        pickle.dump(rules, rules_file)

    lmbd = 0.1
    ub = 0.4
    cbb = ConstrainedBranchAndBoundNaive(rules, ub, y, lmbd)
    cbb_v2 = ConstrainedBranchAndBound(rules, ub, y, lmbd)

    # Both constrained solvers are run with the same A and t.
    rand_seed = 12
    num_constraints = 5
    num_rules = len(rules)
    A, t = generate_h_and_alpha(
        num_rules, num_constraints, rand_seed, as_numpy=True
    )

    # perf_counter is imported locally so the timing does not depend on whether an
    # earlier cell last bound `time` to the module or to the function.
    start_time = perf_counter()
    actual_sols = solutions_to_dict(
        list(cbb_v2.run(return_objective=True, A=A, t=t)))
    all_times.append(perf_counter() - start_time)

    # NOTE: this overwrites `actual_sols` from the run above, as the cell always did.
    start_time = perf_counter()
    actual_sols = solutions_to_dict(
        list(cbb.run(return_objective=True, A=A, t=t)))
    all_times_naive.append(perf_counter() - start_time)

    # Not read anywhere in this cell; kept in case other cells rely on them.
    thresh = 72
    prev_m = 1

    start_time = perf_counter()
    est = approx_mc2(
        rules=rules,
        y=y,
        lmbd=lmbd,
        ub=ub,
        delta=delta,
        eps=eps,
    )
    all_times_approx_mc2.append(perf_counter() - start_time)
    


In [None]:
all_times_naive

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(6.38, 3.7))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']

# x positions: number of rules for each point of the sweep.
all_n_rules = [98, 77 , 61, 46]

# Appearance shared by all three curves.
shared_style = dict(linewidth=1.5, markersize=12, markeredgewidth=0.5, markeredgecolor="grey", alpha=0.7)
for timings, label, marker, color in (
    (all_times_naive, "CBB-Naive", "o", colors[0]),
    (all_times, "CBB", "^", colors[1]),
    (all_times_approx_mc2, "ApproxMC2", "^", colors[2]),
):
    plt.plot(all_n_rules, timings, label=label, marker=marker, color=color, **shared_style)

plt.title("CBB vs CBB-Naive")
plt.xlabel("# Rules", fontsize=18)
plt.ylabel("Runtime (s)", fontsize=18)
plt.legend()
plt.grid()
plt.tight_layout()
# Log scale is switched on after the layout pass, right before saving.
plt.yscale("log")
plt.savefig("plots/comparison_naive_and_v2_compas_approx_mc_075" + ".pdf")
plt.show()

In [None]:
X.shape

In [None]:
all_times_approx_mc2

In [None]:
# NOTE(review): six x-values, whereas the timing loop above sweeps four min_support values;
# plotting against the timing lists needs matching lengths — confirm which run this belongs to.
all_n_rules = [187, 130, 98, 77, 61, 41]

In [None]:
all_times_naive

In [None]:
from bds.meel import approx_mc2

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(6.38, 3.7))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']

# Runtime against number of rules (`all_n_rules`) for the two constrained solvers and
# ApproxMC2. The third curve reads `all_times_approx_mc2`, the list filled by the
# benchmark loop.
shared_style = dict(linewidth=1.5, markersize=12, markeredgewidth=0.5, markeredgecolor="grey", alpha=0.7)
for timings, label, marker, color in (
    (all_times_naive, "CBB-Naive", "o", colors[0]),
    (all_times, "CBB", "^", colors[1]),
    (all_times_approx_mc2, "ApproxMC2", "^", colors[2]),
):
    plt.plot(all_n_rules, timings, label=label, marker=marker, color=color, **shared_style)

plt.title("CBB vs CBB-Naive")
plt.xlabel("# Rules", fontsize=18)
plt.ylabel("Runtime (s)", fontsize=18)
plt.legend()
plt.grid()
plt.tight_layout()
# Log scale is switched on after the layout pass, right before saving.
plt.yscale("log")
plt.savefig("plots/comparison_naive_and_v2_compas_approx_mc" + ".pdf")
plt.show()

In [None]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(1, 1, figsize=(6.38, 3.7))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']

# Runtime against number of rules for the two constrained solvers (linear y axis).
shared_style = dict(linewidth=1.5, markersize=12, markeredgewidth=0.5, markeredgecolor="grey", alpha=0.5)
for timings, label, marker, color in (
    (all_times_naive, "CBB-Naive", "o", colors[0]),
    (all_times, "CBB", "^", colors[1]),
):
    plt.plot(all_n_rules, timings, label=label, marker=marker, color=color, **shared_style)

plt.title("CBB vs CBB-Naive")
plt.xlabel("# Rules", fontsize=18)
plt.ylabel("Runtime (s)", fontsize=18)
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("plots/comparison_naive_and_v2_compas" + ".pdf")
plt.show()

In [None]:
all_times_naive

In [None]:
num_rules = len(rules) 

In [None]:
num_constraints = 5

In [None]:
rand_seed = 12

In [None]:
rand_seed = 12
num_constraints = 5
num_rules = len(rules) 
from bds.random_hash import generate_h_and_alpha
A, t = generate_h_and_alpha(
num_rules, num_constraints, rand_seed, as_numpy=True
)

In [None]:

actual_sols = solutions_to_dict(
list(cbb_v2.run(return_objective=True, A=A, t=t)))


In [None]:
actual_sols

In [None]:
from time import perf_counter


def _timed_run(solver_cls, rules, ub, y, lmbd):
    """Build `solver_cls`, enumerate all its solutions and time both steps together.

    Returns (solver, solutions as a dict via solutions_to_dict, elapsed seconds).
    The dict conversion happens after the clock is stopped.
    """
    # perf_counter is used so the cell does not depend on whether an earlier cell last
    # bound the name `time` to the module or to the function.
    begin = perf_counter()
    solver = solver_cls(rules, ub=ub, y=y, lmbd=lmbd)
    feasible_solutions = list(solver.run(return_objective=True))
    elapsed = perf_counter() - begin
    return solver, solutions_to_dict(feasible_solutions), elapsed


# Wall-clock seconds per repetition, one list per solver variant.
all_times_naive = []
all_times_V0 = []
all_times_V1 = []
all_times_V2 = []

n_rep = 3

lmbd = 0.1
ub = 0.4

for _ in range(n_rep):
    bbNaive, actualNaive, elapsed = _timed_run(BranchAndBoundNaive, rules, ub, y, lmbd)
    all_times_naive.append(elapsed)
    print("HLB done")

    bbV0, actual_V0, elapsed = _timed_run(BranchAndBoundV0, rules, ub, y, lmbd)
    all_times_V0.append(elapsed)
    # Every variant must reproduce the solutions of the one before it.
    assert_dict_allclose(actual_V0, actualNaive)
    print("RSSB done")

    bbV1, actual_V1, elapsed = _timed_run(BranchAndBoundV1, rules, ub, y, lmbd)
    all_times_V1.append(elapsed)
    assert_dict_allclose(actual_V0, actual_V1)
    print("EPLB + RSSB done")

    bbV2, actual_V2, elapsed = _timed_run(BranchAndBoundV2, rules, ub, y, lmbd)
    all_times_V2.append(elapsed)
    assert_dict_allclose(actual_V1, actual_V2)
    print("HLB + RSSB done")

In [None]:
actual_V1

In [None]:
import matplotlib.pyplot as plt 

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(6.38, 3.7))
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f']

# One x position per repetition of the benchmark.
x = list(range(n_rep))

# Appearance shared by all four curves.
shared_style = dict(linewidth=1.5, markersize=12, markeredgewidth=0.5, markeredgecolor="grey", alpha=0.5)
for timings, label, marker, color in (
    (all_times_naive, "HLB", "o", colors[0]),
    (all_times_V0, "RSSB", "^", colors[1]),
    (all_times_V1, "RSSB + EPLB", "s", colors[2]),
    (all_times_V2, "RSSB + HLB", "P", colors[3]),
):
    plt.plot(x, timings, label=label, marker=marker, color=color, **shared_style)

plt.title("RSSB VS HLB VS RSSB + EPLB VS RSSB + HLB")
plt.xlabel("Run", fontsize=18)
plt.ylabel("Runtime", fontsize=18)
plt.legend()
plt.grid()
plt.tight_layout()
plt.savefig("plots/first_comparison_bounds_compas" + ".pdf")
plt.show()

When there are so few rules, the equivalence-based bounds seem to be useless.

In [None]:
for k in bbV1.equivalence_classes: 
    print(bbV1.equivalence_classes[k].data_points)
