In [None]:
import logging
import random
import time

import numpy as np
import pandas as pd
import ray
from contexttimer import Timer
from gmpy2 import mpfr, mpz
from logzero import logger

from bds.bb import get_ground_truth_count
from bds.bbSampleTreeApproxCounting import BranchAndBoundNaive
from bds.meel import approx_mc2, approx_mc2_core
from bds.rule import Rule
from bds.utils import bin_random, mpz_set_bits, randints

In [None]:
# Start Ray for this kernel, passing num_cpus=4.
# Note: the approx_mc2 call in the experiment cell passes parallel=False.
ray.init(num_cpus=4)

In [None]:
# ray.shutdown()

In [None]:
# Move the kernel's working directory up one level.
# NOTE(review): the `bds` imports in the first cell run before this change of
# directory, so they must already resolve from the notebook's own location — confirm.
%cd ..



# Only warnings and above from the logzero logger.
logger.setLevel(logging.WARN)

# Number of points handed to Rule.random / bin_random when building an instance.
num_pts = 1000

# Forwarded to approx_mc2, get_ground_truth_count and BranchAndBoundNaive
# under the same names (ub is passed positionally to the latter two).
lmbd, ub = 0.1, 0.8

# Forwarded to approx_mc2 as `eps` and `delta`.
eps, delta = 0.8, 0.8

# Seed used for np.random.seed and for approx_mc2's `rand_seed`.
rand_seed = 1234

# Spelled `show_progres` (sic) because the experiment cell reads it under this
# exact name; rename both places together if the typo is ever fixed.
show_progres = True

In [None]:
# Experiment grid: number of repetitions and the rule-set sizes to try
# (sizes are visited in the reverse of the order listed here).
n_reps = 1
num_rules_list = [50][::-1]  # use [200] here for the larger instance

# Keyword arguments shared by every approx_mc2 call in the loop below.
approx_mc2_kwargs = dict(
    lmbd=lmbd,
    ub=ub,
    delta=delta,
    eps=eps,
    rand_seed=rand_seed,
    show_progress=show_progres,
    parallel=False,
    log_level=logging.WARN,
)

np.random.seed(rand_seed)
res_rows = []
for _ in range(n_reps):
    for num_rules in num_rules_list:
        # Draw a fresh random instance: one seed per rule, then random labels.
        rule_random_seeds = randints(num_rules)
        rules = [
            Rule.random(
                rule_idx + 1, num_pts, random_seed=rule_random_seeds[rule_idx]
            )
            for rule_idx in range(num_rules)
        ]
        y = bin_random(num_pts)

        # Time the approx_mc2 estimate.
        with Timer() as cbb_timer:
            test_cnt = approx_mc2(rules, y, **approx_mc2_kwargs)
        test_elapsed = cbb_timer.elapsed

        # Time the reference count on the same instance.
        with Timer() as bb_timer:
            ref_count = get_ground_truth_count(rules, y, lmbd, ub)
        ref_elapsed = bb_timer.elapsed

        # Field order must match the column list used when the results
        # DataFrame is built from res_rows.
        row = (num_rules, test_elapsed, ref_elapsed, test_cnt, ref_count)
        res_rows.append(row)

In [None]:
# Value returned by get_ground_truth_count in the last iteration of the experiment loop.
ref_count

In [None]:
# Value returned by approx_mc2 in the last iteration of the experiment loop.
test_cnt

In [None]:
# Number of rules in the instance left over from the last loop iteration.
len(rules)

In [None]:
# One BranchAndBoundNaive run on the current `rules` / `y` for each (l, k)
# setting listed; the result of runST() is printed.
for l, k in [(1, 6500)]:
    bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=l, k=k)
    Z = bb.runST()
    print("count " + str(Z))

In [None]:
# Repeat the BranchAndBoundNaive run 100 times per (l, k) setting and keep
# every runST() result so the spread can be summarised afterwards.
all_counts = []
for _ in range(100):
    for l, k in [(1, 1500)]:
        bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=l, k=k)
        Z = bb.runST()
        all_counts.append(Z)

In [None]:
# Mean and median of the runST() results collected in all_counts.
np.mean(all_counts), np.median(all_counts)

In [None]:
# Column names, in the same order as the tuples appended to res_rows.
result_columns = [
    "num_rules",
    "running_time_approx_mc2",
    "running_time_bb",
    "estimate_count",
    "true_count",
]

# Build the results table and add two derived columns:
#   runtime-factor      = approx_mc2 running time / reference running time
#   estimation-rel-diff = (estimate - reference count) / reference count
df = pd.DataFrame(res_rows, columns=result_columns).assign(
    **{
        "runtime-factor": lambda t: (
            t["running_time_approx_mc2"] / t["running_time_bb"]
        ),
        "estimation-rel-diff": lambda t: (
            (t["estimate_count"] - t["true_count"]) / t["true_count"]
        ),
    }
)

df

In [None]:
# Mean running times (and their ratio) per rule-set size, printed as a
# Markdown table.
#
# The columns are selected with a *list* (note the double brackets).
# Selecting several columns with a bare tuple, groupby(...)["a", "b"], was
# deprecated in pandas 1.0 and in pandas 2.x the tuple is treated as a single
# key, so the original form fails there.
print(
    df.groupby("num_rules")[
        ["running_time_approx_mc2", "running_time_bb", "runtime-factor"]
    ]
    .mean()
    .to_markdown()
)

In [None]:
# Show the current label vector `y` (produced by bin_random(num_pts) in the experiment loop).
y

In [None]:
# Small random instance with two rules.
# NOTE(review): `rules` and `y` are overwritten by the hand-built example in
# the next cell, so this instance is only used if that cell is skipped.
num_rules = 2
rule_random_seeds = randints(num_rules)
rules = [
    Rule.random(
        rule_idx + 1,
        num_pts,
        random_seed=rule_random_seeds[rule_idx],
    )
    for rule_idx in range(num_rules)
]
y = bin_random(num_pts)

In [None]:
# Hand-built instance with five points and three rules.
y = np.array([0, 0, 1, 0, 1], dtype=bool)

# (id, name, bit positions handed to mpz_set_bits) for each rule. The trailing
# comment gives the boolean-vector form of the same truth table, assuming
# mpz_set_bits sets exactly the listed bit positions — TODO confirm.
toy_rule_specs = [
    (1, "rule-1", [1, 3]),  # [0, 1, 0, 1, 0]
    (2, "rule-2", [2, 4]),  # [0, 0, 1, 0, 1]
    (3, "rule-3", [0, 2, 4]),  # [1, 0, 1, 0, 1]
]

rules = [
    Rule(
        id=rule_id,
        name=rule_name,
        cardinality=1,
        truthtable=mpz_set_bits(mpz(), bit_positions),
    )
    for rule_id, rule_name, bit_positions in toy_rule_specs
]

In [None]:
# Run BranchAndBoundNaive on the hand-built rules. The second positional
# argument is 10 here, whereas the earlier cells pass `ub` in that position.
bb = BranchAndBoundNaive(rules, 10, y, lmbd, l=1, k=100000)
Z = bb.runST()

In [None]:
# Evaluates to 7. Presumably the number of non-empty subsets of the three
# hand-built rules — TODO confirm what Z is being compared against.
2**3 - 1

In [None]:
# Length of a hand-written list of 8 sets, each containing 0.
# NOTE(review): looks like an enumeration of the rule sets expected for the
# three-rule example, with 0 as an always-present element — confirm.
len([{0, 1}, {0}, {0, 1, 2}, {0, 2}, {0, 1, 3}, {0, 3}, {0, 1, 2, 3}, {0, 2, 3}])

In [None]:
# Result of the most recent bb.runST() call.
Z