The Rashomon ratio, as a property of a data set and a hypothesis space, serves as a gauge of the simplicity of the
learning problem. If the Rashomon set is large, many different reasonable optimization procedures could lead
to a model from the Rashomon set. Therefore, for large Rashomon sets, accurate models tend to be easier
to find (since optimization procedures can find them). In other words, if the Rashomon ratio is large, the
Rashomon set could contain many accurate and simple models, and the learning problem becomes simpler.
On the other hand, smaller Rashomon ratios might imply a harder learning problem, especially in the case
of few deep and narrow local minima.

We expect that in many real-world applications of machine learning, properties similar to the assumptions
behind our theorems hold, i.e., that large enough Rashomon sets intersect simpler hypothesis spaces in ways
that lead to or explain good performance. This conjecture is difficult to verify theoretically because it is not
a mathematical conjecture about the structure of two specific function spaces, but a statement about many
function spaces, and how they interact with commonly occurring data sets. Thus, we consider this question
empirically.

In [None]:
import logging
import math
import os
import pickle
import random
import sys

# from bds.bbSampleTree import BranchAndBoundNaive
import time
from time import time

import gmpy2 as gmp
import matplotlib as mpl
import matplotlib.pyplot as plt

# from bds.bounds_utils import find_equivalence_classes
import numpy as np
import pandas as pd
import pylab
from gmpy2 import mpfr, mpz

# from bds.bb import BranchAndBoundNaive, BranchAndBoundV1, BranchAndBoundV0, BranchAndBoundV2
from logzero import logger
from pylab import axes, boxplot, figure, legend, plot, savefig, setp, show, xlim, ylim
from sklearn import datasets

from bds.bb import get_ground_truth_count
from bds.bbSampleTreeApproxCounting import BranchAndBoundNaive

# now we need to extract rules
from bds.fpgrowth import build_fptree, fpgrowth_on_tree, preprocess_transaction_list
from bds.gf2 import GF
from bds.meel import approx_mc2, approx_mc2_core
from bds.rule import Rule
from bds.sat.min_freq import construct_min_freq_program
from bds.sat.solver import construct_solver
from bds.utils import bin_array, bin_random, mpz_set_bits, randints, solutions_to_dict
from tests.utils import assert_close_mpfr, assert_dict_allclose

%cd ..




# Let the shared logzero logger emit debug-level messages.
logger.setLevel(logging.DEBUG)

# NOTE(review): not referenced anywhere else in the visible notebook.
num_pts = 500


# Forwarded as `show_progress=` to approx_mc2 in the experiment cells.
# The name is missing an "s"; those cells rely on this exact spelling.
show_progres = True
# Forwarded as `rand_seed=` to approx_mc2 in the experiment cells.
rand_seed = 1234

In [None]:
def read_pickle(filepath_y):
    """Load every object pickled back-to-back in the file at `filepath_y`.

    Returns the objects in file order as a list; an empty file gives `[]`.
    Only use this on files you wrote yourself: unpickling runs arbitrary code.
    """
    loaded = []
    with open(filepath_y, "rb") as stream:
        try:
            # pickle.load signals the end of the stream with EOFError.
            while True:
                loaded.append(pickle.load(stream))
        except EOFError:
            pass

    return loaded


def mpz_set_bits(n: mpz, bits: np.ndarray) -> mpz:
    """Return `n` with every bit position listed in `bits` set to 1.

    Each position is converted with `int()` before being handed to
    `gmp.bit_set`, so numpy integers are accepted.
    """
    updated = n
    for position in bits:
        updated = gmp.bit_set(updated, int(position))
    return updated


def compute_union(vectors):
    """Combine all elements of `vectors` with the `|` operator, left to right.

    The first element seeds the result, so an empty `vectors` raises IndexError.
    """
    union = vectors[0]
    for position in range(1, len(vectors)):
        # Plain `a | b` rather than `|=`, so the first element is never modified.
        union = union | vectors[position]
    return union


def _captured_by_rules(rules):
    """Return the `|`-union of the `truthtable` attributes of `rules`."""
    truthtables = [one_rule.truthtable for one_rule in rules]
    return compute_union(truthtables)


def compute_false_positives(covered, y):
    """Count the bits that are set in `covered` but not in `y`."""
    outside_y = ~y
    return gmp.popcount(covered & outside_y)


def compute_false_negatives(covered, y):
    """Count the bits that are set in `y` but not in `covered`."""
    outside_cover = ~covered
    return gmp.popcount(outside_cover & y)


def random_sample_rule_sets(rules, y, max_size, n, N_rep=100000, lmbd=1, ub=1):
    """Count randomly drawn rule sets whose objective value is at most `ub`.

    Each of the `N_rep` draws picks a size uniformly from 1..max_size (capped
    at the number of rules), shuffles the rules and keeps the first `size`.
    The objective of a draw is
    (false positives + false negatives) / n + lmbd * size.

    Args:
        rules: sequence of objects exposing a `truthtable` attribute.
            The sequence itself is left untouched.
        y: label array; the indices in `y.nonzero()[0]` are the positives.
        max_size: largest rule-set size to draw.
        n: divisor applied to the error counts (callers pass the row count).
        N_rep: number of random draws.
        lmbd: penalty added for every rule in a drawn set.
        ub: objective threshold below or at which a draw is counted.

    Returns:
        The number of draws whose objective is <= `ub`.

    Raises:
        ValueError: if draws are requested but `rules` is empty.
    """
    # Shuffle a private copy: random.shuffle works in place and would
    # otherwise reorder the caller's list on every draw.
    pool = list(rules)
    if N_rep > 0 and not pool:
        raise ValueError("cannot sample rule sets from an empty rule list")

    # Cap the drawn size so the penalty term always matches the number of
    # rules that were actually selected.
    largest_size = min(max_size, len(pool))

    y_mpz = mpz_set_bits(mpz(), y.nonzero()[0])
    rashomon_counter = 0
    for _ in range(N_rep):
        size = random.randint(1, largest_size)
        random.shuffle(pool)
        this_sample = pool[:size]

        # compute loss function for the sample
        cov = _captured_by_rules(this_sample)

        fp = compute_false_positives(cov, y_mpz) / n
        fn = compute_false_negatives(cov, y_mpz) / n

        loss = fp + fn + lmbd * size

        if loss <= ub:
            rashomon_counter += 1

    return rashomon_counter

In [None]:
def count_total(max_subset_size, set_size):
    """Count the non-empty subsets of a `set_size`-element set that have at
    most `max_subset_size` elements.

    The upper bound is inclusive: sizes 1, 2, ..., max_subset_size are all
    counted. Sizes larger than `set_size` contribute 0, and a
    `max_subset_size` below 1 gives 0.
    """
    return sum(
        math.comb(set_size, subset_size)
        for subset_size in range(1, max_subset_size + 1)
    )

### COMPAS dataset

In [None]:
# Load the binarised COMPAS features with pandas.
# The path is relative to the current working directory.
data = pd.read_csv("data/compas_test-binary.csv")

# Show the first rows as the cell output.
data.head()

In [None]:
# read labels
# Each line is split on single spaces; the first token is dropped and the
# remaining tokens are parsed as integers. The file is closed once read.
with open("data/compas_test.label") as f:
    labels = [list(map(int, row.split(" ")[1:])) for row in f]

# The second line of the label file is used as the target vector.
y = np.array(labels[1], dtype=bool)
X = data.to_numpy()
assert len(y) == X.shape[0]
# now we need to extract rules
# One transaction per row: the set of column indices whose entry is truthy.
X_bag = [{j for j, x in enumerate(row) if x} for row in X]

In [None]:
def compute_truthtable(X, itemset):
    """Return an mpz bitmask of the rows of `X` that support `itemset`.

    Bit i is set when the entries of row i at the column indices in
    `itemset` sum to `len(itemset)`; for 0/1 data that means every listed
    column is 1. An empty itemset selects every row.
    """
    columns = list(itemset)
    # One column selection and row sum replaces the per-row Python loop.
    row_sums = np.asarray(X)[:, columns].sum(axis=1)
    support_rows = np.flatnonzero(row_sums == len(columns))

    return mpz_set_bits(mpz(), support_rows)

In [None]:
# Minimum support passed to the FP-growth helpers.
min_support = 50


# Mine the frequent itemsets from the transaction view of the data.
ordered_input_data = preprocess_transaction_list(X_bag, min_support)
tree = build_fptree(ordered_input_data)
frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))


# Wrap every itemset in a Rule; ids start at 1 while names start at "rule_0".
rules = []
for i, itemset in enumerate(frequent_itemsets):
    rules.append(
        Rule(
            id=i + 1,
            name="rule_" + str(i),
            cardinality=len(itemset),
            # the rule needs the truthtable of its itemset
            truthtable=compute_truthtable(X, itemset),
        )
    )

In [None]:
# Naive random-sampling run on the rule set currently held in `rules`.
max_subset_size = 8
lmbd = 0.1
ub = 0.3
counts = random_sample_rule_sets(
    rules,
    y,
    max_size=max_subset_size,
    n=X.shape[0],
    N_rep=500000,
    lmbd=lmbd,
    ub=ub,
)
# Number of counted draws divided by the value returned by count_total.
ratio = counts / count_total(max_subset_size, len(rules))
ratio

In [None]:
# The notebook header runs `from time import time` after `import time`, which
# leaves `time` bound to the function, so `time.time()` would raise
# AttributeError. Re-import the module for the timing calls below.
import time

all_n_rules = []
lmbd = 0.1
ub = 0.5
rand_seed = 1234
n_reps = 1

delta = 0.8
eps = 0.8

show_progres = True

all_averages_times_MC = []
# Created here because the loop below appends to it.
all_averages_times_GT = []

all_averages_counts_MC = []
all_averages_counts_GT = []


# The support thresholds are visited from 50 down to 7.
for min_support in [7, 10, 20, 30, 40, 50][::-1]:
    ordered_input_data = preprocess_transaction_list(X_bag, min_support)
    tree = build_fptree(ordered_input_data)
    frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))

    all_n_rules.append(len(frequent_itemsets))

    # Now create rules
    rules = []
    for i, itemset in enumerate(frequent_itemsets):
        # we need the truthtable for this itemset
        tt = compute_truthtable(X, itemset)
        this_Rule = Rule(
            id=i + 1, name="rule_" + str(i), cardinality=len(itemset), truthtable=tt
        )
        rules.append(this_Rule)

    # Stored under the rule count so later cells can reload the rule set.
    with open("rules/rules_compas_05_" + str(len(rules)), "wb") as out:
        pickle.dump(rules, out)

    all_times_MC = []
    all_counts_MC = []

    all_times_GT = []
    all_counts_GT = []

    # Normaliser that turns the counts below into ratios.
    t0 = count_total(len(rules), len(rules))

    for _ in range(n_reps):
        # approxMC
        t = time.time()
        test_cnt = (
            approx_mc2(
                rules,
                y,
                lmbd=lmbd,
                ub=ub,
                delta=delta,
                eps=eps,
                rand_seed=rand_seed,
                show_progress=show_progres,
                parallel=False,
                log_level=logging.WARN,
            )
            / t0
        )

        elapsed = time.time() - t
        all_times_MC.append(elapsed)
        all_counts_MC.append(test_cnt)

        # complete enumeration, timed on its own so the ground-truth list
        # does not simply repeat the ApproxMC duration
        t = time.time()
        ref_count = get_ground_truth_count(rules, y, lmbd, ub) / t0
        elapsed = time.time() - t
        all_times_GT.append(elapsed)
        all_counts_GT.append(ref_count)

    all_averages_times_MC.append(np.mean(all_times_MC))
    all_averages_times_GT.append(np.mean(all_times_GT))

    all_averages_counts_MC.append(np.mean(all_counts_MC))
    all_averages_counts_GT.append(np.mean(all_counts_GT))

    # Snapshot of the running averages after each threshold.
    with open("all_averages_ratios_MC_" + str(len(rules)), "wb") as out:
        pickle.dump(all_averages_counts_MC, out)
    with open("all_averages_ratios_GT_" + str(len(rules)), "wb") as out:
        pickle.dump(all_averages_counts_GT, out)

In [None]:
# Display the averaged ApproxMC ratios (count / t0) gathered so far.
all_averages_counts_MC

In [None]:
# The notebook header runs `from time import time` after `import time`, which
# leaves `time` bound to the function, so `time.time()` would raise
# AttributeError. Re-import the module for the timing calls below.
import time

results_sampleTree_counts = dict()
results_sampleTree_times = dict()
lmbd = 0.1
ub = 0.5
rand_seed = 1234
n_reps = 1

delta = 0.8
eps = 0.8

all_n_rules = [61, 77, 104]  # 151, 262, 312]


for l in [2]:
    for k in [5764]:  # [100, 225, 506, 1139, 2562, 2562, 5764]:
        times_by_rules = []
        counts_by_rules = []

        for n_rules in all_n_rules:
            rules = read_pickle("rules/rules_compas_05_" + str(n_rules))[0]

            # Normaliser for this rule set. Without this line the value left
            # behind by an earlier cell would be applied to every rule set.
            t0 = count_total(len(rules), len(rules))

            all_times = []
            all_counts = []

            # Ten timed repetitions per rule set; the averages are kept.
            for _ in range(10):
                ts = time.time()
                bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=l, k=k)
                Z = bb.runST()
                elapsed = time.time() - ts
                all_times.append(elapsed)
                all_counts.append(Z / t0)

            times_by_rules.append(np.mean(all_times))
            counts_by_rules.append(np.mean(all_counts))

        # The file names use the size of the last rule set that was loaded.
        with open(
            "times_by_rules_2_" + str(len(rules)) + "_" + str(l) + "_" + str(k),
            "wb",
        ) as out:
            pickle.dump(times_by_rules, out)
        with open(
            "counts_by_rules_2_" + str(len(rules)) + "_" + str(l) + "_" + str(k),
            "wb",
        ) as out:
            pickle.dump(counts_by_rules, out)

In [None]:
counts_by_rules_naive = []
all_n_rules = [61, 77, 104]

# Largest rule-set size drawn by the naive sampler.
max_subset_size = 8

# The loop variable is the one used to build the file name, so each rule set
# is loaded in turn (a stale `n_rules` from another cell is not reused).
for n_rules in all_n_rules:
    rules = read_pickle("rules/rules_compas_05_" + str(n_rules))[0]
    all_counts = []

    # Ten independent sampling runs per rule set; `lmbd` and `ub` keep the
    # values assigned by the cells that ran before this one.
    for _ in range(10):
        counts = random_sample_rule_sets(
            rules,
            y,
            max_size=max_subset_size,
            n=X.shape[0],
            N_rep=250000,
            lmbd=lmbd,
            ub=ub,
        )
        ratio = counts / count_total(max_subset_size, len(rules))
        all_counts.append(ratio)

    counts_by_rules_naive.append(np.mean(all_counts))

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so fall back to the new spelling when
# the old one is not installed.
plt.style.use(
    [
        style
        if style in plt.style.available
        else style.replace("seaborn-", "seaborn-v0_8-", 1)
        for style in ("seaborn-notebook", "seaborn-darkgrid")
    ]
)
mpl.rcParams["axes.facecolor"] = "white"
mpl.rcParams["grid.color"] = "lightgray"
mpl.rcParams["mathtext.default"] = "regular"

plt.figure(figsize=((5.5, 3.5)))

# Rule counts on the x axis.
x = [61, 77, 104]
plt.xlim(x[0] - 2, x[-1] + 2)

# One curve per estimator. The disabled legend code labelled the first three
# "SampleTree-RS", "ApproxMC-RS" and "FullBB-RS"; the fourth is the naive run.
# NOTE(review): every series needs exactly len(x) entries — confirm that the
# lists produced by the experiment cells line up with these rule counts.
curve_style = dict(
    linewidth=5,
    markersize=12,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.5,
)
for series, marker, color in [
    (counts_by_rules, "X", "firebrick"),
    (all_averages_counts_MC, "o", "orange"),
    (all_averages_counts_GT, "v", "purple"),
    (counts_by_rules_naive, "s", "blue"),
]:
    plt.plot(x, series, marker=marker, color=color, **curve_style)

plt.title("Rashomon Ratio by # Rules", fontsize=21)

plt.xlabel(" # Rules ", fontsize=21)

plt.ylabel(" Rashomon Ratio ", fontsize=21)

plt.yscale("log")  # Set y-axis to logarithmic scale
plt.xticks(fontsize=21)  # Change the fontsize to your desired value
plt.yticks(fontsize=21)

# Proxy lines for a legend that is currently disabled; they are hidden below.
# NOTE(review): hidden lines may still affect the automatic y-limits — confirm.
proxy_style = dict(
    linewidth=3,
    markersize=10,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.8,
)
(hB,) = plot([1, 1], "firebrick", marker="X", **proxy_style)
(hHP,) = plot([1, 1], "orange", marker="o", **proxy_style)
(hS,) = plot([1, 1], "purple", marker="v", **proxy_style)
hB.set_visible(False)
hS.set_visible(False)
hHP.set_visible(False)

# Adjust spacing between subplots
plt.tight_layout()


plt.savefig("compas_1000_runtime_vs_n_rules_with_naive" + "_log.pdf")

# Display the plots
plt.show()

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so fall back to the new spelling when
# the old one is not installed.
plt.style.use(
    [
        style
        if style in plt.style.available
        else style.replace("seaborn-", "seaborn-v0_8-", 1)
        for style in ("seaborn-notebook", "seaborn-darkgrid")
    ]
)
mpl.rcParams["axes.facecolor"] = "white"
mpl.rcParams["grid.color"] = "lightgray"
mpl.rcParams["mathtext.default"] = "regular"

plt.figure(figsize=((5.5, 3.5)))

# Rule counts on the x axis.
x = [61, 77, 104]
plt.xlim(x[0] - 2, x[-1] + 2)

# One curve per estimator. The disabled legend code labelled the first three
# "SampleTree-RS", "ApproxMC-RS" and "FullBB-RS"; the fourth is the naive run.
# NOTE(review): every series needs exactly len(x) entries — confirm that the
# lists produced by the experiment cells line up with these rule counts.
curve_style = dict(
    linewidth=5,
    markersize=12,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.5,
)
for series, marker, color in [
    (counts_by_rules, "X", "firebrick"),
    (all_averages_counts_MC, "o", "orange"),
    (all_averages_counts_GT, "v", "purple"),
    (counts_by_rules_naive, "s", "blue"),
]:
    plt.plot(x, series, marker=marker, color=color, **curve_style)

plt.title("Rashomon Ratio by # Rules", fontsize=21)

plt.xlabel(" # Rules ", fontsize=21)

plt.ylabel(" Rashomon Ratio ", fontsize=21)

# The y axis stays linear in this version of the figure.
plt.xticks(fontsize=21)  # Change the fontsize to your desired value
plt.yticks(fontsize=21)

# Proxy lines for a legend that is currently disabled; they are hidden below.
# NOTE(review): hidden lines may still affect the automatic y-limits — confirm.
proxy_style = dict(
    linewidth=3,
    markersize=10,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.8,
)
(hB,) = plot([1, 1], "firebrick", marker="X", **proxy_style)
(hHP,) = plot([1, 1], "orange", marker="o", **proxy_style)
(hS,) = plot([1, 1], "purple", marker="v", **proxy_style)
hB.set_visible(False)
hS.set_visible(False)
hHP.set_visible(False)

# Adjust spacing between subplots
plt.tight_layout()


plt.savefig("compas_1000_runtime_vs_n_rules_with_naive" + ".pdf")

# Display the plots
plt.show()

In [None]:
# Load the mushrooms CSV with pandas; the path is relative to the current
# working directory.
# NOTE(review): `dataset` is set to "mushrooms" here, but a later cell
# reassigns it to "mushroom" before it is used in any file name.
dataset = "mushrooms"
data = pd.read_csv("data/mushrooms.csv")

# Show the first rows as the cell output.
data.head()

In [None]:
# Separate the "class" column (target) from the remaining columns.
y = data["class"]
data = data.drop("class", axis=1)
# One-hot encode the remaining columns and store the indicators as integers.
data = pd.get_dummies(data).astype("int")
X = data.to_numpy()

In [None]:
# Convert the target column to a boolean array.
# NOTE(review): if "class" holds non-empty strings rather than 0/1 values,
# this conversion would presumably mark every row True — verify the encoding
# of the column in data/mushrooms.csv.
y = np.array(y, dtype=bool)
assert len(y) == X.shape[0]

In [None]:
def compute_truthtable(X, itemset):
    """Return an mpz bitmask of the rows of `X` that support `itemset`.

    Bit i is set when the entries of row i at the column indices in
    `itemset` sum to `len(itemset)`; for 0/1 data that means every listed
    column is 1. An empty itemset selects every row.

    This cell redefines the function of the same name from the COMPAS part
    of the notebook.
    """
    columns = list(itemset)
    # One column selection and row sum replaces the per-row Python loop.
    row_sums = np.asarray(X)[:, columns].sum(axis=1)
    support_rows = np.flatnonzero(row_sums == len(columns))

    return mpz_set_bits(mpz(), support_rows)


# Transaction view of X: for every row, the set of column indices whose
# entry is truthy.
X_bag = [{j for j, x in enumerate(row) if x} for row in X]

In [None]:
# The notebook header runs `from time import time` after `import time`, which
# leaves `time` bound to the function, so `time.time()` would raise
# AttributeError. Re-import the module for the timing calls below.
import time

# Tag used in every file name written by this cell.
dataset = "mushroom"

n_reps = 1
lmbd = 0.1
ub = 0.5
rand_seed = 1234

delta = 0.8
eps = 0.8
all_averages_times_MC = []
all_averages_times_GT = []

all_averages_counts_MC = []
all_averages_counts_GT = []

all_n_rules = []

for min_support in [5000, 5500, 6000]:
    print("min support " + str(min_support))

    ordered_input_data = preprocess_transaction_list(X_bag, min_support)
    tree = build_fptree(ordered_input_data)
    frequent_itemsets = set(fpgrowth_on_tree(tree, set(), min_support))

    all_n_rules.append(len(frequent_itemsets))

    print(len(frequent_itemsets))

    # Now create rules
    rules = []
    for i, itemset in enumerate(frequent_itemsets):
        # we need the truthtable for this itemset
        tt = compute_truthtable(X, itemset)
        this_Rule = Rule(
            id=i + 1, name="rule_" + str(i), cardinality=len(itemset), truthtable=tt
        )
        rules.append(this_Rule)

    # Normaliser for the complete rule set, computed once after the rules are
    # built instead of on every pass of the loop above.
    t0 = count_total(len(rules), len(rules))

    # Stored under the rule count so later cells can reload the rule set.
    with open("rules/rules_05_" + dataset + "_" + str(len(rules)), "wb") as out:
        pickle.dump(rules, out)

    all_times_MC = []
    all_counts_MC = []

    all_times_GT = []
    all_counts_GT = []

    for _ in range(n_reps):
        # approxMC
        t = time.time()
        test_cnt = approx_mc2(
            rules,
            y,
            lmbd=lmbd,
            ub=ub,
            delta=delta,
            eps=eps,
            rand_seed=rand_seed,
            show_progress=show_progres,
            parallel=False,
            log_level=logging.WARN,
        )

        elapsed = time.time() - t
        all_times_MC.append(elapsed)
        all_counts_MC.append(test_cnt / t0)

        # complete enumeration, timed on its own so the ground-truth list
        # does not simply repeat the ApproxMC duration
        t = time.time()
        ref_count = get_ground_truth_count(rules, y, lmbd, ub)
        elapsed = time.time() - t
        all_times_GT.append(elapsed)
        all_counts_GT.append(ref_count / t0)

    all_averages_times_MC.append(np.mean(all_times_MC))
    all_averages_times_GT.append(np.mean(all_times_GT))

    all_averages_counts_MC.append(np.mean(all_counts_MC))
    all_averages_counts_GT.append(np.mean(all_counts_GT))


# Shared tail of the result file names; it uses the size of the last rule set.
suffix = "_" + str(len(rules)) + "_" + str(eps) + "_" + str(delta)

with open("all_averages_times_MC_" + dataset + suffix, "wb") as out:
    pickle.dump(all_averages_times_MC, out)
with open("all_averages_times_GT_" + dataset + suffix, "wb") as out:
    pickle.dump(all_averages_times_GT, out)

# NOTE(review): this file name has a double underscore after the dataset tag,
# unlike the other three. It is kept as-is so existing readers still find it.
with open("all_averages_counts_MC_" + dataset + "_" + suffix, "wb") as out:
    pickle.dump(all_averages_counts_MC, out)
with open("all_averages_counts_GT_" + dataset + suffix, "wb") as out:
    pickle.dump(all_averages_counts_GT, out)

In [None]:
# Display the averaged ApproxMC ratios. The original expression named an
# undefined variable (`all_averages_counts_`).
# NOTE(review): assumed to mean the ApproxMC list, as in the matching COMPAS
# cell — confirm.
all_averages_counts_MC

In [None]:
# The notebook header runs `from time import time` after `import time`, which
# leaves `time` bound to the function, so `time.time()` would raise
# AttributeError. Re-import the module for the timing calls below.
import time

results_sampleTree_counts = dict()
results_sampleTree_times = dict()
# lmbd = 0.1
# ub = 0.3
# `lmbd` and `ub` keep the values assigned by the cells that ran before.
rand_seed = 1234
n_reps = 1

all_n_rules = [31, 33, 41]

for l in [2]:
    for k in [5764]:  # [100, 225, 506, 1139, 2562, 2562, 5764]:
        times_by_rules = []
        counts_by_rules = []

        for n_rules in all_n_rules:
            rules = read_pickle("rules/rules_05_" + str(dataset) + "_" + str(n_rules))[
                0
            ]

            # Normaliser that turns the counts below into ratios.
            t0 = count_total(len(rules), len(rules))

            all_times = []
            all_counts = []

            # Ten timed repetitions per rule set; the averages are kept.
            for _ in range(10):
                ts = time.time()
                bb = BranchAndBoundNaive(rules, ub, y, lmbd, l=l, k=k)
                Z = bb.runST()
                elapsed = time.time() - ts
                all_times.append(elapsed)
                all_counts.append(Z / t0)

            times_by_rules.append(np.mean(all_times))
            counts_by_rules.append(np.mean(all_counts))

        # The file names use the size of the last rule set that was loaded.
        with open(
            "times_by_rules_"
            + dataset
            + "_"
            + str(len(rules))
            + "_"
            + str(l)
            + "_"
            + str(k),
            "wb",
        ) as out:
            pickle.dump(times_by_rules, out)
        with open(
            "counts_by_rules_"
            + dataset
            + "_"
            + str(len(rules))
            + "_"
            + str(l)
            + "_"
            + str(k),
            "wb",
        ) as out:
            pickle.dump(counts_by_rules, out)

In [None]:
# Display the averaged SampleTree ratios (Z / t0), one entry per rule set.
counts_by_rules

In [None]:
counts_by_rules_naive = []
all_n_rules = [31, 33, 41]

# Largest rule-set size drawn by the naive sampler.
max_subset_size = 8

# The loop variable is the one used to build the file name, so each rule set
# is loaded in turn (a stale `n_rules` from another cell is not reused).
for n_rules in all_n_rules:
    rules = read_pickle("rules/rules_05_" + str(dataset) + "_" + str(n_rules))[0]

    all_counts = []

    # Ten independent sampling runs per rule set; `lmbd` and `ub` keep the
    # values assigned by the cells that ran before this one.
    for _ in range(10):
        counts = random_sample_rule_sets(
            rules,
            y,
            max_size=max_subset_size,
            n=X.shape[0],
            N_rep=250000,
            lmbd=lmbd,
            ub=ub,
        )
        ratio = counts / count_total(max_subset_size, len(rules))
        all_counts.append(ratio)

    counts_by_rules_naive.append(np.mean(all_counts))

In [None]:
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*" and
# later releases removed the old names, so fall back to the new spelling when
# the old one is not installed.
plt.style.use(
    [
        style
        if style in plt.style.available
        else style.replace("seaborn-", "seaborn-v0_8-", 1)
        for style in ("seaborn-notebook", "seaborn-darkgrid")
    ]
)
mpl.rcParams["axes.facecolor"] = "white"
mpl.rcParams["grid.color"] = "lightgray"
mpl.rcParams["mathtext.default"] = "regular"

plt.figure(figsize=((5.5, 3.5)))

# Rule counts on the x axis.
x = [31, 33, 41]
plt.xlim(x[0] - 2, x[-1] + 2)

# One curve per estimator. The disabled legend code labelled the first three
# "SampleTree-RS", "ApproxMC-RS" and "FullBB-RS"; the fourth is the naive run.
# The SampleTree series is plotted in reversed order.
# NOTE(review): confirm that the reversal matches how the other three series
# are ordered, and that every series has exactly len(x) entries.
curve_style = dict(
    linewidth=5,
    markersize=12,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.5,
)
for series, marker, color in [
    (counts_by_rules[::-1], "X", "firebrick"),
    (all_averages_counts_MC, "o", "orange"),
    (all_averages_counts_GT, "v", "purple"),
    (counts_by_rules_naive, "s", "blue"),
]:
    plt.plot(x, series, marker=marker, color=color, **curve_style)

plt.title("Rashomon Ratio by # Rules", fontsize=21)

plt.xlabel(" # Rules ", fontsize=21)

plt.ylabel(" Rashomon Ratio ", fontsize=21)

plt.yscale("log")  # Set y-axis to logarithmic scale
plt.xticks(fontsize=21)  # Change the fontsize to your desired value
plt.yticks(fontsize=21)

# Proxy lines for a legend that is currently disabled; they are hidden below.
# NOTE(review): hidden lines may still affect the automatic y-limits — confirm.
proxy_style = dict(
    linewidth=3,
    markersize=10,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.8,
)
(hB,) = plot([1, 1], "firebrick", marker="X", **proxy_style)
(hHP,) = plot([1, 1], "orange", marker="o", **proxy_style)
(hS,) = plot([1, 1], "purple", marker="v", **proxy_style)
hB.set_visible(False)
hS.set_visible(False)
hHP.set_visible(False)

# Adjust spacing between subplots
plt.tight_layout()


plt.savefig("mushroom_1000_runtime_vs_n_rules_with_naive" + "_log.pdf")

# Display the plots
plt.show()

In [None]:
# Stand-alone figure that holds only the legend for the ratio plots.
figlegend = pylab.figure(figsize=(18, 1))

# Proxy lines drawn only to give the legend its handles; hidden afterwards.
proxy_style = dict(
    linewidth=7,
    markersize=14,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.5,
)
(hB,) = plot([1, 1], marker="X", color="firebrick", **proxy_style)
(hHP,) = plot([1, 1], marker="o", color="orange", **proxy_style)
(hS,) = plot([1, 1], marker="v", color="purple", **proxy_style)
(hD,) = plot([1, 1], marker="s", color="blue", **proxy_style)

legend_handles = (hB, hHP, hS, hD)
legend_labels = ("BB-SampleTree", "BB-ApproxMC", "BB-GT", "Naive Random Sampling")
legend(
    legend_handles,
    legend_labels,
    loc="upper center",
    bbox_to_anchor=(0.5, 1.12),
    ncol=5,
    fancybox=False,
    shadow=False,
    borderaxespad=0,
    fontsize=25,
)
for handle in (hB, hS, hHP, hD):
    handle.set_visible(False)

# Hide the axes so that only the legend remains on the canvas.
plt.axis("off")

plt.xticks([0], [""])
plt.yticks([1], [""])

figlegend.savefig("plots/legend_ratio.pdf")

In [None]:
# Stand-alone figure that holds only the legend (variant with a "SAT" entry).
figlegend = pylab.figure(figsize=(14, 1))

# Proxy lines drawn only to give the legend its handles; hidden afterwards.
proxy_style = dict(
    linewidth=7,
    markersize=14,
    markeredgewidth=0.5,
    markeredgecolor="grey",
    alpha=0.5,
)
(hB,) = plot([1, 1], marker="X", color="firebrick", **proxy_style)
(hHP,) = plot([1, 1], marker="o", color="orange", **proxy_style)
(hS,) = plot([1, 1], marker="v", color="purple", **proxy_style)
(hD,) = plot([1, 1], marker="s", color="blue", **proxy_style)

legend_handles = (hB, hHP, hS, hD)
legend_labels = ("BB-SampleTree", "BB-ApproxMC", "BB-GT", "SAT")
legend(
    legend_handles,
    legend_labels,
    loc="upper center",
    bbox_to_anchor=(0.5, 1.12),
    ncol=5,
    fancybox=False,
    shadow=False,
    borderaxespad=0,
    fontsize=25,
)
for handle in (hB, hS, hHP, hD):
    handle.set_visible(False)

# Hide the axes so that only the legend remains on the canvas.
plt.axis("off")

plt.xticks([0], [""])
plt.yticks([1], [""])

figlegend.savefig("plots/legend_ratio_SAT.pdf")