In [None]:
import ruleminer
import logging
import sys
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [None]:
# Send log records to stdout with a timestamp prefix; only WARNING and above are shown.
logging.basicConfig(
    level=logging.WARNING,
    format="%(asctime)s %(message)s",
    stream=sys.stdout,
)

# Decision tree for iris dataset

In [None]:
# Load the iris dataset; the following cells read its feature names, data,
# targets and target names.
from sklearn.datasets import load_iris
iris = load_iris()

## Setup dataframe

In [None]:
# Column labels: the iris feature names with the parentheses stripped out
strip_parens = str.maketrans("", "", "()")
feature_columns = [name.translate(strip_parens) for name in iris['feature_names']]

df = pd.DataFrame(iris['data'], columns=feature_columns)

# Attach the class name of every sample, then encode those names as integers
class_names = iris['target_names']
df['target'] = [class_names[label] for label in iris['target']]
target_encoder = OrdinalEncoder(dtype=int)
df[['target']] = target_encoder.fit_transform(df[['target']])

## Fit an ensemble of decision trees (AdaBoost)

In [None]:
# Model inputs: every column except the label column
features = [col for col in df.columns if col != "target"]
X = df[features]
Y = df["target"].to_numpy()
# Look the dtype up by column label. Indexing df.dtypes with an integer position
# (as df.dtypes[df.columns.get_loc("target")] did) is deprecated in recent pandas.
target_dtype = df["target"].dtype

base, estimator = DecisionTreeClassifier, AdaBoostClassifier

# NOTE: despite its name, `regressor` holds a classifier; the name is kept
# because the following cells refer to it.
# The weak learner is passed positionally because its keyword was renamed from
# `base_estimator` to `estimator` in scikit-learn 1.2 (old name removed in 1.4);
# the positional form works with both old and new versions.
regressor = estimator(
    base(
        random_state=0,
        max_depth=2,
        min_samples_split=2,
        min_samples_leaf=1),
    n_estimators=25,
    random_state=0)
regressor = regressor.fit(X, Y)

In [None]:
from matplotlib import pyplot as plt
from sklearn import tree

# Draw the first fitted tree of the ensemble.
fig = plt.figure(figsize=(10,8))
_ = tree.plot_tree(regressor[0],
                   feature_names=list(iris.feature_names),
                   # iris.target_names is a numpy array; scikit-learn 1.3.0 only
                   # accepts a list here, so convert explicitly.
                   class_names=list(iris.target_names),
                   filled=True)

In [None]:
# Derive expressions from the first tree of the fitted ensemble
first_tree = regressor[0]
ruleminer.tree_to_expressions(first_tree, features, "target")

In [None]:
# the above can also be done with the following function
# NOTE(review): presumably this fits the ensemble on df and returns one collection
# of expressions per tree (it is indexed and iterated that way below) — confirm
# against the ruleminer documentation.
ensemble_expressions = ruleminer.fit_ensemble_and_extract_expressions(df, "target", max_depth = 2)

In [None]:
# expressions from the first tree in the ensemble
ensemble_expressions[0]

## Get best rules from the ensemble

In [None]:
def _templates_for(tree_expressions):
    """Wrap each expression of one tree in a template dict."""
    return [{'expression': expr} for expr in tree_expressions]


# Build one RuleMiner per tree of the ensemble, all run against the same dataframe;
# the best one (based on absolute support) is selected from this list afterwards.
miners = [
    ruleminer.RuleMiner(templates=_templates_for(tree_expressions), data=df)
    for tree_expressions in ensemble_expressions
]

In [None]:
def _total_abs_support(miner):
    """Sum of the 'abs support' column over all rules of one miner."""
    return miner.rules['abs support'].sum()


# Keep the miner whose rules have the largest summed absolute support
best = max(miners, key=_total_abs_support)

In [None]:
# rules held by the selected (best) miner object
best.rules

In [None]:
# Print the rules table as raw HTML markup
best_rules_html = best.rules.to_html()
print(best_rules_html)

In [None]:
# total absolute support summed over all rules of the best miner
# (for comparison: 150 elements in original dataset)
best.rules['abs support'].sum()

# Decision tree with insurance data

In [None]:
# Small hand-made insurance dataset: one row per insurer.
# NOTE: this rebinds `df`, which held the iris data in the cells above.
df = pd.DataFrame(
    columns=[
        "Name",
        "Type",
        "Assets",
        "TV-life",
        "TV-nonlife",
        "Own funds",
        "Excess",
    ],
    data=[
        ["Insurer1", "life insurer", 1000.0, 800.0, 0.0, 200.0, 200.0],
        ["Insurer2", "non-life insurer", 4000.0, 0.0, 3200.0, 800.0, 800.0],
        ["Insurer3", "non-life insurer", 800.0, 0.0, 700.0, 100.0, 100.0],
        ["Insurer4", "life insurer", 2500.0, 1800.0, 0.0, 700.0, 700.0],
        ["Insurer5", "non-life insurer", 2100.0, 0.0, 2200.0, 200.0, 200.0],
        ["Insurer6", "life insurer", 9001.0, 8701.0, 0.0, 300.0, 200.0],
        ["Insurer7", "life insurer", 9002.0, 8802.0, 0.0, 200.0, 200.0],
        ["Insurer8", "life insurer", 9003.0, 8903.0, 0.0, 100.0, 200.0],
        ["Insurer9", "non-life insurer", 9000.0, 8850.0, 0.0, 150.0, 200.0],
        ["Insurer10", "non-life insurer", 9000.0, 0, 8750.0, 250.0, 199.99],
    ],
)

# Optional knob to enlarge the dataset: every doubling stacks the frame onto itself.
# 0 keeps the ten rows above unchanged (the original loop also ran zero times).
n_doublings = 0
for _doubling in range(n_doublings):
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    df = pd.concat([df, df], ignore_index=True)

# Name the index after any doubling: ignore_index=True builds a fresh, unnamed index.
df.index.name = "id"

# Replace the two string columns by integer codes
df[['Type']] = OrdinalEncoder(dtype=int).fit_transform(df[['Type']])
df[['Name']] = OrdinalEncoder(dtype=int).fit_transform(df[['Name']])

In [None]:
# NOTE(review): presumably fits tree ensembles on the insurance dataframe (depth-1
# trees, given max_depth = 1) and returns the derived expressions — confirm against
# the ruleminer documentation.
expressions = ruleminer.fit_dataframe_to_ensemble(df, max_depth = 1)

In [None]:
# number of expressions returned for the insurance data
print(len(expressions))

In [None]:
# Wrap every expression in a template dict
templates = [dict(expression=expr) for expr in expressions]

# Filter settings handed to the miner
# NOTE(review): presumably these are minimum thresholds for a rule to be kept — confirm.
rule_filter = {'confidence': 0.75, 'abs support': 2}
params = {"filter": rule_filter}

r = ruleminer.RuleMiner(data=df, templates=templates, params=params)

In [None]:
# rules held by the miner built from the insurance expressions
r.rules

In [None]:
# Print the rules table as raw HTML markup
rules_html = r.rules.to_html()
print(rules_html)

In [None]:
# Evaluate the miner's rules; the returned frame has a 'result' column that the
# next cell filters on.
# NOTE(review): presumably one row per (rule, data row) evaluation — confirm.
df_r = r.evaluate()

In [None]:
# Show only the evaluations whose 'result' is False
is_failure = df_r['result'].eq(False)
df_r[is_failure]