In [1]:
import ruleminer
import logging
import sys
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier

In [2]:
# Send log records to stdout so they show up in the notebook output;
# only WARNING and above are shown, each prefixed with a timestamp.
log_settings = {
    "stream": sys.stdout,
    "format": '%(asctime)s %(message)s',
    "level": logging.WARNING,
}
logging.basicConfig(**log_settings)

# Decision tree for iris dataset

In [3]:
# Load the iris dataset that ships with scikit-learn. The cells below read its
# 'feature_names', 'data', 'target' and 'target_names' entries.
from sklearn.datasets import load_iris
iris = load_iris()

## Set up the dataframe

In [4]:
# Column names are the iris feature names with the parentheses removed
# (the rule expressions below refer to e.g. "petal width cm").
strip_parentheses = str.maketrans("", "", "()")
feature_columns = [name.translate(strip_parentheses) for name in iris['feature_names']]
df = pd.DataFrame(data=iris['data'], columns=feature_columns)

# Attach the class name of every sample, then replace the names by integer codes.
df['target'] = [iris['target_names'][label] for label in iris['target']]
target_encoder = OrdinalEncoder(dtype=int)
df[['target']] = target_encoder.fit_transform(df[['target']])

## Fit ensemble of decision trees (AdaBoost)

In [5]:
# Every column except the label is used as a feature.
features = [col for col in df.columns if col != "target"]
X = df[features]
Y = df["target"].to_numpy()
# Look the dtype up by column label: integer keys on the label-indexed
# `df.dtypes` Series rely on a positional fallback that pandas has deprecated.
target_dtype = df["target"].dtype

base, estimator = DecisionTreeClassifier, AdaBoostClassifier

# The base learner is passed positionally on purpose: its keyword was renamed
# from `base_estimator` to `estimator` in scikit-learn 1.2 and the old name was
# removed in 1.4, while the first positional parameter is the base learner in
# both old and new releases.
# NOTE: despite its name, `regressor` is a classifier; the name is kept because
# later cells refer to it.
regressor = estimator(
    base(random_state=0, max_depth=2),
    n_estimators=25,
    random_state=0)
regressor = regressor.fit(X, Y)

In [6]:
# Translate the first fitted tree of the ensemble into if-then expressions.
first_tree = regressor.estimators_[0]
ruleminer.tree_to_expressions(first_tree, features, "target")

{'if (({"petal width cm"} <= 0.800000011920929)) then ({"target"} == 0)',
 'if (({"petal width cm"} > 0.800000011920929) & ({"petal width cm"} <= 1.75)) then ({"target"} == 1)',
 'if (({"petal width cm"} > 0.800000011920929) & ({"petal width cm"} > 1.75)) then ({"target"} == 2)'}

In [7]:
# One-call alternative to the manual fit-and-extract steps above. The result is
# indexed and iterated per tree in the cells below.
# NOTE(review): presumably uses the same estimator settings as the manual fit — confirm.
ensemble_expressions = ruleminer.fit_ensemble_and_extract_expressions(df, "target", max_depth = 2)

In [8]:
# expressions from the first tree in the ensemble
ensemble_expressions[0]

{'if (({"petal width cm"} <= 0.800000011920929)) then ({"target"} == 0)',
 'if (({"petal width cm"} > 0.800000011920929) & ({"petal width cm"} <= 1.75)) then ({"target"} == 1)',
 'if (({"petal width cm"} > 0.800000011920929) & ({"petal width cm"} > 1.75)) then ({"target"} == 2)'}

## Get best rules from the ensemble

In [9]:
# Find the tree whose rules have the highest summed absolute support.
# `best` is reset first so that a re-run can never silently reuse a miner left
# over from an earlier execution of this cell.
sum_support = 0
best = None
params = {"filter": {'confidence': 0.0, 'abs support': 0},
          "decimal": 1}
for expressions in ensemble_expressions:
    # one template per expression derived from this tree
    templates = [{'expression': expression} for expression in expressions]
    miner = ruleminer.RuleMiner(templates=templates, 
                            data=df, params=params)
    total_support = miner.rules['abs support'].sum()
    # strict comparison: on ties the earliest tree is kept
    if total_support > sum_support:
        sum_support = total_support
        best = miner

# Fail here with a clear message instead of with a NameError in a later cell.
if best is None:
    raise ValueError("no tree in the ensemble produced rules with positive support")

In [10]:
# rules held by the selected (best) miner object
best.rules

Unnamed: 0,rule_id,rule_group,rule_definition,rule_status,abs support,abs exceptions,confidence,encodings
0,0,0,"if(({""petal width cm""}>0.800000011920929)&({""p...",,49,5,0.907407,{}
1,1,0,"if({""petal width cm""}<=0.800000011920929)then(...",,50,0,1.0,{}
2,2,0,"if(({""petal width cm""}>0.800000011920929)&({""p...",,45,1,0.978261,{}


In [11]:
# total absolute support summed over the rules of the best miner
# (for comparison: the original dataset has 150 rows)
best.rules['abs support'].sum()

144

# Decision tree with insurance data

In [12]:
# Number of times the example data is doubled (0 keeps the ten rows below).
# This replaces the former `for i in range(0, 0): df = df.append(df, ...)` loop,
# which never ran and relied on DataFrame.append, removed in pandas 2.0.
N_DOUBLINGS = 0

# Small hand-made insurance dataset used to demonstrate rule mining.
# NOTE(review): Insurer9 is labelled "non-life insurer" but has only TV-life
# filled — presumably a deliberate exception for the rules to flag; confirm.
df = pd.DataFrame(
    columns=[
        "Name",
        "Type",
        "Assets",
        "TV-life",
        "TV-nonlife",
        "Own funds",
        "Excess",
    ],
    data=[
        ["Insurer1", "life insurer", 1000.0, 800.0, 0.0, 200.0, 200.0],
        ["Insurer2", "non-life insurer", 4000.0, 0.0, 3200.0, 800.0, 800.0],
        ["Insurer3", "non-life insurer", 800.0, 0.0, 700.0, 100.0, 100.0],
        ["Insurer4", "life insurer", 2500.0, 1800.0, 0.0, 700.0, 700.0],
        ["Insurer5", "non-life insurer", 2100.0, 0.0, 2200.0, 200.0, 200.0],
        ["Insurer6", "life insurer", 9000.0, 8700.0, 0.0, 300.0, 200.0],
        ["Insurer7", "life insurer", 9000.0, 8800.0, 0.0, 200.0, 200.0],
        ["Insurer8", "life insurer", 9000.0, 8900.0, 0.0, 100.0, 200.0],
        ["Insurer9", "non-life insurer", 9000.0, 8850.0, 0.0, 150.0, 200.0],
        ["Insurer10", "non-life insurer", 9000.0, 0.0, 8750.0, 250.0, 199.99],
    ],
)
if N_DOUBLINGS > 0:
    # 2 ** N_DOUBLINGS stacked copies, the same rows repeated doubling would give
    df = pd.concat([df] * 2 ** N_DOUBLINGS, ignore_index=True)
# Name the index after any concatenation, which would otherwise drop the name.
df.index.name = "id"

# Replace the text columns by integer codes so that every column is numeric.
df[['Type']] = OrdinalEncoder(dtype=int).fit_transform(df[['Type']])
df[['Name']] = OrdinalEncoder(dtype=int).fit_transform(df[['Name']])

In [13]:
# Derive candidate rule expressions from the insurance dataframe using trees of
# depth 1; the expressions listed in the next cell each have a single condition.
# NOTE(review): presumably every column is used in turn as the target — confirm
# against the ruleminer documentation.
expressions = ruleminer.fit_dataframe_to_ensemble(df, max_depth = 1)

In [14]:
# first ten rules in the set
# NOTE(review): if `expressions` is an unordered set, which ten are shown may
# differ between runs — confirm.
list(expressions)[0:10]

['if (({"Name"} > 0.5)) then ({"Assets"} == 8375.0)',
 'if (({"Excess"} <= 450.0)) then ({"Own funds"} == 172.22222222222223)',
 'if (({"TV-nonlife"} <= 5975.0)) then ({"Name"} == 8)',
 'if (({"TV-life"} > 8875.0)) then ({"Name"} == 8)',
 'if (({"Own funds"} <= 750.0)) then ({"Name"} == 0)',
 'if (({"Excess"} <= 199.99500274658203)) then ({"TV-nonlife"} == 8750.0)',
 'if (({"Excess"} <= 149.99500274658203)) then ({"Name"} == 3)',
 'if (({"Assets"} > 5750.0)) then ({"TV-life"} == 8810.0)',
 'if (({"Assets"} <= 900.0)) then ({"Name"} == 3)',
 'if (({"Name"} > 5.0)) then ({"TV-life"} == 8875.0)']

In [15]:
# Wrap every derived expression in a template and mine the rules. The filter
# requires confidence >= 0.75 and absolute support >= 2
# (NOTE(review): inclusive thresholds assumed — confirm); the listed metrics
# are the ones reported per rule.
templates = [dict(expression=expr) for expr in expressions]
params = {
    "filter": {"confidence": 0.75, "abs support": 2},
    "metrics": ["confidence", "abs support", "abs exceptions"],
}
r = ruleminer.RuleMiner(templates=templates, data=df, params=params)

In [16]:
# rules held by the miner after applying the filter settings in `params`
r.rules

Unnamed: 0,rule_id,rule_group,rule_definition,rule_status,confidence,abs support,abs exceptions,encodings
0,0,0,"if({""TV-life""}<=400.0)then({""Type""}==1)",,1.0,4,0,{}
1,1,0,"if({""TV-life""}>400.0)then({""Type""}==0)",,0.833333,5,1,{}
2,2,0,"if({""TV-life""}>400.0)then({""TV-nonlife""}==0.0)",,1.0,6,0,{}
3,3,0,"if({""TV-life""}>5250.0)then({""Assets""}==9000.0)",,1.0,4,0,{}


In [17]:
# Evaluate the mined rules; the returned frame has a 'result' column that the
# next cell filters on.
# NOTE(review): presumably evaluated against the data passed to the miner — confirm.
df_r = r.evaluate()

In [18]:
# show only the evaluations whose result is False, i.e. the exceptions to a rule
df_r.loc[df_r["result"].eq(False)]

Unnamed: 0,rule_id,rule_group,rule_definition,rule_status,abs support,abs exceptions,confidence,result,indices
9,1,0,"if({""TV-life""}>400.0)then({""Type""}==0)",,5,1,0.833333,False,8
