In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
def evaluate_rules(data, tuned_rules, output):
    predictions_all = []
    for i, rule in tuned_rules.iterrows():
        pred_rule = []
        tptot = 0
        tntot = 0
        fntot = 0
        fptot = 0
        tuned_antecedent = rule['Rule'].strip()
        print(f"rule: {tuned_antecedent}")
        for _, row in data.iterrows():
            y_row = row[output]
            tp = 0
            tn = 0
            fn = 0
            fp = 0
            # check if the point row satifies rule 
            if evaluate_rule_conditions(row, tuned_antecedent):
                # rule is satisfied
                satisfied = True
                pred_rule.append(0)
            else:
                satisfied = False
                pred_rule.append(1)
    
            
            if satisfied and y_row == 0:
                tp=1
            elif satisfied and y_row == 1:
                fp=1
            elif not satisfied and y_row == 1:
                tn=1
            
            elif not satisfied and y_row == 0:
                fn=1

            predictions_all.append(pred_rule)
            tptot+=tp
            tntot+=tn
            fptot+=fp
            fntot+=fn

        if (tptot+fptot) == 0:
            print("no points satisfy the rule\n")
            continue
        else:
            
            print(f"tp = {tptot}, tn = {tntot}, fp = {fptot}, fn = {fntot}")
            prec = tptot / (tptot+fptot)
            cov = tptot/(tptot+fntot)
            acc = (tptot+tntot)/(tptot+tntot+fptot+fntot)
            f1score = (2*tptot)/(2*tptot+fptot+fntot)
            error = fptot/(fptot+tntot)
            print(f"precision: {prec}, covering: {cov}, error: {error}, accuracy: {acc}, f1: {f1score}\n")              

    return predictions_all


def evaluate_rule_conditions(row, condition_part):

    # Checks if any of the conditions in the rule are satisfied
    if all(check_condition(row, part) for part in condition_part.split(" and ")):
        return True  # Return True if all conditions in the rule are satisfied
    
    return False  # Return False if any of the conditions in the rule is not satisfied


def check_condition(row, condition_part):
    # Check if a single condition part is satisfied
    
    #parts = [part.strip('()') for part in condition_part.split()]
    parts = re.split(r'\s*(==|<=|>=|<|>|!=)\s*', condition_part.strip('()'))
    #print("parts: ", parts)
    if len(parts) == 3:
        column, op, value = parts
        return eval(f"{row[column]} {op} {value}")
    # handle the case of a 2-thresholds conditions of the kind: a < Column <= b
    elif len(parts) == 5:
        val1,op1,column,op2,val2 = parts
        # Use the original condition from the rule
        return eval(f"{val1} {op1} {row[column]} {op2} {val2}")        
    else:
        # Handle cases where the condition is not in the expected format
        pass


# LLM performance evaluation rule by rule

In [3]:
# Loading rules actual rules
rules = pd.read_csv('/Users/saranarteni/OneDrive - CNR/REXASI-PRO/T3.1/navground_code/simulation2/confiderai/supsinavground_rules.csv', header=None, names=["Rule", "Covering", "Error"])

# adjust columns values
for i in range(len(rules)+1):   
    rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("RULE {}: ".format(i),""))
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("AND","and"))
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("{",""))
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("}",""))
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("output in ","output = "))
rules["Covering"] = rules["Covering"].apply(lambda x: x.replace("COVERING: ",""))
rules["Error"] = rules["Error"].apply(lambda x: x.replace("ERROR: ",""))
rules['Output'] = rules['Rule'].str.extract(r'output = (\d+)', expand=False).astype(int)
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace("IF ",""))
rules["Rule"] = rules["Rule"].apply(lambda x: x.replace(x[x.find("THEN"):],""))
# load test data
data = pd.read_csv("simulation2/test.csv")

rules_noncoll = rules[rules["Output"]==0]
preds_LLM = evaluate_rules(data, rules_noncoll, "output")

rule: SafetyMargin > 0.071752 and Tau <= 0.798864
tp = 219, tn = 431, fp = 18, fn = 332
precision: 0.9240506329113924, covering: 0.397459165154265, error: 0.0400890868596882, accuracy: 0.65, f1: 0.5558375634517766

rule: SafetyMargin > 0.019271 and 0.096355 < Tau <= 0.359004
tp = 216, tn = 429, fp = 20, fn = 335
precision: 0.9152542372881356, covering: 0.39201451905626133, error: 0.044543429844097995, accuracy: 0.645, f1: 0.5489199491740788

rule: SafetyMargin > 0.070627 and Eta > 0.372908
tp = 192, tn = 441, fp = 8, fn = 359
precision: 0.96, covering: 0.3484573502722323, error: 0.017817371937639197, accuracy: 0.633, f1: 0.511318242343542

rule: 0.041272 < SafetyMargin <= 0.071004 and Tau <= 0.619041
tp = 142, tn = 434, fp = 15, fn = 409
precision: 0.9044585987261147, covering: 0.2577132486388385, error: 0.0334075723830735, accuracy: 0.576, f1: 0.4011299435028249

rule: SafetyMargin > 0.010931 and Eta > 0.748042 and Tau <= 0.682408
tp = 145, tn = 430, fp = 19, fn = 406
precision: 0.884

# Skope Rules performance

In [3]:
#mix both rules, model for collision and model for no collision.
skrules_col = pd.read_csv('simulationVictorTopological/skope/skope_rules_collisions.csv', header=None, names=["Rule","Covering","Error"])
skrules_nocol = pd.read_csv('simulationVictorTopological/skope/skope_rules_noncollisions.csv', names=["Rule","Covering","Error"])

skrules = pd.concat([skrules_col, skrules_nocol], ignore_index=True)
skrules

Unnamed: 0,Rule,Covering,Error
0,RULE 1: IF meanEntropy <= 2.658132314682007 an...,COVERING: 0.5931131412508784,ERROR: 0.04416761041902606
1,RULE 2: IF meanEntropy <= 2.658132314682007 TH...,COVERING: 0.5848119233498935,ERROR: 0.041860465116279055
2,RULE 3: IF meanEntropy <= 2.660180926322937 an...,COVERING: 0.5765895953757225,ERROR: 0.04431137724550893
3,RULE 4: IF meanEntropy <= 2.6630786657333374 a...,COVERING: 0.5102328863796753,ERROR: 0.030831099195710476
4,RULE 5: IF meanEntropy <= 2.658132314682007 an...,COVERING: 0.38146551724137934,ERROR: 0.013011152416356864
5,RULE 6: IF meanEntropy <= 2.6601980924606323 a...,COVERING: 0.319375443577005,ERROR: 0.09456740442655931
6,RULE 7: IF meanEntropy <= 2.658819079399109 an...,COVERING: 0.2275618374558304,ERROR: 0.0980392156862745
7,RULE 8: IF meanEntropy <= 2.658132314682007 an...,COVERING: 0.20186781609195403,ERROR: 0.07868852459016396
8,RULE 9: IF meanEntropy <= 2.6694496870040894 a...,COVERING: 0.18014184397163122,ERROR: 0.3153638814016172
9,RULE 10: IF meanEntropy <= 2.664484739303589 a...,COVERING: 0.10608203677510608,ERROR: 0.23469387755102045


In [4]:
skrules["Rule"][0]

'RULE 1: IF meanEntropy <= 2.658132314682007 and autoCorrEntropy <= 0.9989591240882874 THEN output = 1'

In [5]:
# adjust columns values
for i in range(len(skrules)+1):   
    skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("RULE {}: ".format(i),""))
skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("AND","and"))
skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("{",""))
skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("}",""))
#skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("output in ","output = "))
skrules["Covering"] = skrules["Covering"].apply(lambda x: x.replace("COVERING: ",""))
skrules["Error"] = skrules["Error"].apply(lambda x: x.replace("ERROR: ",""))
skrules['Output'] = skrules['Rule'].str.extract(r'output = (\d+)', expand=False).astype(int)
skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace("IF ",""))
skrules["Rule"] = skrules["Rule"].apply(lambda x: x.replace(x[x.find("THEN"):],""))

In [6]:
# for noncoll 
data = pd.read_csv("simulationVictorTopological/test.csv")
skrules_noncoll = skrules[skrules["Output"]==0]
preds_skope = evaluate_rules(data, skrules_noncoll, "output")

rule: meanEntropy > 2.6678194999694824 and percentile75Entropy > 2.7067649364471436
tp = 219, tn = 21, fp = 2, fn = 324
precision: 0.9909502262443439, covering: 0.40331491712707185, error: 0.08695652173913043, accuracy: 0.42402826855123676, f1: 0.5732984293193717

rule: meanEntropy > 2.6678937673568726 and percentile25Entropy > 2.67653751373291 and percentile75Entropy > 2.6972150802612305
tp = 199, tn = 21, fp = 2, fn = 344
precision: 0.9900497512437811, covering: 0.3664825046040516, error: 0.08695652173913043, accuracy: 0.38869257950530034, f1: 0.5349462365591398

rule: meanEntropy > 2.6938308477401733
tp = 198, tn = 21, fp = 2, fn = 345
precision: 0.99, covering: 0.36464088397790057, error: 0.08695652173913043, accuracy: 0.3869257950530035, f1: 0.5329744279946165

rule: meanEntropy > 2.6694501638412476 and percentile25Entropy > 2.676337480545044 and percentile90Entropy > 2.721514940261841
tp = 188, tn = 21, fp = 2, fn = 355
precision: 0.9894736842105263, covering: 0.3462246777163904,

In [7]:
# for coll
data = pd.read_csv("simulationVictorTopological/test.csv")
skrules_noncoll = skrules[skrules["Output"]==1]
preds_skope = evaluate_rules(data, skrules_noncoll, "output")

rule: meanEntropy <= 2.658132314682007 and autoCorrEntropy <= 0.9989591240882874
tp = 12, tn = 18, fp = 5, fn = 531
precision: 0.7058823529411765, covering: 0.022099447513812154, error: 0.21739130434782608, accuracy: 0.053003533568904596, f1: 0.04285714285714286

rule: meanEntropy <= 2.658132314682007
tp = 12, tn = 18, fp = 5, fn = 531
precision: 0.7058823529411765, covering: 0.022099447513812154, error: 0.21739130434782608, accuracy: 0.053003533568904596, f1: 0.04285714285714286

rule: meanEntropy <= 2.660180926322937 and percentile75Entropy <= 2.6744799613952637
tp = 12, tn = 18, fp = 5, fn = 531
precision: 0.7058823529411765, covering: 0.022099447513812154, error: 0.21739130434782608, accuracy: 0.053003533568904596, f1: 0.04285714285714286

rule: meanEntropy <= 2.6630786657333374 and medianEntropy <= 2.654175043106079
tp = 9, tn = 21, fp = 2, fn = 534
precision: 0.8181818181818182, covering: 0.016574585635359115, error: 0.08695652173913043, accuracy: 0.053003533568904596, f1: 0.0324