In [None]:
import os
import time
import pandas as pd
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.ASPUtils.damerauLevenshtein import DamerauLevenshteinDistance
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.asp_generator import AspGenerator

Test incrociato per trovare la migliore combinazione di configurazione, frequenza e numero di thread che massimizzi la velocità del solver.

In [ ]:
# Declare model under test, parsed from ../../../tests/test_models/<model_name>.decl
model_name = 'sepsis'
model: DeclareModel = DeclareModel().parse_from_file(
    os.path.join("../../../", "tests", "test_models", f"{model_name}.decl")
)

# Number of cases that have to be generated
num_of_cases = 100

# Lower and upper bound on the number of events a single case can contain
num_min_events = 6
num_max_events = 10

# Generator shared by the benchmark cell below
asp_gen: AspGenerator = AspGenerator(
    model,
    num_of_cases,
    num_min_events,
    num_max_events,
)

In [ ]:
%%time

# Directory the benchmark logs are written to. It must match, including letter
# case, the "Log_results" directory that the analysis cells read from; the
# previous "Log_Results" spelling only worked on case-insensitive filesystems.
results_dir = "Log_results"
os.makedirs(results_dir, exist_ok=True)

# Parameter grid: every (configuration, frequency, threads) combination is run once.
configuration = ["frumpy", "jumpy", "tweety", "handy", "crafty", "trendy"]
frequency = [1, 0.9, 0.8]
threads = [4, 6, 8, 10, 12]

for config in configuration:
    for freq in frequency:
        for t in threads:
            # All option values are passed to the generator as strings.
            clingo_config = {"config": config, "freq": str(freq), "threads": str(t)}

            # perf_counter() is monotonic, so the measured duration cannot be
            # distorted by system clock adjustments during a long run.
            start = time.perf_counter()
            asp_gen.run(clingo_config)
            end = time.perf_counter()

            # The file name encodes the run parameters; the analysis cell
            # recovers them by splitting the name on "_".
            asp_gen.to_csv(
                f'{results_dir}/time_{round(end - start, 4)}_config_{config}_threads_{t}_freq_{freq}.csv'
            )



Filtraggio dei risultati per numero di thread, con la creazione di un file di report per ciascun valore di thread.

In [None]:
%%time

# Column layout of the per-thread report files.
Cols_Name = ["Configuration", "Distance", "Time", "Frequency"]
# One list of report rows per thread count, keyed by the thread count as it
# appears in the file name (a string).
threads_dict = {"4": [], "6": [], "8": [], "10": [], "12": []}

for file in os.listdir("Log_results"):

    # Benchmark logs are named
    #   time_<seconds>_config_<name>_threads_<n>_freq_<f>.csv
    # Anything else in the directory (logs already renamed with a
    # "similarity_" prefix, the report files written below, other exported
    # logs) is skipped, so the cell can be re-run safely.
    stripped_file = file.split("_")
    is_benchmark_log = (
        file.endswith(".csv")
        and len(stripped_file) == 8
        and stripped_file[0::2] == ["time", "config", "threads", "freq"]
    )
    if not is_benchmark_log:
        continue

    # Named `elapsed` rather than `time` so the imported `time` module is not
    # overwritten with a string (which would break the benchmark cell on re-run).
    elapsed = stripped_file[1]
    config = stripped_file[3]
    thread = stripped_file[5]
    freq = stripped_file[7][:-len(".csv")]
    distance = DamerauLevenshteinDistance.analize_csv(f"Log_results/{file}")

    # setdefault keeps the predefined keys and also accepts thread counts that
    # were not anticipated in threads_dict instead of raising KeyError.
    threads_dict.setdefault(thread, []).append([config, distance, elapsed, freq])

    # Tag the log file with its computed distance.
    try:
        os.rename("Log_results/" + file, f"Log_results/similarity_{distance}_" + file)
    except FileNotFoundError:
        print("file not found")

# Write one report per thread count.
for key, value in threads_dict.items():
    df = pd.DataFrame(value, columns=Cols_Name)
    df.to_csv(f"Log_results/similarity_report_threads_{key}.csv")

Analisi di altri due modelli Declare che dovrebbero causare problemi al solver. Si verifica la correttezza dei modelli.

In [6]:
import os
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.asp_generator import AspGenerator
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.ASPTranslator.asp_translator import ASPModel

In [7]:
model1_name = "data-model1"
model2_name = "data-model2"

# Parse both Declare models (first model1, then model2) from
# ../../../tests/test_models/<name>.decl
model1, model2 = (
    DeclareModel().parse_from_file(
        os.path.join("../../../", "tests", "test_models", f"{name}.decl")
    )
    for name in (model1_name, model2_name)
)

# Number of cases that have to be generated
num_of_cases = 30

# Lower and upper bound on the number of events a single case can contain
num_min_events, num_max_events = 6, 10

# Turn on the activity / attribute / event encoding
encode = True

# One generator per model, built with identical settings (model1 first)
model1_gen, model2_gen = (
    AspGenerator(declare_model, num_of_cases, num_min_events, num_max_events, encode)
    for declare_model in (model1, model2)
)

DEBUG:ASP generator:Distribution for traces uniform
DEBUG:ASP generator:traces: 30, events can have a trace min(6) max(10)
INFO:ASP generator:Computing distribution
DEBUG:Distributor:Distribution() uniform min_mu: 6 max_sigma: 10 num_traces: 30 custom_prob: None
DEBUG:Distributor:Uniform() probabilities: [Fraction(1, 5), Fraction(1, 5), Fraction(1, 5), Fraction(1, 5), Fraction(1, 5)]
DEBUG:Distributor:Custom_dist() min_mu:6 max_sigma:10 num_traces:30
DEBUG:Distributor:Probabilities sum 1
DEBUG:Distributor:Distribution result: [ 9 10  7  7 10 10  8  7  6  9 10  8  9  7  8  9  6 10  9  7  7  9  8  9
  9 10  6  9 10 10]
INFO:ASP generator:Distribution result Counter({9: 9, 10: 8, 7: 6, 8: 4, 6: 3})
DEBUG:ASP generator:Distribution for traces uniform
DEBUG:ASP generator:traces: 30, events can have a trace min(6) max(10)
INFO:ASP generator:Computing distribution
DEBUG:Distributor:Distribution() uniform min_mu: 6 max_sigma: 10 num_traces: 30 custom_prob: None
DEBUG:Distributor:Uniform() prob

In [8]:
%%time

# Run the generator for the first model, then export the result as CSV.
model1_gen.run()
model1_csv_path = f"Log_results/{model1_name}.csv"
model1_gen.to_csv(model1_csv_path)

DEBUG:ASP generator:Using custom traces length
INFO:ASP generator:Computing distribution
DEBUG:Distributor:Distribution() uniform min_mu: 6 max_sigma: 10 num_traces: 30 custom_prob: None
DEBUG:Distributor:Uniform() probabilities: [Fraction(1, 5), Fraction(1, 5), Fraction(1, 5), Fraction(1, 5), Fraction(1, 5)]
DEBUG:Distributor:Custom_dist() min_mu:6 max_sigma:10 num_traces:30
DEBUG:Distributor:Probabilities sum 1
DEBUG:Distributor:Distribution result: [10 10  8  6  9  7  8  7  7  9  7 10  7  8 10  8  7  9  6 10  8 10  7  9
  9 10  9  9  9  7]
INFO:ASP generator:Distribution result Counter({9: 8, 7: 8, 10: 7, 8: 5, 6: 2})
INFO:ASP generator:Computing distribution
DEBUG:ASP generator:Generating traces
DEBUG:ASP generator:Translate declare model to ASP
DEBUG:ASP generator:Declare model translated to ASP. Total Facts 13
DEBUG:ASP generator:ASP encoding generated
DEBUG:ASP generator:Start generating traces: Counter({9: 8, 7: 8, 10: 7, 8: 5, 6: 2})
DEBUG:ASP generator: Total trace to generat

CPU times: total: 32 s
Wall time: 4.88 s


In [10]:
# Print the first generator's `lp_model` attribute for inspection; in the
# captured output below it consists of facts such as `activity(...)` and
# `has_attribute(...)`.
print(model1_gen.lp_model)


activity(evt_val_0).
has_attribute(evt_val_0, attr_name_0).
has_attribute(evt_val_0, attr_name_1).
has_attribute(evt_val_0, attr_name_2).

activity(evt_val_1).
has_attribute(evt_val_1, attr_name_3).
has_attribute(evt_val_1, attr_name_0).
has_attribute(evt_val_1, attr_name_4).
has_attribute(evt_val_1, attr_name_5).
has_attribute(evt_val_1, attr_name_6).
has_attribute(evt_val_1, attr_name_7).
has_attribute(evt_val_1, attr_name_8).
has_attribute(evt_val_1, attr_name_9).
has_attribute(evt_val_1, attr_name_10).
has_attribute(evt_val_1, attr_name_2).
has_attribute(evt_val_1, attr_name_11).
has_attribute(evt_val_1, attr_name_12).
has_attribute(evt_val_1, attr_name_13).
has_attribute(evt_val_1, attr_name_14).
has_attribute(evt_val_1, attr_name_15).
has_attribute(evt_val_1, attr_name_16).
has_attribute(evt_val_1, attr_name_17).
has_attribute(evt_val_1, attr_name_18).
has_attribute(evt_val_1, attr_name_19).
has_attribute(evt_val_1, attr_name_20).
has_attribute(evt_val_1, attr_name_21).
has_attr

In [None]:
%%time

# Run the generator for the second model, then export the result as CSV.
model2_gen.run()
model2_csv_path = f"Log_results/{model2_name}.csv"
model2_gen.to_csv(model2_csv_path)

In [None]:
# Similarity a log must exceed (strictly) to be kept.
threshold = 45

# NOTE(review): `similarity_dict` is not defined in any cell visible in this
# notebook; this cell assumes it maps a log file name to its similarity value.
sorted_list = sorted(similarity_dict.items())

# Entries above the threshold, highest similarity first. The sort is stable,
# so entries with equal similarity keep the order they had in sorted_list.
filtered_list = sorted(
    (entry for entry in sorted_list if entry[1] > threshold),
    key=lambda entry: entry[1],
    reverse=True,
)

# Names of the files that passed the filter.
files = [entry[0] for entry in filtered_list]

def get_similarity(csv_file: str):
    """Return the similarity stored for ``csv_file`` in ``filtered_list``.

    ``filtered_list`` is a module-level list of ``(name, similarity)`` pairs.
    The similarity of the first pair whose name equals ``csv_file`` is
    returned; if no pair matches, the result is 0.
    """
    matches = (score for entry_name, score in filtered_list if entry_name == csv_file)
    return next(matches, 0)

# Walk the results directory: files that did not pass the threshold filter are
# deleted, the remaining ones get a "similarity_<value>_" prefix.
for file in os.listdir("Log_results"):
    current_path = f"Log_results/{file}"

    if file not in files:
        # Not selected: delete it, ignoring files that have already disappeared.
        try:
            os.remove(current_path)
        except FileNotFoundError:
            pass
        continue

    tagged_path = f"Log_results/similarity_{get_similarity(file)}_" + file
    try:
        os.rename(current_path, tagged_path)
    except FileNotFoundError:
        print("file not found")
            

In [None]:
# (name, similarity) pairs in ascending order.
# NOTE(review): `similarity_dict` is not defined in any cell visible in this notebook.
sorted_list = list(similarity_dict.items())
sorted_list.sort()

def get_similarity(csv_file: str):
    """Return the similarity recorded for ``csv_file``, or 0 if there is none.

    The lookup uses ``sorted_list``, the list of ``(name, similarity)`` pairs
    built at the top of this cell. The earlier version read ``filtered_list``,
    which is only created by the threshold-filtering cell, so this cell could
    not run on its own and its ``sorted_list`` was never used.

    Args:
        csv_file: File name to look up.

    Returns:
        The similarity of the first pair whose name equals ``csv_file``,
        or 0 when no pair matches.
    """
    for name, similarity in sorted_list:
        if name == csv_file:
            return similarity
    return 0

# Prefix every file in the results directory with "similarity_<value>_",
# where the value comes from get_similarity().
for file in os.listdir("Log_results"):
    current_path = "Log_results/" + file
    tagged_path = f"Log_results/similarity_{get_similarity(file)}_" + file
    try:
        os.rename(current_path, tagged_path)
    except FileNotFoundError:
        print("file not found")
