In [1]:
import os
import time
import pandas as pd
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.ASPUtils.damerauLevenshtein import DamerauLevenshteinDistance
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.LogGenerator.ASP.ASPLogGenerator import AspGenerator

Test incrociato per cercare la migliore combinazione tra configurazione, frequenza e threads per migliorare la velocità del solver.

In [2]:
# Declare model (file name without extension) loaded from the repository's test models
model_name = 'data-model1'
model: DeclareModel = DeclareModel().parse_from_file(
    os.path.join("../../../", "tests", "test_models", f"{model_name}.decl")
)

# Number of cases that have to be generated
num_of_cases = 30

# Lower and upper bound on the number of events a single case can contain
num_min_events, num_max_events = 5, 10

# Generator shared by the benchmark cell below
asp_gen: AspGenerator = AspGenerator(
    model,
    num_of_cases,
    num_min_events,
    num_max_events,
)

In [3]:
%%time
# "frumpy",
# Grid search over the clingo settings passed to the generator: every
# combination of configuration, frequency, sign definition and mode is run
# once, timed, and exported to a CSV whose name encodes the settings used.
configuration = ["frumpy", "tweety", "crafty", "jumpy", "trendy", "handy"]
frequency = [0, 0.3, 0.6, 0.9, 1]
sign_def = ["rnd", "asp"]
modes = ["optN", "ignore"]

# Output directory. The original spelling "Log_Results" only resolved to the
# directory read by the other cells ("Log_results") on case-insensitive
# filesystems; use the same spelling everywhere and make sure it exists.
log_dir = "Log_results"
os.makedirs(log_dir, exist_ok=True)

# Loop-invariant: evaluated once instead of twice per iteration.
threads = os.cpu_count()

for config in configuration:
    for freq in frequency:
        for sign in sign_def:
            for mode in modes:

                clingo_config = {
                    "config": config,
                    "freq": str(freq),
                    "threads": threads,
                    "sign": sign,
                    "mode": mode,
                }

                # Only the solver run is timed; the CSV export is excluded.
                start = time.time()
                asp_gen.run(clingo_config)
                end = time.time()

                # The file name is parsed later by splitting on "_", so the
                # field order and the trailing "_" before ".csv" must be kept.
                asp_gen.to_csv(
                    f'{log_dir}/time_{round(end - start, 4)}_config_{config}'
                    f'_threads_{threads}_freq_{freq}_sign_{sign}_mode_{mode}_.csv'
                )



DEBUG:ASP generator:Using custom traces length
DEBUG:ASP generator:Generating traces
DEBUG:ASP generator:Translate declare model to ASP
DEBUG:ASP generator:Declare model translated to ASP. Total Facts 13
DEBUG:ASP generator:ASP encoding generated
DEBUG:ASP generator:Start generating traces: Counter({5: 7, 7: 6, 10: 5, 6: 5, 9: 4, 8: 3})
DEBUG:ASP generator: Total trace to generate and events: Traces:5, Events: 10, RandFrequency: 0
DEBUG:ASP generator: Traces generated :[assigned_value(attr_name_0,attr_value_2,1), assigned_value(attr_name_0,attr_value_25,2), assigned_value(attr_name_0,attr_value_10,3), assigned_value(attr_name_0,attr_value_10,4), assigned_value(attr_name_0,attr_value_10,5), assigned_value(attr_name_0,attr_value_10,6), assigned_value(attr_name_0,attr_value_10,7), assigned_value(attr_name_0,attr_value_10,8), assigned_value(attr_name_0,attr_value_9,9), assigned_value(attr_name_0,attr_value_9,10), assigned_value(attr_name_1,attr_value_26,1), assigned_value(attr_name_2,20,1)

CPU times: total: 36min
Wall time: 8min 59s


Filtraggio dei risultati per numero di thread, creando un file di report per ciascun numero di thread.

In [4]:
%%time

# Build a similarity report from the generated logs.
#
# Each log file is expected to be named
#   time_<t>_config_<c>_threads_<n>_freq_<f>_sign_<s>_mode_<m>_.csv
# optionally prefixed with "similarity_<distance>_" once it has already been
# analysed. The settings are recovered by splitting the file name on "_".
Cols_Name = ["Configuration", "Distance", "Time", "Frequency", "Sign-Def", "Opt-Mode"]
results = []

# Thread count used only in the report file name.
# NOTE(review): hard-coded; the "threads" field of each file name is not read.
t = 16

log_dir = "Log_results"

for file in os.listdir(log_dir):

    stripped_file = file.split("_")

    # Already-analysed files carry two extra leading fields
    # ("similarity", "<distance>"), shifting every other field by two.
    offset = 0
    if stripped_file[0] == "similarity":
        if len(stripped_file) > 1 and stripped_file[1] == "report":
            # Previously written report file, not a log.
            continue
        offset = 2

    # Skip anything that does not follow the naming scheme (for example logs
    # written by other cells into the same directory) instead of failing with
    # an IndexError on the field lookups below.
    if len(stripped_file) < 12 + offset or stripped_file[offset] != "time":
        print(f"skipping unrecognised file: {file}")
        continue

    print(stripped_file)

    # Named `elapsed` rather than `time` so the imported `time` module is not
    # shadowed for cells that call time.time() afterwards.
    elapsed = stripped_file[1 + offset]
    config = stripped_file[3 + offset]
    freq = stripped_file[7 + offset]
    sign = stripped_file[9 + offset]
    mode = stripped_file[11 + offset]

    file_path = f"{log_dir}/{file}"

    if offset == 0:
        # Not analysed yet: compute the distance from the CSV content.
        distance = DamerauLevenshteinDistance.analize_csv(file_path)
    else:
        # Already analysed: the distance is stored in the file name.
        distance = stripped_file[1]

    results.append([config, str(round(float(distance), 4)), elapsed, freq, sign, mode])

    if offset == 0:
        # Record the distance in the file name so the next run can reuse it.
        try:
            os.rename(file_path, f"{log_dir}/similarity_{round(distance, 4)}_{file}")
        except FileNotFoundError:
            print("file not found")

if len(results) > 0:
    df = pd.DataFrame(results, columns=Cols_Name)
    df.to_csv(f"{log_dir}/similarity_report_threads_{t}.csv")

['time', '2.4927', 'config', 'frumpy', 'threads', '16', 'freq', '0', 'sign', 'asp', 'mode', 'ignore', '.csv']
['time', '2.5016', 'config', 'frumpy', 'threads', '16', 'freq', '0', 'sign', 'rnd', 'mode', 'ignore', '.csv']
['time', '2.5143', 'config', 'frumpy', 'threads', '16', 'freq', '0', 'sign', 'asp', 'mode', 'optN', '.csv']
['time', '2.6691', 'config', 'frumpy', 'threads', '16', 'freq', '0', 'sign', 'rnd', 'mode', 'optN', '.csv']
['time', '2.7407', 'config', 'frumpy', 'threads', '16', 'freq', '0.3', 'sign', 'rnd', 'mode', 'ignore', '.csv']
['time', '2.7474', 'config', 'frumpy', 'threads', '16', 'freq', '0.9', 'sign', 'rnd', 'mode', 'ignore', '.csv']
['time', '2.7527', 'config', 'tweety', 'threads', '16', 'freq', '0', 'sign', 'asp', 'mode', 'optN', '.csv']
['time', '2.75', 'config', 'tweety', 'threads', '16', 'freq', '0', 'sign', 'asp', 'mode', 'ignore', '.csv']
['time', '2.7642', 'config', 'frumpy', 'threads', '16', 'freq', '0.3', 'sign', 'rnd', 'mode', 'optN', '.csv']
['time', '2.77

Analisi di due altri modelli declare che dovrebbero causare problemi al solver. Si cerca correttezza nei modelli

In [None]:
import os
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.LogGenerator.ASP.ASPLogGenerator import AspGenerator
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.ASPTranslator.asp_translator import ASPModel

In [None]:
# Names (without extension) of the two Declare models to analyse
model1_name, model2_name = "data-model1", "data-model2"

# Parse both models from the repository's test models folder, model1 first
model1, model2 = (
    DeclareModel().parse_from_file(
        os.path.join("../../../", "tests", "test_models", f"{name}.decl")
    )
    for name in (model1_name, model2_name)
)

# Number of cases that have to be generated
num_of_cases = 10

# Lower and upper bound on the number of events a single case can contain
num_min_events, num_max_events = 10, 20

# activity, attribute, events encoding
encode = True

# One generator per model, built with identical settings
model1_gen, model2_gen = (
    AspGenerator(declare_model, num_of_cases, num_min_events, num_max_events, encode)
    for declare_model in (model1, model2)
)

In [None]:
# Print the result of ASPModel(False).from_decl_model for model1's process model.
# NOTE(review): presumably the ASP translation of the Declare model (inferred
# from the class/module names) — confirm; the meaning of the False/None
# arguments is not visible here.
print(ASPModel(False).from_decl_model(model1_gen.process_model, None))

In [None]:
%%time

# Run the generator for model1 (run() is called without a clingo configuration)
# and export the result to Log_results/<model1_name>.csv.
model1_gen.run()
model1_gen.to_csv(f"Log_results/{model1_name}.csv")

In [None]:
# Print the generator's lp_model attribute after the run.
# NOTE(review): presumably the logic program used for generation — confirm.
print(model1_gen.lp_model)

In [None]:
%%time

# Run the generator for model2 (run() is called without a clingo configuration)
# and export the result to Log_results/<model2_name>.csv.
model2_gen.run()
model2_gen.to_csv(f"Log_results/{model2_name}.csv")

Controlliamo quali solver rispettano le regole asp

In [18]:
import os
import time
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.LogGenerator.ASP.ASPLogGenerator import AspGenerator

# Simplified variant of data-model1, loaded from the repository's test models
model_name = "data-model1-simplified"
model: DeclareModel = DeclareModel().parse_from_file(
    os.path.join("../../../", "tests", "test_models", f"{model_name}.decl")
)

# Number of cases that have to be generated
num_of_cases = 20

# Minimum and maximum number of events a case can contain (equal here, so the length is fixed)
num_min_events, num_max_events = 20, 20

# activity, attribute, events encoding
encode = True

asp: AspGenerator = AspGenerator(
    model,
    num_of_cases,
    num_min_events,
    num_max_events,
    encode,
)

In [19]:
# Run the generator once per clingo configuration, with the remaining
# settings held fixed, and export each result to its own CSV.
configuration = ["frumpy", "tweety", "crafty", "jumpy", "trendy", "handy"]
freq = 1
sign_def = "asp"
mode = "optN"

for config in configuration:

    clingo_config = {
        "config": config,
        "freq": str(freq),
        "threads": os.cpu_count(),
        "sign": sign_def,
        "mode": mode,
    }

    # Time only the solver run; the CSV export below is not included.
    start = time.time()
    asp.run(clingo_config)
    end = time.time()

    # The measured time and all settings are encoded in the file name.
    csv_path = f'Config_Test/time_{round(end - start, 4)}_config_{config}_threads_{os.cpu_count()}_freq_{freq}_sign_{sign_def}_mode_{mode}_.csv'
    asp.to_csv(csv_path)

DEBUG:ASP generator:Using custom traces length
DEBUG:ASP generator:Generating traces
DEBUG:ASP generator:Translate declare model to ASP
DEBUG:ASP generator:Declare model translated to ASP. Total Facts 13
DEBUG:ASP generator:ASP encoding generated
DEBUG:ASP generator:Start generating traces: Counter({20: 20})
DEBUG:ASP generator: Total trace to generate and events: Traces:20, Events: 20, RandFrequency: 1
DEBUG:ASP generator: Traces generated :[trace(evt_val_1,10), trace(evt_val_1,20), trace(evt_val_2,11), trace(evt_val_7,4), trace(evt_val_7,9), trace(evt_val_7,13), trace(evt_val_7,14), trace(evt_val_7,18), trace(evt_val_8,5), trace(evt_val_8,8), trace(evt_val_9,2), trace(evt_val_9,15), trace(evt_val_10,3), trace(evt_val_10,16), trace(evt_val_12,1), trace(evt_val_12,6), trace(evt_val_12,7), trace(evt_val_12,12), trace(evt_val_12,17), trace(evt_val_12,19), assigned_value(attr_name_0,attr_value_25,1), assigned_value(attr_name_0,attr_value_25,2), assigned_value(attr_name_0,attr_value_25,3),

In [3]:
from Declare4Py.ProcessModels.DeclareModel import DeclareModel
from Declare4Py.ProcessMiningTasks.ASPLogGeneration.asp_generator import AspGenerator as OldASPGenerator
import os

# Same simplified model, this time fed to the previous generator implementation
model_name = "data-model1-simplified"
model: DeclareModel = DeclareModel().parse_from_file(
    os.path.join("../../../", "tests", "test_models", f"{model_name}.decl")
)

# 30 traces with between 10 and 10 events each, per the generator's debug log
asp: OldASPGenerator = OldASPGenerator(model, 30, 10, 10)
asp.run()

# Export the generated log
asp.to_csv(f'Config_Test/manpreet.csv')


DEBUG:ASP generator:Distribution for traces uniform
DEBUG:ASP generator:traces: 30, events can have a trace min(10) max(10)
INFO:ASP generator:Computing distribution
DEBUG:Distributor:Distribution() uniform min_mu: 10 max_sigma: 10 num_traces: 30 custom_prob: None
DEBUG:Distributor:Uniform() probabilities: [Fraction(1, 1)]
DEBUG:Distributor:Custom_dist() min_mu:10 max_sigma:10 num_traces:30
DEBUG:Distributor:Probabilities sum 1
DEBUG:Distributor:Distribution result: [10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10 10
 10 10 10 10 10 10]
INFO:ASP generator:Distribution result Counter({10: 30})
DEBUG:ASP generator:Using custom traces length
INFO:ASP generator:Computing distribution
DEBUG:Distributor:Distribution() uniform min_mu: 10 max_sigma: 10 num_traces: 30 custom_prob: None
DEBUG:Distributor:Uniform() probabilities: [Fraction(1, 1)]
DEBUG:Distributor:Custom_dist() min_mu:10 max_sigma:10 num_traces:30
DEBUG:Distributor:Probabilities sum 1
DEBUG:Distributor:Distri

In [None]:
# Keep only the logs whose similarity exceeds a threshold: matching files are
# renamed with a "similarity_<value>_" prefix, every other file in
# Log_results is DELETED.
# NOTE(review): `similarity_dict` is not defined in any visible cell; it must
# already exist in the kernel (file name -> similarity) for this cell to run.
sorted_list = sorted(similarity_dict.items())

threshold = 45

# Entries strictly above the threshold, ordered from most to least similar
filtered_list = [entry for entry in sorted_list if entry[1] > threshold]
filtered_list.sort(key=lambda entry: entry[1], reverse=True)

files = [entry[0] for entry in filtered_list]


def get_similarity(csv_file: str):
    """Return the similarity stored for ``csv_file`` in ``filtered_list``, or 0 if it is absent."""
    return next(
        (similarity for name, similarity in filtered_list if name == csv_file),
        0,
    )


for file in os.listdir("Log_results"):
    if file in files:
        # Above the threshold: tag the file name with its similarity.
        try:
            os.rename("Log_results/" + file, f"Log_results/similarity_{get_similarity(file)}_" + file)
        except FileNotFoundError:
            print("file not found")
    else:
        # Below the threshold (or unknown): remove it, ignoring files already gone.
        try:
            os.remove(f"Log_results/{file}")
        except FileNotFoundError:
            pass
            

In [None]:
# Prefix every file in Log_results with its similarity, without filtering.
# NOTE(review): `similarity_dict` is not defined in any visible cell; it must
# already exist in the kernel (file name -> similarity) for this cell to run.
sorted_list = sorted(similarity_dict.items())

# Lookup table built from this cell's own data. The original searched
# `filtered_list`, which this cell never defines (its `sorted_list` was
# unused), so the cell failed on a fresh kernel.
similarity_by_name = dict(sorted_list)


def get_similarity(csv_file: str):
    """Return the similarity recorded for ``csv_file``, or 0 if there is none."""
    return similarity_by_name.get(csv_file, 0)


for file in os.listdir("Log_results"):
    try:
        os.rename("Log_results/" + file, f"Log_results/similarity_{get_similarity(file)}_" + file)
    except FileNotFoundError:
        # The file disappeared between listing and renaming; report and go on.
        print("file not found")
