In [None]:
import pdb

import pandas as pd

from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGenerator import PositionalBasedLogGenerator
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedModel import PositionalBasedModel

# Generate one synthetic event log per noise level. For every level, a log is
# produced from the positive model and another from the negative model; the two
# are concatenated and written to `experimental_model_pos_neg_{noise}.csv`.
polarity = ['pos', 'neg']

# The evaluation cell further down reads one CSV for each of the noise levels
# 0, 5, 10 and 15, so all of them are generated here (a single hard-coded level
# would leave the other files missing on a fresh run).
noise_levels = [0, 5, 10, 15]

for noise in noise_levels:
    # Start from an empty list for every noise level so logs never leak across levels.
    df = []
    for pol in polarity:
        print(pol, noise)
        model_path = f"experimental_model_{pol}.decl"
        Model: PositionalBasedModel = PositionalBasedModel().parse_from_file(model_path)
        # NOTE(review): the positional arguments are presumably (number of traces,
        # min events, max events, model, verbose) - confirm against the
        # PositionalBasedLogGenerator signature.
        generator: PositionalBasedLogGenerator = PositionalBasedLogGenerator(1000, 20, 20, Model, True)
        generator.run(generate_negatives_traces=False, positive_noise_percentage=noise, negative_noise_percentage=noise)
        result_dataframe: pd.DataFrame = generator.get_results_as_dataframe().copy()
        if pol == 'neg':
            # Everything generated from the negative model is labelled 'Negative',
            # and its case ids get a 'neg_' prefix so they stay distinguishable
            # from the ids produced for the positive model.
            result_dataframe['case:label'] = 'Negative'
            result_dataframe['case:concept:name'] = 'neg_' + result_dataframe['case:concept:name']
        df.append(result_dataframe)

    # `to_csv` returns None when given a path, so its result is deliberately not stored.
    pd.concat(df).to_csv(f"experimental_model_pos_neg_{noise}.csv")

In [None]:
""" Alternative way of generating logs
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedLogGenerator import PositionalBasedLogGenerator
from Declare4Py.ProcessMiningTasks.LogGenerator.PositionalBased.PositionalBasedModel import PositionalBasedModel

import pdb

model_path = f"experimental_model_pos.decl"
Model: PositionalBasedModel = PositionalBasedModel().parse_from_file(model_path)
noise = 0
generator: PositionalBasedLogGenerator = PositionalBasedLogGenerator(500, 20, 20, Model, True)
generator.run(generate_negatives_traces=True, positive_noise_percentage=noise, negative_noise_percentage=noise)
result_dataframe: pd.DataFrame = generator.get_results_as_dataframe().copy()

generator.to_csv(f"experimental_model_{noise}.csv")
"""

In [None]:
import pandas as pd
import matplotlib.patches as mpatches
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from sklearn.tree import DecisionTreeClassifier, plot_tree
from Declare4Py.Encodings.IndexBased import IndexBased
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from sklearn.metrics import classification_report, f1_score
from Declare4Py.Encodings.Aggregate import Aggregate
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_recall_fscore_support

# Seed shared by the train/test split and the tree-based classifiers.
RNG = 0

# Noise levels for which `experimental_model_pos_neg_{noise}.csv` is read.
noise_list = [0, 5, 10, 15]

# One row per (encoder, classifier): [encoder name, classifier name,
# then precision, recall, F1 for every entry of `noise_list`, in order].
results = []

# Both dictionaries map the *object* to its display name (not name -> object);
# the objects are only used as keys by identity.
encoders = {IndexBased(case_id_col="case:concept:name", cat_cols = ['concept:name'], num_cols=['valore', 'age'], create_dummies=True): "ComplexIdx",
            Aggregate(case_id_col="case:concept:name", cat_cols=['concept:name'], boolean=True): "Boolean",
            Aggregate(case_id_col="case:concept:name", cat_cols=['concept:name'], boolean=False): "Frequency",
            Aggregate(case_id_col="case:concept:name", cat_cols=['concept:name'], num_cols=['valore', 'age'], boolean=False, aggregation_functions=['min', 'mean', 'max']): "Aggregate",
            IndexBased(case_id_col="case:concept:name", cat_cols=['concept:name'], create_dummies=True): "SimpleIdx"}

# NOTE(review): the entry labelled "XGBoost" is scikit-learn's
# GradientBoostingClassifier, not the xgboost library; the label is kept
# unchanged because it is written to the results file.
classifiers = {LogisticRegression(random_state=0): "Log. Regr.",
               Perceptron(tol=1e-3, random_state=0): "Perceptron",
               SVC(kernel='rbf'): "SVM",
               DecisionTreeClassifier(max_depth=5, random_state = RNG): "DT",
               GradientBoostingClassifier(max_depth=5, random_state=RNG): "XGBoost",
               RandomForestClassifier(max_depth=5, random_state=RNG): "RF",
               MLPClassifier(random_state=1, activation='tanh', hidden_layer_sizes=(100, 100), max_iter=1000): "DNN"}


def encode_and_split(encoder, noise):
    """Load the log for one noise level, encode it and split it 80/20.

    Args:
        encoder: object exposing `fit_transform(DataFrame)`; its output must
            contain a "case:concept:name" column to merge the labels on.
        noise: noise level used to build the CSV file name.

    Returns:
        A pair `(encoded, split)` where `encoded` is the encoded frame with
        the "case:label" column merged in and the case id dropped, and `split`
        is the `(x_train, x_test, y_train, y_test)` tuple returned by
        `train_test_split` with `test_size=0.2` and `random_state=RNG`.
    """
    log_df = pd.read_csv(f"experimental_model_pos_neg_{noise}.csv")
    # Impute missing 'valore' with the column mean. Assigning the result back
    # (instead of `inplace=True` on the selected column) also works when
    # pandas copy-on-write is active.
    log_df['valore'] = log_df['valore'].fillna(log_df['valore'].mean())

    encoded = encoder.fit_transform(log_df)
    # One label per case, joined onto the per-case encoding.
    case_labels = log_df[["case:concept:name", "case:label"]].drop_duplicates()
    encoded = pd.merge(encoded, case_labels, on="case:concept:name").drop(["case:concept:name"], axis=1)

    # The merge appends the label as the right-most column: everything before
    # it is treated as a feature, the last column as the target.
    features = encoded.iloc[:, :-1]
    target = encoded.iloc[:, -1]
    return encoded, train_test_split(features, target, test_size=0.2, random_state=RNG)


for encoder, enc_name in encoders.items():
    # Reading, encoding and splitting do not depend on the classifier, so they
    # are done once per noise level and reused for every classifier.
    splits = {}
    for noise in noise_list:
        # `enc_df` stays a notebook-level name: after the loops it holds the
        # frame for the last encoder and last noise level, which later cells read.
        enc_df, splits[noise] = encode_and_split(encoder, noise)

    for clf, clf_name in classifiers.items():
        results_tmp = [enc_name, clf_name]
        for noise in noise_list:
            print(enc_name, clf_name, noise)
            x_train, x_test, y_train, y_test = splits[noise]
            clf.fit(x_train, y_train)
            y_pred = clf.predict(x_test)

            prec, rec, f1, _ = precision_recall_fscore_support(y_test, y_pred, pos_label="Positive", average="binary")
            results_tmp.extend([prec, rec, f1])
        results.append(results_tmp)

In [None]:
from datetime import datetime
# Save the most recent encoded frame under a timestamped name. The timestamp is
# formatted explicitly because the default string form of `datetime.now()`
# contains spaces and colons, and colons are not valid in Windows file names.
snapshot_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
enc_df.to_csv(f"train_{snapshot_stamp}.csv")

In [None]:
import csv

# Persist the result rows, one CSV line per (encoder, classifier) pair.
# `newline=''` is what the csv module expects for files handed to csv.writer;
# without it, platforms that translate line endings get a blank line after every row.
with open("results_whole.csv", 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows(results)

print(results)

In [None]:
figsize = (10, 10)

# `classifiers` is keyed by estimator object with the display name as value,
# so an integer index raises KeyError; pick the estimator labelled "DT" instead.
clf = next(estimator for estimator, label in classifiers.items() if label == "DT")

plt.figure(figsize=figsize)
# Feature names are every column of the last encoded frame except the trailing
# label column; they are passed as a plain list rather than a pandas Index.
# NOTE(review): class_names are expected in ascending class order, which matches
# the 'Negative' / 'Positive' labels used in this notebook - confirm if labels change.
plot_tree(clf, feature_names=list(enc_df.columns[:-1]), class_names=["Negatives", "Positives"], filled=True, fontsize=6)
plt.title("Decision Tree", fontsize=22.5, ha='center')

# Save before showing: the figure is written to disk while it is still current.
plt.savefig("dt.pdf")
plt.show()