# Implementazione del task 7 - Process Mining

Questo task consiste nel trasformare la sequenza di transazioni calcolata nel task 5 in un file `.xes` in modo che possa essere correttamente trasformata in un processo da [Apromore](https://apromore.com/download/).

In [1]:
# IMPORT
import nbimporter
import ETLBasics_t1 as task1
import ProfilingBasics_t2 as task2
import TimeSeriesToEvents_t3 as task3
import FeatureReduction_t4 as task4
import MakeTransactions_t5 as task5

from datetime import datetime
import dateutil as du
import pandas as pd
from xml.etree.ElementTree import Element, SubElement, Comment, tostring, ElementTree
from xml.dom import minidom

import warnings
warnings.simplefilter("ignore", UserWarning)

In [2]:
# VARIABLES
# PATH is passed as the first argument to every task 1 loader in this notebook.
PATH = './pmdata/'
# `people` is passed to the task 1 loaders and used to select rows of the
# pickled dataframes via .loc; presumably participant ids -- TODO confirm.
people = [1,2,3]

In [3]:
# HYPER-PARAMETERS

# grouping parameters for task 3 (third argument of
# task3.from_ts_to_event_based_data for the 'DAY' and 'HOUR' series)
K_GROUP_DAY = 2
K_GROUP_HOUR = 4

# temporal attributes for the task 5 labels (forwarded to
# task5.make_transactions as `times_attributes`)
times_attributes = []  #['week_day','weekend']

### Implementazione del task 1

In [4]:
# Build one dataframe per data source through the task 1 ETL helpers.
sedentary_minutes = task1.sedentary_minutes_to_df(PATH, people)
sleep_0, sleep_1 = task1.sleep_to_df(PATH, people)
exercise_0, exercise_1 = task1.exercise_to_df(PATH, people)
lightly_active_minutes = task1.lightly_active_minutes_to_df(PATH, people)
time_in_heart_rate_zones = task1.time_in_heart_rate_zones_to_df(PATH, people)
moderately_active_minutes = task1.moderately_active_minutes_to_df(PATH, people)
very_active_minutes = task1.very_active_minutes_to_df(PATH, people)
resting_heart_rate = task1.resting_heart_rate_to_df(PATH, people)
srpe = task1.srpe_to_df(PATH, people)
wellness = task1.wellness_to_df(PATH, people)
injury = task1.injury_to_df(PATH, people)

# steps, distance, calories and heart_rate: try the pickled dataframes under
# dataframes/ first (presumably a cache of the task 1 output -- TODO confirm)
# and fall back to rebuilding all four with task 1 if any of them cannot be
# loaded.
try:
    steps = pd.read_pickle("dataframes/steps.pkl").loc[people]
    distance = pd.read_pickle("dataframes/distance.pkl").loc[people]
    calories = pd.read_pickle("dataframes/calories.pkl").loc[people]
    heart_rate = pd.read_pickle("dataframes/heart_rate.pkl").loc[people]
except Exception:
    # `except Exception` instead of a bare `except:` so that ordinary failures
    # (e.g. a missing file, an unreadable pickle, a label not found by .loc)
    # still trigger the fallback, while KeyboardInterrupt / SystemExit are no
    # longer swallowed and can stop the cell.
    steps = task1.steps_to_df(PATH, people)
    distance = task1.distance_to_df(PATH, people)
    calories = task1.calories_to_df(PATH, people)
    heart_rate = task1.heart_rate_to_df(PATH, people)

### Implementazione dei task 3, 4 e 5

In [5]:
# Time series keyed by the granularity string handed to task 3.
group_time_series_map = {
    'DAY': [
        sedentary_minutes,
        lightly_active_minutes,
        moderately_active_minutes,
        very_active_minutes,
        resting_heart_rate,
        time_in_heart_rate_zones,
    ],
    'HOUR': [calories],  # ,distance,heart_rate,steps
}
event_name = [exercise_0, exercise_1]  # ,sleep_0,sleep_1,srpe,wellness,injury

# DAY series: task 3 conversion to event-based data, then task 4 k-means
# discretization. (Per the original author's note, the task 3 step adds a
# _mean and a _std column for every numeric column.)
day_kmeans_data = [
    task4.kmeans_discretization(
        task3.from_ts_to_event_based_data(series, 'DAY', K_GROUP_DAY)
    )
    for series in group_time_series_map['DAY']
]

# HOUR series: same two steps with the hourly grouping parameter.
hour_kmeans_data = [
    task4.kmeans_discretization(
        task3.from_ts_to_event_based_data(series, 'HOUR', K_GROUP_HOUR)
    )
    for series in group_time_series_map['HOUR']
]

# Event dataframes are discretized directly (no task 3 step).
event_kmeans_data = []
for raw_event in event_name:
    discretized = task4.kmeans_discretization(raw_event, is_event=True)
    # Move TS out of the index into a regular TS column so the procedure that
    # builds the transactions can use it.
    with_ts = discretized.reset_index(level='TS')
    # Separate copy of the TS values, to avoid errors if task 5 is recomputed.
    with_ts['obj_TS'] = list(with_ts['TS'])
    event_kmeans_data.append(with_ts)

transactions = task5.make_transactions(
    day_kmeans_data,
    hour_kmeans_data,
    event_kmeans_data,
    times_attributes=times_attributes,
)

## Process Mining

Il process mining permette l'analisi di processi basati sui log degli eventi allo scopo di estrarre conoscenza da questi ultimi. Infatti, l'obiettivo del process mining è quello di migliorare la descrizione di un processo attraverso tecniche e strumenti utili a scoprire strutture, modelli e molte altre informazioni a partire dai log.

In [6]:
# FUNZIONI TASK 7
def time_string(_str):
    """Turn a 'date time' string into the 'dateTtimeZ' form used for XES dates.

    Every space in ``_str`` is replaced by ``T`` and a trailing ``Z`` is
    appended; no parsing or validation of the timestamp is performed.
    """
    t_separated = _str.replace(' ', 'T')
    return f'{t_separated}Z'

def prettify(elem):
    """Return the Element serialised as an indented XML string.

    The element is first serialised to UTF-8 bytes with ElementTree, then
    re-parsed with minidom, whose pretty-printer adds the XML declaration,
    line breaks and a two-space indent.
    """
    serialized = tostring(elem, 'utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="  ")

def current_tree():
    """Print the pretty-printed XML of the module-level ``root`` element."""
    pretty_xml = prettify(root)
    print(pretty_xml)

In [7]:
# BUILD THE XES DOCUMENT PREAMBLE
root = Element(
    "log",
    {
        'xes.version': "2.0",
        'xes.features': "nested-attributes",
        'openxes.version': "2.27",
    },
)

# Declare the XES extensions, in this order. Each URI has the form
# http://www.xes-standard.org/<prefix>.xesext, so it is derived from the prefix.
for ext_name, ext_prefix in (
    ("Lifecycle", "lifecycle"),
    ("Organizational", "org"),
    ("Time", "time"),
    ("Concept", "concept"),
    ("Cost", "cost"),
):
    SubElement(
        root,
        "extension",
        name=ext_name,
        prefix=ext_prefix,
        uri=f"http://www.xes-standard.org/{ext_prefix}.xesext",
    )

current_tree()

<?xml version="1.0" ?>
<log openxes.version="2.27" xes.features="nested-attributes" xes.version="2.0">
  <extension name="Lifecycle" prefix="lifecycle" uri="http://www.xes-standard.org/lifecycle.xesext"/>
  <extension name="Organizational" prefix="org" uri="http://www.xes-standard.org/org.xesext"/>
  <extension name="Time" prefix="time" uri="http://www.xes-standard.org/time.xesext"/>
  <extension name="Concept" prefix="concept" uri="http://www.xes-standard.org/concept.xesext"/>
  <extension name="Cost" prefix="cost" uri="http://www.xes-standard.org/cost.xesext"/>
</log>



In [8]:
# WRITE THE XES FILE CONTENT
# One <trace> element is appended to the root for every entry of
# `transactions`; each trace is named trace_<i> and holds one <event> per item.
traces = []
for i, _ in enumerate(transactions):
    trace = SubElement(root, "trace")
    traces.append(trace)
    SubElement(trace, "string", key="concept:name", value=f'trace_{i}')

    for item in transactions[i]:  # every event of the trace
        event_el = SubElement(trace, "event")
        # item[0] is written as the timestamp and item[1] as the event name;
        # group and lifecycle transition are the same for every event.
        for tag, key, value in (
            ("date", "time:timestamp", time_string(item[0])),
            ("string", "concept:name", item[1]),
            ("string", "org:group", "partecipant"),
            ("string", "lifecycle:transition", "complete"),
        ):
            SubElement(event_el, tag, key=key, value=value)

# Call current_tree() here to inspect the resulting document.

In [9]:
# SAVE TO THE FILE SYSTEM
# prettify() emits an XML declaration without an encoding attribute, so XML
# readers will decode the file as UTF-8. Write it as UTF-8 explicitly instead
# of relying on the platform's locale encoding, which would corrupt any
# non-ASCII character on systems whose default is not UTF-8.
with open('task7.xes', 'w', encoding='utf-8') as f:
    f.write(prettify(root))

## Alcuni risultati mostrati da Apromore

![Apri Jupyter per visualizzare l'immagine](./images/task7_case_frequency_5_10.png)

![Apri Jupyter per visualizzare l'immagine](./images/task7_average_duration_5_10_bpmn.png)