# read XES from BPI Challenge 2020
### Create all prefixes including attribute next activity & prefix length

In [17]:
import pm4py
from pm4py.objects.log.importer.xes import importer as xes_importer
import csv

In [18]:
# Load the XES file
raw_log = xes_importer.apply("RequestForPayment.xes")

# Specify the output CSV file path
output_csv_file = "BPIC_2020_prefixes_all.csv"

parsing log, completed traces ::   0%|          | 0/6886 [00:00<?, ?it/s]

In [19]:
delete_event_list = {'Request For Payment FINAL_APPROVED by BUDGET OWNER', 'Request For Payment FOR_APPROVAL by ADMINISTRATION', 'Request For Payment FOR_APPROVAL by SUPERVISOR'}

log = pm4py.filter_event_attribute_values(raw_log, 'concept:name', delete_event_list, level='event', retain=False)



In [20]:
# Get all unique attribute names from the event log
all_attributes = set()
for trace in log:
    for event in trace:
        all_attributes.update(event.keys())

### get dictionary of all activities

In [21]:
all_tasks = set()

# Iterate through the event log to gather all tasks and assign indices
for trace in log:
    for event in trace:
        all_tasks.add(event["concept:name"])

# Create a dictionary to map tasks to their indices
task_to_index = {task: index for index, task in enumerate(all_tasks)}
task_to_index

{'Request For Payment FINAL_APPROVED by DIRECTOR': 0,
 'Request For Payment REJECTED by PRE_APPROVER': 1,
 'Request For Payment REJECTED by MISSING': 2,
 'Request For Payment REJECTED by ADMINISTRATION': 3,
 'Request For Payment APPROVED by ADMINISTRATION': 4,
 'Request For Payment APPROVED by SUPERVISOR': 5,
 'Request For Payment SUBMITTED by EMPLOYEE': 6,
 'Request For Payment REJECTED by EMPLOYEE': 7,
 'Request For Payment APPROVED by BUDGET OWNER': 8,
 'Request For Payment APPROVED by PRE_APPROVER': 9,
 'Request For Payment FINAL_APPROVED by SUPERVISOR': 10,
 'Request For Payment SAVED by EMPLOYEE': 11,
 'Request For Payment REJECTED by BUDGET OWNER': 12,
 'Request Payment': 13,
 'Request For Payment REJECTED by SUPERVISOR': 14,
 'Payment Handled': 15}

## make csv of prefixes and add attributes

### version for accuracy with all prefixes

In [30]:
# Open the CSV file for writing
with open(output_csv_file, mode='w', newline='') as csv_file:
    # Create fieldnames with the desired column names
    fieldnames = ["prefix_id", "caseid", "end_timestamp", "task", "next_activity", "prefix_length", "task_index", "trace_start", "timelapsed", "milestone"] + [column if column != "org:role" else "role" for column in all_attributes]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the header
    writer.writeheader()

    # Iterate through the event log
    for trace in log:
        # Get the case name (case ID)
        case_name = trace.attributes["concept:name"]

        # Remove the "_nr" suffix from the case name (if it exists)
        if case_name.endswith("_nr"):
            case_name = case_name[:-3]

        # Iterate through the events, creating and writing each event with the "next_activity" attribute
        for i in range(len(trace) - 1):
            prefix_trace = trace[:i + 2]
            
            if i < len(prefix_trace) - 1:
                next_activity = prefix_trace[i + 1]["concept:name"]
            else:
                next_activity = None

            # if the previous event has concept:name "Request For Payment APPROVED by ADMINISTRATION", set the milestone column to "Request For Payment APPROVED by ADMINISTRATION" else none
            if prefix_trace[i]["concept:name"] == "Request For Payment APPROVED by ADMINISTRATION":
                milestone = "Request For Payment APPROVED by ADMINISTRATION"
            else:
                milestone = None

            for j, prefix_event in enumerate(prefix_trace):
                prefix_event["prefix_id"] = f"{case_name}_{i + 1}"

                if j != len(prefix_trace) - 1:
                    event_with_next_activity = {
                        "prefix_id": prefix_event["prefix_id"],
                        "caseid": case_name,
                        "end_timestamp": prefix_event["time:timestamp"],
                        "task": prefix_event["concept:name"],
                        "next_activity": next_activity,
                        "prefix_length": i + 1,
                        "task_index": task_to_index[prefix_event["concept:name"]], 
                        "trace_start": prefix_trace[0]["time:timestamp"],
                        "milestone": milestone,
                        "timelapsed": int((prefix_event["time:timestamp"] - prefix_trace[0]["time:timestamp"]).total_seconds() / 3600),
                        **{column if column != "org:role" else "role": value for column, value in prefix_event.items()}
                    }

                    # Write the event to the CSV file
                    writer.writerow(event_with_next_activity)

print(f"Event log data with all prefixes and next_activity has been exported to {output_csv_file}")


Event log data with all prefixes and next_activity has been exported to BPIC_2020_prefixes_all.csv
