In [None]:
import pandas as pd
import pm4py

**PROCESS MINING**

In [None]:
# Load the event log from the Excel workbook into a DataFrame.
# NOTE(review): the pm4py calls below presumably expect the standard
# case / activity / timestamp columns -- confirm the sheet's column names.
log = pd.read_excel(io="data/log.xlsx")

In [None]:
# Ask pm4py for the end/start activities of the log and for the value counts
# of the "concept:name" event attribute (reused by the Pareto chart cell).
end_activities = pm4py.get_end_activities(log)
start_activities = pm4py.get_start_activities(log)
activities = pm4py.get_event_attribute_values(log, "concept:name")

# Quick textual summary of where the process starts and ends.
print(
    "Start activities: {}\nEnd activities: {}".format(
        start_activities,
        end_activities,
    )
)

In [None]:
import matplotlib.pyplot as plt
import numpy as np

# Pareto chart of activity frequencies: bars show how often each activity
# occurs (most frequent first); the red line shows the cumulative share of
# all counted events, with a dashed guide at 80%.

# Order activities from most to least frequent.
sorted_activities = dict(sorted(activities.items(), key=lambda item: item[1], reverse=True))
if not sorted_activities:
    # Fail early with a readable message instead of an IndexError on
    # cumulative_values[-1] further down.
    raise ValueError("No activities found in the log; nothing to plot.")
activity_names = list(sorted_activities.keys())
activity_values = list(sorted_activities.values())

# Running total, expressed as a percentage of the grand total.
cumulative_values = np.cumsum(activity_values)
cumulative_percentage = 100 * cumulative_values / cumulative_values[-1]

# Plotting
fig, ax1 = plt.subplots(figsize=(10, 6))

# Bar chart (activities)
bars = ax1.bar(activity_names, activity_values, color='skyblue')
ax1.set_xlabel('Activities')
ax1.set_ylabel('Frequency')
ax1.tick_params(axis='x', rotation=45)

# Value labels just above the base of each bar. The blended transform keeps
# x in data coordinates (bar centre) and y as a fraction of the axes height,
# so the labels stay inside the plot whatever the frequency scale is
# (a fixed data-unit offset would leave the axes for small logs).
label_transform = ax1.get_xaxis_transform()
for bar, value in zip(bars, activity_values):
    ax1.text(
        bar.get_x() + bar.get_width() / 2,
        0.02,
        f'{value}',
        ha='center',
        va='bottom',
        fontsize=9,
        rotation=0,
        transform=label_transform,
    )

# Line chart (cumulative percentage) on a secondary y-axis
ax2 = ax1.twinx()
ax2.plot(activity_names, cumulative_percentage, color='red', marker='o', linestyle='-')
ax2.set_ylabel('Cumulative Percentage %')
ax2.axhline(80, color='green', linestyle='--', label='80% Threshold')
ax2.legend()

# Title goes on the primary axes explicitly rather than on "whichever axes
# is current".
ax1.set_title('Pareto Chart of Activities')
fig.tight_layout()
plt.show()

**Heuristic Miner**

In [None]:
# Discover a heuristics net from the event log. No thresholds are passed, so
# whatever defaults pm4py defines apply.
heu_net = pm4py.discover_heuristics_net(log)
# Render the discovered net; PNG output is requested explicitly.
pm4py.view_heuristics_net(heu_net, format='png')

***Petri net of the Heuristic Miner output (frequency)***

In [None]:
# Heuristics-based Petri net discovery on the same log. The call returns three
# values, unpacked here as the net plus (presumably) its initial and final
# markings -- the same triple the viewer takes below.
net, im, fm = pm4py.discover_petri_net_heuristics(log)
# Render the Petri net as PNG.
pm4py.view_petri_net(net, im, fm, format='png')

**Directly-Follows Graph (Frequency)**

In [None]:
# Discover the DFG from the log; the call returns the graph together with the
# start and end activities.
# NOTE: this rebinds start_activities / end_activities, which an earlier cell
# already assigned from pm4py.get_start_activities / get_end_activities.
dfg, start_activities, end_activities = pm4py.discover_dfg(log)
# Display the graph with its start/end activities (viewer defaults are used).
pm4py.view_dfg(dfg, start_activities, end_activities)

**Directly-Follows Graph (Performance)**

In [None]:
# Discover the performance variant of the DFG from the log.
# NOTE: start_activities / end_activities are rebound here once more.
# NOTE(review): performance figures presumably rely on the log's timestamp
# column being parsed as datetimes -- confirm after loading from Excel.
performance_dfg, start_activities, end_activities = pm4py.discover_performance_dfg(log)
# Display the performance DFG with its start/end activities.
pm4py.view_performance_dfg(performance_dfg, start_activities, end_activities)

**Inductive Miner (BPMN Model)**

In [None]:
# Discover a BPMN model from the log with the inductive miner (no extra
# parameters are passed, so pm4py's defaults apply).
bpmn_graph = pm4py.discover_bpmn_inductive(log)
# Display the BPMN diagram.
pm4py.view_bpmn(bpmn_graph)

***In case you want to convert the Inductive Miner BPMN model into a Petri net***

In [None]:
# Convert the BPMN model held in bpmn_graph into a Petri net.
# NOTE: this rebinds net, im, fm, replacing the Petri net produced by the
# heuristics-miner cell.
net, im, fm = pm4py.convert_to_petri_net(bpmn_graph)
# Render the converted Petri net as PNG.
pm4py.view_petri_net(net, im, fm, format='png')

In [None]:
# Discover a Petri net directly with the inductive miner (no BPMN
# intermediate). The result is kept under its own names, so the
# net / im / fm variables from the previous cells are left untouched.
petri_net, initial_marking, final_marking = pm4py.discover_petri_net_inductive(log)

# Visualize the discovered Petri net. No format argument is passed here, so
# the viewer's default output format is used.
pm4py.view_petri_net(petri_net, initial_marking, final_marking)