In [1]:
import pm4py
from collections import Counter

from sklearn.cluster import DBSCAN, KMeans

log_path = "logs/running-example.jsonocel"
ocel = pm4py.read_ocel_json(log_path)



In [2]:
object_types = list(set(ocel.objects["ocel:type"]))
object_type_to_id = {k:i for i, k in enumerate(object_types)}
object_type_to_id

{'packages': 0, 'orders': 1, 'customers': 2, 'items': 3, 'products': 4}

In [3]:
ocel.objects[ocel.objects["ocel:type"]=="customers"]

Unnamed: 0,ocel:oid,ocel:type,cost,producer,age,bankaccount
20,Marco Pegoraro,customers,,,50.0,91248.0
21,Gyunam Park,customers,,,55.0,27275.0
22,Majid Rafiei,customers,,,46.0,74370.0
23,Junxiong Gao,customers,,,52.0,96270.0
24,Seran Uysal,customers,,,40.0,69940.0
25,Christina Rensinghof,customers,,,41.0,17020.0
26,Wil van der Aalst,customers,,,34.0,48846.0
27,Christine Dobbert,customers,,,43.0,59702.0
28,Luis Santos,customers,,,48.0,37644.0
29,Kefang Ding,customers,,,38.0,76896.0


In [4]:
activity_to_numeric = {k:i for i, k in enumerate(list(set(ocel.events["ocel:activity"])))}
activity_to_numeric

{'reorder item': 0,
 'pick item': 1,
 'send package': 2,
 'confirm order': 3,
 'payment reminder': 4,
 'item out of stock': 5,
 'pay order': 6,
 'package delivered': 7,
 'create package': 8,
 'failed delivery': 9,
 'place order': 10}

In [5]:
import numpy as np
x = []
event_ids = list(ocel.events["ocel:eid"])
for eid in event_ids:
    eid_num = float(eid)
    if eid_num%1000==0:
        print(eid_num)
    counted = Counter(ocel.relations[ocel.relations["ocel:eid"]==eid]["ocel:type"])
    event_vec = np.zeros(len(object_type_to_id))
    for k in counted.keys():
        vec_id = object_type_to_id[k]
        if counted[k] > 0:
            event_vec[vec_id] = 1
    x.append(event_vec)
x = np.array(x)

1000.0
2000.0
3000.0
4000.0
5000.0
6000.0
7000.0
8000.0
9000.0
10000.0
11000.0
12000.0
13000.0
14000.0
15000.0
16000.0
17000.0
18000.0
19000.0
20000.0
21000.0
22000.0


In [17]:
from clustering_api import ClusteringOCEL

dbscan1 = DBSCAN(eps=0.1, min_samples=5)
dbscan2 = DBSCAN(eps=0.5, min_samples=5)
kmeans = KMeans(n_clusters=6, init="k-means++")
ocel_dbscan1 = ClusteringOCEL(model=dbscan1, ocel=ocel, dataset_from_ocel=x)
ocel_dbscan2 = ClusteringOCEL(model=dbscan2, ocel=ocel, dataset_from_ocel=x)
ocel_kmeans = ClusteringOCEL(model=kmeans, ocel=ocel, dataset_from_ocel=x)

In [18]:
ocel_dbscan1.fit(normalize=True)
print(ocel_dbscan1.unique_labels())

[0 1]


In [19]:
ocel_kmeans.fit(normalize=True)
print(ocel_kmeans.unique_labels())

[0 1]


  self.model.fit(x)


In [30]:
from collections import defaultdict
clusters = defaultdict(list)
labels = ocel_dbscan1.model.labels_
for i in range(labels.size):
    clusters[labels[i]].append(ocel_dbscan1.ocel.events.iloc[i]["ocel:eid"])

In [31]:
for k, v in clusters.items():
    print(k, v[:10])

0 ['1.0', '2.0', '3.0', '4.0', '5.0', '6.0', '7.0', '8.0', '9.0', '10.0']
1 ['121.0', '140.0', '142.0', '160.0', '161.0', '164.0', '178.0', '204.0', '206.0', '227.0']


In [32]:
ocel.relations[ocel.relations["ocel:eid"]=="8.0"]

Unnamed: 0,ocel:eid,ocel:activity,ocel:timestamp,ocel:oid,ocel:type,ocel:qualifier
58,8.0,item out of stock,2019-05-20 13:54:37,880004,items,
59,8.0,item out of stock,2019-05-20 13:54:37,Marco Pegoraro,customers,
60,8.0,item out of stock,2019-05-20 13:54:37,Echo Studio,products,
61,8.0,item out of stock,2019-05-20 13:54:37,990001,orders,


In [33]:
for k, v in clusters.items():
    print(f"cluster {k}, number of events {len(v)}")

cluster 0, number of events 18001
cluster 1, number of events 4366


In [40]:
plot_data = {}
object_types = list(set(ocel.objects["ocel:type"]))
object_types_to_id = {object_type: i  for i, object_type in enumerate(object_types)}
print(object_types_to_id)
for k, v in clusters.items():
    plot_data[k] = np.zeros(len(object_types))
    for eid in v:
        counted = Counter(ocel.relations[ocel.relations["ocel:eid"]==eid]["ocel:type"])
        for c_key in counted.keys():
            if counted[c_key] > 0:
                plot_data[k][object_types_to_id[c_key]]=1

{'packages': 0, 'orders': 1, 'customers': 2, 'items': 3, 'products': 4}


In [49]:
import plotly.graph_objects as go
fig = go.Figure()

for k, v in plot_data.items():
    fig.add_trace(go.Scatterpolar(
        r=v,
        theta=object_types,
        fill='toself',
        name=f'Cluster {k}'
    ))
fig.update_layout(
  polar=dict(
    radialaxis=dict(
      visible=True,
      range=[0, 1]
    )),
  showlegend=True
)

fig.show()
