In [9]:
import numpy as np
from tslearn.metrics import dtw
import scipy as sp
import pandas as pd
import plotly.express as px
import warnings
np.warnings = warnings

from pyclustering.cluster.kmeans import kmeans
from pyclustering.utils.metric import type_metric, distance_metric
from pyclustering.cluster.center_initializer import kmeans_plusplus_initializer

In [10]:
path = '../../data/'

In [11]:
df = pd.read_csv(path + 'data_smooth.csv')
df.head()

Unnamed: 0,x,y,z
0,0.0,0.0,0.0
1,0.077816,0.0425,-0.120315
2,0.229005,0.1275,-0.354938
3,0.455788,0.251128,-0.708374
4,0.764831,0.415965,-1.186632


In [12]:
def my_metric(a: np.ndarray, b: np.ndarray):
    return dtw(a.reshape(-1, 3), b.reshape(-1, 3))

In [13]:
df.shape

(21619, 3)

In [14]:
signals = []
l = 600
s = 10
for i in range(0, df.shape[0]-l, s):
    sig = df.iloc[i:i+l]
    signals.append(sig.values.ravel())

In [15]:
df_signals = np.array(signals)

In [16]:
df_signals.shape

(2102, 1800)

In [17]:
df_signals = (df_signals - df_signals.mean(0)) / df_signals.std(0)

In [18]:
metric = distance_metric(type_metric.USER_DEFINED, func=my_metric)
initial_centers = kmeans_plusplus_initializer(df_signals, 4, metric=metric).initialize()
kmeans_instance = kmeans(df_signals, initial_centers, metric=metric)
kmeans_instance.process()

<pyclustering.cluster.kmeans.kmeans at 0x2442c23c2c0>

In [19]:
clusters = np.zeros(df_signals.shape[0])
for cluster, idx in enumerate(kmeans_instance.get_clusters()):
    clusters[idx] = cluster

In [20]:
clusters = clusters.astype('int')

In [21]:
df_clustered = df.iloc[:df.shape[0] - l]

In [22]:
point_clusters = []

In [23]:
for i in range(df.shape[0] - l):
    point_clusters.append(np.argmax(np.bincount(clusters[max(0, (i - l) // s + 1) : i // s + 1])))

In [24]:
df_clustered['cluster'] = point_clusters

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clustered['cluster'] = point_clusters


In [25]:
df_clustered.cluster = df_clustered.cluster.astype('category')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_clustered.cluster = df_clustered.cluster.astype('category')


In [26]:
fig = px.scatter_3d(df_clustered, 'x', 'y', 'z', color='cluster', height=1200, width=1200)
fig.update_traces(marker=dict(size=2))

In [27]:
fig = px.scatter(df_clustered, y=['x', 'y', 'z'], color='cluster', height=450, width=1500)
fig.update_traces(marker=dict(size=3))