<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Exmplo-1" data-toc-modified-id="Exmplo-1-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Exmplo 1</a></span></li><li><span><a href="#Exemplo-2" data-toc-modified-id="Exemplo-2-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Exemplo 2</a></span></li></ul></div>

In [2]:
# Dependency install, kept commented out so that re-running the notebook does not reinstall.
# The recorded output below shows this command installing tslearn 0.4.1.
#! pip install tslearn

Collecting tslearn
  Downloading tslearn-0.4.1-cp37-cp37m-manylinux2010_x86_64.whl (773 kB)
[K     |████████████████████████████████| 773 kB 5.5 MB/s eta 0:00:01
Installing collected packages: tslearn
Successfully installed tslearn-0.4.1


In [17]:
from tslearn.generators import random_walks
from tslearn.clustering import TimeSeriesKMeans
from tslearn.clustering import TimeSeriesCentroidBasedClusteringMixin
from tslearn.utils import to_time_series_dataset


#### Exmplo 1

In [9]:
# Synthetic dataset: 50 univariate random walks of length 32.
# The generator is seeded so the data (and therefore every clustering below)
# is identical after a kernel restart; the estimators already pin random_state=0.
X = random_walks(n_ts=50, sz=32, d=1, random_state=0)


In [10]:
# Euclidean k-means on the random walks: 3 clusters, at most 5 iterations, fixed seed.
euclidean_model = TimeSeriesKMeans(
    n_clusters=3,
    metric="euclidean",
    max_iter=5,
    random_state=0,
)
km = euclidean_model.fit(X)

In [11]:
# Shape of the fitted centroids: one (32, 1) series for each of the 3 clusters.
km.cluster_centers_.shape

(3, 32, 1)

In [12]:
# k-means under the DTW metric; barycenter computation is capped at 5 iterations.
dba_settings = dict(
    n_clusters=3,
    metric="dtw",
    max_iter=5,
    max_iter_barycenter=5,
    random_state=0,
)
km_dba = TimeSeriesKMeans(**dba_settings).fit(X)

In [13]:
# Shape of the DTW centroids; the recorded output matches the Euclidean run, (3, 32, 1).
km_dba.cluster_centers_.shape

(3, 32, 1)

In [14]:
# k-means under the soft-DTW metric; gamma is handed to the metric via metric_params.
soft_dtw_params = {"gamma": .5}
km_sdtw = TimeSeriesKMeans(
    n_clusters=3,
    metric="softdtw",
    metric_params=soft_dtw_params,
    max_iter=5,
    max_iter_barycenter=5,
    random_state=0,
).fit(X)

In [15]:
# Shape of the soft-DTW centroids; the recorded output is again (3, 32, 1).
km_sdtw.cluster_centers_.shape

(3, 32, 1)

In [18]:
# Three series of different lengths (4, 3 and 6 points) packed into a single dataset.
variable_length_series = [
    [1, 2, 3, 4],
    [1, 2, 3],
    [2, 5, 6, 7, 8, 9],
]
X_bis = to_time_series_dataset(variable_length_series)

In [19]:
# Display the dataset: per the output below, the shorter series are padded with NaN
# at the end so that every series has the length of the longest one (6).
X_bis

array([[[ 1.],
        [ 2.],
        [ 3.],
        [ 4.],
        [nan],
        [nan]],

       [[ 1.],
        [ 2.],
        [ 3.],
        [nan],
        [nan],
        [nan]],

       [[ 2.],
        [ 5.],
        [ 6.],
        [ 7.],
        [ 8.],
        [ 9.]]])

In [20]:
# DTW k-means on the variable-length dataset, followed by the centroid shape.
# Note that this rebinds `km`, replacing the Euclidean model fitted earlier.
dtw_model = TimeSeriesKMeans(
    n_clusters=2,
    metric="dtw",
    max_iter=5,
    random_state=0,
)
km = dtw_model.fit(X_bis)
km.cluster_centers_.shape

(2, 6, 1)

#### Exemplo 2

In [22]:
# Author: Romain Tavenard
# License: BSD 3 clause

import numpy
import matplotlib.pyplot as plt

from tslearn.clustering import TimeSeriesKMeans
from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMeanVariance, \
    TimeSeriesResampler

In [None]:
# Load the "Trace" dataset and build the training subset used by the cells below.

seed = 0
numpy.random.seed(seed)  # seeds the global NumPy RNG that the shuffle below draws from

trace_splits = CachedDatasets().load_dataset("Trace")
X_train, y_train, X_test, y_test = trace_splits

# Keep only the series whose label is below 4 (the first 3 classes).
first_three_classes = y_train < 4
X_train = X_train[first_three_classes]

# Shuffle the series in place. y_train is neither filtered nor shuffled,
# so from here on it no longer lines up with X_train.
numpy.random.shuffle(X_train)


In [None]:
# Preprocessing: take the first 50 series, apply the mean-variance scaler,
# then resample each series down to 40 points.
scaler = TimeSeriesScalerMeanVariance()
resampler = TimeSeriesResampler(sz=40)

first_fifty = X_train[:50]
X_train = resampler.fit_transform(scaler.fit_transform(first_fifty))

# Series length after resampling; used as the x-axis limit when plotting.
sz = X_train.shape[1]

In [None]:



def _plot_cluster_row(axes_row, series, labels, centers, title, x_max):
    """Draw one row of cluster panels.

    Each panel shows the series assigned to one cluster as faint black lines
    with the cluster centroid drawn in red on top.

    Parameters
    ----------
    axes_row : sequence of matplotlib Axes
        One axes per cluster; the position in the sequence is the cluster index.
    series : array
        Time series that were clustered, indexable by a boolean mask.
    labels : array
        Cluster index assigned to each entry of ``series``.
    centers : array
        Cluster centroids, indexed by cluster index.
    title : str
        Row title, placed above the middle panel (index 1) only.
    x_max : int
        Upper limit of the x axis.
    """
    for cluster_idx, ax in enumerate(axes_row):
        for member in series[labels == cluster_idx]:
            ax.plot(member.ravel(), "k-", alpha=.2)
        ax.plot(centers[cluster_idx].ravel(), "r-")
        ax.set_xlim(0, x_max)
        ax.set_ylim(-4, 4)
        ax.text(0.55, 0.85, 'Cluster %d' % (cluster_idx + 1),
                transform=ax.transAxes)
        if cluster_idx == 1:
            ax.set_title(title)


n_clusters = 3

# Euclidean k-means
km = TimeSeriesKMeans(n_clusters=n_clusters, verbose=True, random_state=seed)

# DBA-k-means
dba_km = TimeSeriesKMeans(n_clusters=n_clusters,
                          n_init=2,
                          metric="dtw",
                          verbose=True,
                          max_iter_barycenter=10,
                          random_state=seed)

# Soft-DTW-k-means
sdtw_km = TimeSeriesKMeans(n_clusters=n_clusters,
                           metric="softdtw",
                           metric_params={"gamma": .01},
                           verbose=True,
                           random_state=seed)

# (message printed before fitting, title of the figure row, estimator)
runs = [
    ("Euclidean k-means", "Euclidean $k$-means", km),
    ("DBA k-means", "DBA $k$-means", dba_km),
    ("Soft-DTW k-means", "Soft-DTW $k$-means", sdtw_km),
]

# One figure row per clustering variant, one column per cluster.
fig, axes = plt.subplots(len(runs), n_clusters)
for axes_row, (log_label, row_title, model) in zip(axes, runs):
    print(log_label)
    # y_pred is rebound on every pass; after the loop it holds the Soft-DTW labels.
    y_pred = model.fit_predict(X_train)
    _plot_cluster_row(axes_row, X_train, y_pred,
                      model.cluster_centers_, row_title, sz)

fig.tight_layout()
plt.show()