In [1115]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class TripletEmbeddingCriterion(nn.Module):
    def __init__(self, margin=0.5, gamma=2):
        super(TripletEmbeddingCriterion, self).__init__()
        self.margin = margin
        self.gamma = gamma

    def forward(self, anchor, positive, negative):
        N = anchor.size(0)
        
        delta_pos = anchor - positive
        delta_neg = anchor - negative

        norm_delta_pos = torch.norm(delta_pos, p=2, dim=1)
        norm_delta_neg = torch.norm(delta_neg, p=2, dim=1)

        norm_delta_pos = norm_delta_pos * norm_delta_pos * self.gamma
        norm_delta_neg = norm_delta_neg * norm_delta_neg

        delta_pos_neg = norm_delta_pos - norm_delta_neg + self.margin

        loss = F.relu(delta_pos_neg)
        return loss.mean()

    def backward(self, anchor, positive, negative):
        N = anchor.size(0)
        
        delta_pos = anchor - positive
        delta_neg = anchor - negative

        norm_delta_pos = torch.norm(delta_pos, p=2, dim=1)
        norm_delta_neg = torch.norm(delta_neg, p=2, dim=1)

        norm_delta_pos = norm_delta_pos * norm_delta_pos * self.gamma
        norm_delta_neg = norm_delta_neg * norm_delta_neg

        delta_pos_neg = norm_delta_pos - norm_delta_neg + self.margin

        mask = (delta_pos_neg > 0).float().view(-1, 1)

        grad_anchor = mask * (delta_neg - delta_pos * self.gamma) * (2 / N)
        grad_positive = mask * (delta_pos * self.gamma) * (-2 / N)
        grad_negative = mask * delta_neg * (2 / N)

        return grad_anchor, grad_positive, grad_negative




In [1116]:
import random
import numpy as np
import torch

def torch_fix_seed(seed=42):
    # Python random
    random.seed(seed)
    # Numpy
    np.random.seed(seed)
    # Pytorch
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    #torch.use_deterministic_algorithms = True


torch_fix_seed()

In [1117]:
import torch
import torch.nn.functional as F

class AggClustering:
    def __init__(self):
        self.K_c = 5

    def init(self, indices):
        # nsamples = indices.size(0)
        # visited = torch.full((nsamples, 1), -1, dtype=torch.int32)
        # count = 0
        # for i in range(nsamples):
        #     cur_idx = i
        #     pos = []
        #     while visited[cur_idx, 0] == -1:
        #         pos.append(cur_idx)
        #         neighbor = 0
        #         for k in range(indices.size(0)):
        #             neighbor = indices[cur_idx, k].item()
        #             if cur_idx != neighbor:
        #                 break
        #         visited[cur_idx, 0] = -2
        #         cur_idx = neighbor
        #         if len(pos) > 50:
        #             break
        #     if visited[cur_idx, 0] < 0:
        #         visited[cur_idx, 0] = count
        #         count += 1
        #     for p in pos:
        #         visited[p, 0] = visited[cur_idx, 0]
        # label_indice = [[] for _ in range(count)]
        # for i in range(nsamples):
        #     label_indice[visited[i, 0]].append(i)
        # return label_indice

        # Initialize labels for input data given KNN indices
        nsamples = indices.size(0)
        k = indices.size(1)
        visited = torch.full((nsamples, 1), -1, dtype=torch.int)
        count = 0
        
        for i in range(nsamples):
            cur_idx = i
            pos = []
            while visited[cur_idx][0] == -1:
                pos.append(cur_idx)
                neighbor = 0
                for k_idx in range(indices[cur_idx].size(0)):
                    neighbor = indices[cur_idx][k_idx].item()
                    if cur_idx != neighbor:
                        break
                visited[cur_idx][0] = -2
                cur_idx = neighbor
                if len(pos) > 50:
                    break
            
            if visited[cur_idx][0] < 0:
                visited[cur_idx][0] = count
                count += 1
            
            for j in pos:
                visited[j][0] = visited[cur_idx][0]
        
        label_indices = [[] for _ in range(count)]
        
        for i in range(nsamples):
            label_indices[visited[i][0]].append(i)
        
        for i in range(count):
            if len(label_indices[i]) == 0:
                print("error")
        
        return label_indices

    # def merge_two_clusters(self, W, A_s_t, A_us_t, Y_t, idx_c_a, idx_c_b):
    #     A_us_t[:, idx_c_a] += A_us_t[:, idx_c_b]
    #     nsamples_c_a = len(Y_t[idx_c_a])
    #     nsamples_c_b = len(Y_t[idx_c_b])
    #     ratio = nsamples_c_a / (nsamples_c_a + nsamples_c_b)
    #     A_us_t[idx_c_a, :] *= ratio
    #     A_us_t[idx_c_b, :] *= (1 - ratio)
    #     A_us_t[idx_c_a, :] += A_us_t[idx_c_b, :]
    #     A_us_t[idx_c_a, idx_c_a] = 0
    #     A_us_t[:, idx_c_b] = 0
    #     A_us_t[idx_c_b, :] = 0
    #     Y_t[idx_c_a].extend(Y_t[idx_c_b])
    #     Y_t[idx_c_b] = []
    #     for i in range(len(Y_t)):
    #         if len(Y_t[i]) == 0 or i == idx_c_a:
    #             A_s_t[i, idx_c_a] = 0
    #             A_s_t[idx_c_a, i] = 0
    #         elif i < idx_c_a:
    #             A_s_t[i, idx_c_a] = A_us_t[idx_c_a, i] / len(Y_t[idx_c_a])**2 + A_us_t[i, idx_c_a] / len(Y_t[i])**2
    #         elif i > idx_c_a:
    #             A_s_t[idx_c_a, i] = A_us_t[idx_c_a, i] / len(Y_t[idx_c_a])**2 + A_us_t[i, idx_c_a] / len(Y_t[i])**2
    #     return A_s_t, A_us_t, Y_t


    def merge_two_clusters(W, A_s_t, A_us_t, Y_t, idx_c_a, idx_c_b):
        nclusters = len(Y_t)

        idx_c_a_tensor = torch.tensor([idx_c_a], dtype=torch.long)
        idx_c_b_tensor = torch.tensor([idx_c_b], dtype=torch.long)

        A_us_t.index_add_(1, idx_c_a_tensor, A_us_t.index_select(1, idx_c_b_tensor))

        nsamples_c_a = len(Y_t[idx_c_a])
        nsamples_c_b = len(Y_t[idx_c_b])
        ratio = nsamples_c_a / (nsamples_c_a + nsamples_c_b)

        A_us_t[idx_c_a, :] *= ratio
        A_us_t[idx_c_b, :] *= (1 - ratio)
        A_us_t.index_add_(0, idx_c_a_tensor, A_us_t.index_select(0, idx_c_b_tensor))

        A_us_t[idx_c_a, idx_c_a] = 0
        A_us_t[:, idx_c_b] = 0
        A_us_t[idx_c_b, :] = 0

        Y_t[idx_c_a].extend(Y_t[idx_c_b])
        Y_t[idx_c_b] = []

        for i in range(nclusters):
            if len(Y_t[i]) == 0 or i == idx_c_a:
                A_s_t[i, idx_c_a] = 0
                A_s_t[idx_c_a, i] = 0
            elif i < idx_c_a:
                A_s_t[i, idx_c_a] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)
            elif i > idx_c_a:
                A_s_t[idx_c_a, i] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)

        return A_s_t, A_us_t, Y_t

    # def search_clusters(self, A_s_t):
    #     A_sorted, idx_sort = torch.sort(A_s_t, dim=1, descending=True)
    #     aff = torch.zeros(1, A_sorted.size(1))
    #     for i in range(A_sorted.size(1)):
    #         aff[0, i] = A_sorted[0, i]
    #         if A_sorted.size(1) > 100:
    #             for k in range(1, self.K_c):
    #                 aff[0, i] += (A_sorted[0, i] - A_sorted[k, i]) / (self.K_c - 1)
    #     v_c, idx_c = torch.max(aff, 1)
    #     idx_c_b = idx_c.item()
    #     idx_c_a = idx_sort[0, idx_c_b].item()
    #     if idx_c_a == idx_c_b:
    #         raise ValueError("Error: idx_c_a == idx_c_b")
    #     if idx_c_a > idx_c_b:
    #         idx_c_a, idx_c_b = idx_c_b, idx_c_a
    #     return idx_c_a, idx_c_b

    def search_clusters(self, A_s_t):
        # print("cluster numbers:", nclusters)
        nclusters = A_s_t.size(0)  # クラスタの数を取得
        A_sorted, idx_sort = torch.sort(A_s_t, dim=0, descending=True)
        # print("A_s_t: ", A_s_t.size())
        # print("nclusters: ", nclusters)
        # print("A_sorted: ", A_sorted)
        # print("idx_sort: ", idx_sort)
        aff = torch.zeros(1, A_sorted.size(1), dtype=torch.float32)

        for i in range(A_sorted.size(1)):
            aff[0, i] = A_sorted[0, i]
            if A_sorted.size(1) > 100:
                for k in range(1, self.K_c):
                    aff[0, i] += (A_sorted[0, i] - A_sorted[k, i]) / (self.K_c - 1)

        v_c, idx_c = torch.max(aff, dim=1)  # each row
        # print("idx_c: ", idx_c)
        # find corresponding cluster labels for two clusters
        idx_c_b = idx_c[0].item()         # col
        idx_c_a = idx_sort[0, idx_c_b].item()        # row

        # インデックスの検証と調整
        if idx_c_a >= nclusters or idx_c_b >= nclusters:
            raise ValueError(f"Error: idx_c_a ({idx_c_a}) or idx_c_b ({idx_c_b}) is out of range (nclusters: {nclusters})")
        if idx_c_a == idx_c_b:
            print("error")
            raise ValueError("idx_c_a and idx_c_b are the same")
        elif idx_c_a > idx_c_b:
            idx_c_a, idx_c_b = idx_c_b, idx_c_a

        return idx_c_a, idx_c_b

    # def search_clusters(self,A_s_t):
    #     # Sort the tensor along the first dimension in descending order
    #     A_sorted, Idx_sort = torch.sort(A_s_t, dim=0, descending=True)
        
    #     # Initialize affinity tensor
    #     aff = torch.zeros(1, A_sorted.size(1))
        
    #     for i in range(A_sorted.size(1)):
    #         aff[0, i] = A_sorted[0, i]
    #         if A_sorted.size(1) > 100:
    #             for k in range(1, self.K_c ):  # Adjusting index for Python's 0-based indexing
    #                 aff[0, i] += (A_sorted[0, i] - A_sorted[k, i]) / (self.K_c - 1)
        
    #     # Find the maximum value in the affinity tensor along the second dimension
    #     v_c, idx_c = torch.max(aff, dim=1)
        
    #     # Find corresponding cluster labels for two clusters
    #     idx_c_b = idx_c.item()  # Converting tensor to integer
    #     idx_c_a = Idx_sort[0, idx_c_b].item()  # Converting tensor to integer
        
    #     if idx_c_a == idx_c_b:
    #         print("error")
    #         raise ValueError("Cluster indices are equal, which indicates an error.")
    #     elif idx_c_a > idx_c_b:
    #         idx_c_a, idx_c_b = idx_c_b, idx_c_a  # Swap values
        
    #     return idx_c_a, idx_c_b


    def run_step(self, W, A_s_t, A_us_t, Y_t):
        nclusters = len(Y_t)
        # print("Cluster Num: ", nclusters)
        # print("numc",A_s_t.size(0))
        idx_c_a, idx_c_b = self.search_clusters(A_s_t)
        A_us_t[:, idx_c_a] += A_us_t[:, idx_c_b]
        Y_t[idx_c_a].extend(Y_t[idx_c_b])
        Y_t[idx_c_b] = []
        for i in range(len(Y_t)):
            if len(Y_t[i]) > 0 and i != idx_c_a:
                W_i = W[Y_t[i], :]
                W_i_idx_c_a = W_i[:, Y_t[idx_c_a]]
                W_idx_c_a = W[Y_t[idx_c_a], :]
                W_idx_c_a_i = W_idx_c_a[:, Y_t[i]]
                A_us_t[idx_c_a, i] = torch.sum(torch.mm(W_idx_c_a_i, W_i_idx_c_a))
        A_us_t[idx_c_a, idx_c_a] = 0
        A_us_t[:, idx_c_b] = 0
        A_us_t[idx_c_b, :] = 0
        for i in range(nclusters):
            if len(Y_t[i]) == 0 or i == idx_c_a:
                A_s_t[i, idx_c_a] = 0
                A_s_t[idx_c_a, i] = 0
            elif i < idx_c_a:
                A_s_t[i, idx_c_a] = A_us_t[idx_c_a, i] / len(Y_t[idx_c_a])**2 + A_us_t[i, idx_c_a] / len(Y_t[i])**2
            elif i > idx_c_a:
                A_s_t[idx_c_a, i] = A_us_t[idx_c_a, i] / len(Y_t[idx_c_a])**2 + A_us_t[i, idx_c_a] / len(Y_t[i])**2
        A_s_t[:, idx_c_b] = 0
        A_s_t[idx_c_b, :] = 0
        return A_s_t, A_us_t, Y_t

    # def run_step_fast(self,W, A_s_t, A_us_t, Y_t):
    #     # timer = torch.Timer()
    #     # get the number of clusters
    #     nclusters = len(Y_t)
    #     print("Cluster Num: ", nclusters)
    #     print("numc",A_s_t.size(0))
    #     # print("Cluster Num: ", nclusters)
    #     # find maximal value in A_t
    #     idx_c_a, idx_c_b = self.search_clusters(A_s_t)
    #     # update affinity matrix A_t
    #     # update A_t(idx_c_a->i) = A_t(idx_c_a->i) + A_t(idx_c_b->i)
    #     A_us_t.index_add_(1, torch.LongTensor([idx_c_a]), A_us_t.index_select(1, torch.LongTensor([idx_c_b])))

    #     # update A_t(i->idx_c_a) = r_a * A_t(i->idx_c_a) + r_b * A_t(i->idx_c_b) (fast algorithm)
    #     # nsamples in cluster idx_c_a
    #     A_us_t.index_add_(0, torch.LongTensor([idx_c_a]), A_us_t.index_select(0, torch.LongTensor([idx_c_b])))
        
    #     # update cluster labels Y_t   
    #     print("y_t: ", Y_t)
    #     print("idx_c_a: ", idx_c_a)
    #     print("idx_c_b: ", idx_c_b)
    #     Y_t[idx_c_a].extend(Y_t[idx_c_b])
    #     Y_t[idx_c_b] = []
        
    #     # update A_s_t   
    #     for i in range(nclusters):
    #         if len(Y_t[i]) == 0 or i == idx_c_a:
    #             A_s_t[i, idx_c_a] = 0
    #             A_s_t[idx_c_a, i] = 0
    #         elif i < idx_c_a:
    #             A_s_t[i, idx_c_a] =  A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)
    #         elif i > idx_c_a:
    #             A_s_t[idx_c_a, i] =  A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)

    #     # print(A_us_t.size())
    #     # print(nclusters)
    #     if idx_c_b != nclusters:
    #         # print(idx_c_b)
    #         # print(A_us_t.index_select(0, torch.LongTensor([1])))
    #         A_us_t.index_copy_(0, torch.LongTensor([idx_c_b]), A_us_t.index_select(0, torch.LongTensor([nclusters-1])))
    #         A_us_t.index_copy_(1, torch.LongTensor([idx_c_b]), A_us_t.index_select(1, torch.LongTensor([nclusters-1])))
    #         A_us_t[idx_c_b, idx_c_b] = 0

    #         # print("Pre: ", A_s_t[:idx_c_b+1, idx_c_b])
    #         # print("Pre: ", A_s_t[idx_c_b, idx_c_b:nclusters])
    #         A_s_t[:idx_c_b+1, idx_c_b] = A_s_t[:idx_c_b+1, nclusters-1]      
    #         A_s_t[idx_c_b, idx_c_b:nclusters] = A_s_t[idx_c_b:nclusters, nclusters-1].t()
    #         A_s_t[idx_c_b, idx_c_b] = 0
    #         # print("Cur: ", A_s_t[:idx_c_b+1, idx_c_b])
    #         # print("Cur: ", A_s_t[idx_c_b, idx_c_b:nclusters])

    #         Y_t[idx_c_b].extend(Y_t[nclusters-1])

    #     A_us_t = A_us_t[:nclusters-1, :nclusters-1]
    #     A_s_t = A_s_t[:nclusters-1, :nclusters-1]
    #     del Y_t[nclusters-1]   
    #     # print(Y_t)
    #     # timer = torch.Timer()
    #     # print('Time-2 elapsed: ' .. timer:time().real .. ' seconds')
    #     # return updated A_s_t, A_us_t and Y_t
    #     return A_s_t, A_us_t, Y_t

    def run_step_fast(self, W, A_s_t, A_us_t, Y_t):
        nclusters = len(Y_t)
        idx_c_a, idx_c_b = self.search_clusters(A_s_t)

        A_us_t.index_add_(1, torch.LongTensor([idx_c_a]), A_us_t.index_select(1, torch.LongTensor([idx_c_b])))
        A_us_t.index_add_(0, torch.LongTensor([idx_c_a]), A_us_t.index_select(0, torch.LongTensor([idx_c_b])))

        # print("y_t: ", Y_t)
        # print("idx_c_a: ", idx_c_a)
        # print("idx_c_b: ", idx_c_b)
        Y_t[idx_c_a].extend(Y_t[idx_c_b])
        Y_t[idx_c_b] = []

        for i in range(nclusters):
            if len(Y_t[i]) == 0 or i == idx_c_a:
                A_s_t[i, idx_c_a] = 0
                A_s_t[idx_c_a, i] = 0
            elif i < idx_c_a:
                A_s_t[i, idx_c_a] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)
            elif i > idx_c_a:
                A_s_t[idx_c_a, i] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)

        if idx_c_b != nclusters - 1:
            A_us_t.index_copy_(0, torch.LongTensor([idx_c_b]), A_us_t.index_select(0, torch.LongTensor([nclusters-1])))
            A_us_t.index_copy_(1, torch.LongTensor([idx_c_b]), A_us_t.index_select(1, torch.LongTensor([nclusters-1])))
            A_us_t[idx_c_b, idx_c_b] = 0

            A_s_t[:idx_c_b+1, idx_c_b] = A_s_t[:idx_c_b+1, nclusters-1].clone()
            A_s_t[idx_c_b, idx_c_b:nclusters] = A_s_t[idx_c_b:nclusters, nclusters-1].clone().t()
            A_s_t[idx_c_b, idx_c_b] = 0

            Y_t[idx_c_b].extend(Y_t[nclusters-1])

        # Update the size of A_s_t and A_us_t to match the new number of clusters
        A_us_t = A_us_t[:nclusters-1, :nclusters-1]
        A_s_t = A_s_t[:nclusters-1, :nclusters-1]
        del Y_t[nclusters-1]

        return A_s_t, A_us_t, Y_t





    def run(self, W, A_unsym_0, A_sym_0, Y_0, T, K_c_in, use_fast):
        nclusters = len(Y_0)
        A_sym_0_sum = torch.sum(A_sym_0, dim=1)
        self.K_c = K_c_in
        t = 0
        while t < T:
            if use_fast:
                A_sym_0, A_unsym_0, Y_0 = self.run_step_fast(W, A_sym_0, A_unsym_0, Y_0)
            else:
                A_sym_0, A_unsym_0, Y_0 = self.run_step(W, A_sym_0, A_unsym_0, Y_0)
            t += 1
        Y_T = [cluster for cluster in Y_0 if len(cluster) > 0]
        return Y_T

In [1118]:
import torch
from sklearn.neighbors import NearestNeighbors
import time

class Affinity:
    def compute(self, X, k):
        if X.size(0) > 50000:
            ind = torch.arange(1, X.size(0) + 1).long().split(10000)
            dists = torch.zeros(X.size(0), k + 1, dtype=X.dtype)
            indices = torch.zeros(X.size(0), k + 1, dtype=torch.int)

            for v in ind:
                nbrs = NearestNeighbors(n_neighbors=k+1, algorithm='auto').fit(X)
                dists_batch, indices_batch = nbrs.kneighbors(X[v - 1])
                dists[v - 1] = torch.tensor(dists_batch)
                indices[v - 1] = torch.tensor(indices_batch)
        else:
            nbrs = NearestNeighbors(n_neighbors=k+1, algorithm='auto').fit(X)
            dists, indices = nbrs.kneighbors(X)
            dists = torch.tensor(dists)
            indices = torch.tensor(indices)

        sigma_square = torch.mean(dists[:, 1:k+1])
        print("sigma:", torch.sqrt(sigma_square))

        nsamples = X.size(0)
        W = torch.zeros(nsamples, nsamples)

        for i in range(nsamples):
            for j in range(1, k + 1):
                nn_ind = indices[i][j]
                W[i][nn_ind] = torch.exp(-dists[i][j] / sigma_square)
        
        return dists, indices, W

    def compute4cluster(self, X, W, Y_0, k, k_target):
        nclusters = len(Y_0)
        dim = X.size(1)
        X_clusters = torch.zeros(nclusters, dim)

        for i in range(nclusters):
            X_clusters[i] = torch.mean(X[torch.LongTensor(Y_0[i])], dim=0)

        nbrs = NearestNeighbors(n_neighbors=k, algorithm='auto').fit(X_clusters)
        dists, indices = nbrs.kneighbors(X_clusters)
        dists = torch.tensor(dists)
        indices = torch.tensor(indices)

        NNs = torch.zeros(nclusters, nclusters)
        # print(indices.size())

        for i in range(nclusters):
            for j in range(1, indices.size(1)):
                nn_ind = indices[i][j]
                NNs[i][nn_ind] = 1

        max_number = max(len(y) for y in Y_0)
        Y_0_tensor = torch.zeros(nclusters, max_number)

        for i in range(nclusters):
            for j in range(len(Y_0[i])):
                Y_0_tensor[i][j] = Y_0[i][j]

        timer = time.time()
        A_unsym_0_c, A_sym_0_c = self.compute_CAff(W, NNs, Y_0_tensor)

        if k > 20 * k_target:
            A_unsym_0_c = A_unsym_0_c.double()
            A_sym_0_c = A_sym_0_c.double()

            A_unsym_0_c_sum_r = torch.sum(A_unsym_0_c, dim=1)
            A_unsym_0_c_sum_c = torch.sum(A_unsym_0_c, dim=0)

            for i in range(nclusters):
                if A_unsym_0_c_sum_r[i] == 0 and A_unsym_0_c_sum_c[i] == 0:
                    idx_a = i
                    idx_b = 0
                    for k in range(indices.size(1)):
                        if indices[i][k] != i:
                            idx_b = indices[i][k]
                            break

                    if idx_b > 0:
                        if idx_a > idx_b:
                            print("merge", idx_b, idx_a)
                            A_sym_0_c, A_unsym_0_c, Y_0 = self.merge_two_clusters(W, A_sym_0_c, A_unsym_0_c, Y_0, idx_b, idx_a)
                        else:
                            print("merge", idx_a, idx_b)
                            A_sym_0_c, A_unsym_0_c, Y_0 = self.merge_two_clusters(W, A_sym_0_c, A_unsym_0_c, Y_0, idx_a, idx_b)

                        A_unsym_0_c_sum_r = torch.sum(A_unsym_0_c, dim=1)
                        A_unsym_0_c_sum_c = torch.sum(A_unsym_0_c, dim=0)

        print('Time elapsed for computing cluster affinity:', time.time() - timer, 'seconds')
        # print("Y_0: ", Y_0)
        # print("A_unsym_0_c: ", A_unsym_0_c.size())
        # print("A_sym_0_c: ", A_sym_0_c.size())
        return A_unsym_0_c, A_sym_0_c, Y_0

    # def compute_CAff(self, W, NNs, Y_0_tensor):
    #     # Placeholder function to simulate compute_CAff. Actual implementation required.
    #     A_unsym_0_c = torch.rand(W.size())
    #     A_sym_0_c = torch.rand(W.size())
    #     return A_unsym_0_c, A_sym_0_c

    def compute_CAff(self,W, NNs, Y):
        nclusters = NNs.size(0)
        
        A_us = torch.zeros_like(NNs)
        A_s = torch.zeros_like(NNs)

        for i in range(nclusters):
            for j in range(i, nclusters):
                if NNs[i, j] == 0 and NNs[j, i] == 0:
                    A_us[j, i] = 0
                    A_us[i, j] = 0
                    A_s[j, i] = 0
                    A_s[i, j] = 0
                    continue

                if i == j:
                    A_us[j, i] = 0
                    A_s[j, i] = 0
                    continue

                # get the size of Y[i] and Y[j]
                Y_i_size = (Y[i] != 0).sum().item()
                Y_j_size = (Y[j] != 0).sum().item()

                # compute affinity from cluster i to cluster j
                A_c_i_j = 0
                for m in range(Y_i_size):
                    s_W_c_j_i = 0
                    s_W_c_i_j = 0
                    for n in range(Y_j_size):
                        s_W_c_j_i += W[Y[j, n].long() - 1, Y[i, m].long() - 1]
                        s_W_c_i_j += W[Y[i, m].long() - 1, Y[j, n].long() - 1]
                    A_c_i_j += s_W_c_j_i * s_W_c_i_j

                # compute affinity from cluster j to cluster i
                A_c_j_i = 0
                for m in range(Y_j_size):
                    s_W_c_j_i = 0
                    s_W_c_i_j = 0
                    for n in range(Y_i_size):
                        s_W_c_j_i += W[Y[j, m].long() - 1, Y[i, n].long() - 1]
                        s_W_c_i_j += W[Y[i, n].long() - 1, Y[j, m].long() - 1]
                    A_c_j_i += s_W_c_i_j * s_W_c_j_i

                A_us[j, i] = A_c_i_j
                A_us[i, j] = A_c_j_i
                A_s[i, j] = A_c_i_j / (Y_j_size ** 2) + A_c_j_i / (Y_i_size ** 2)
                A_s[j, i] = 0

        return A_us, A_s


    def merge_two_clusters(self, W, A_s_t, A_us_t, Y_t, idx_c_a, idx_c_b):
        nclusters = len(Y_t)

        A_us_t[:, idx_c_a] += A_us_t[:, idx_c_b]

        nsamples_c_a = len(Y_t[idx_c_a])
        nsamples_c_b = len(Y_t[idx_c_b])
        ratio = nsamples_c_a / (nsamples_c_a + nsamples_c_b)

        A_us_t[idx_c_a] *= ratio
        A_us_t[idx_c_b] *= 1 - ratio
        A_us_t[idx_c_a] += A_us_t[idx_c_b]
        A_us_t[idx_c_a, idx_c_a] = 0
        A_us_t[:, idx_c_b] = 0
        A_us_t[idx_c_b, :] = 0

        Y_t[idx_c_a].extend(Y_t[idx_c_b])
        Y_t[idx_c_b] = []

        for i in range(nclusters):
            if len(Y_t[i]) == 0 or i == idx_c_a:
                A_s_t[i, idx_c_a] = 0
                A_s_t[idx_c_a, i] = 0
            elif i < idx_c_a:
                A_s_t[i, idx_c_a] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)
            elif i > idx_c_a:
                A_s_t[idx_c_a, i] = A_us_t[idx_c_a, i] / (len(Y_t[idx_c_a]) ** 2) + A_us_t[i, idx_c_a] / (len(Y_t[i]) ** 2)

        return A_s_t, A_us_t, Y_t

In [1119]:
class Evaluate:
    def NMI(self, labels_gt, labels_pre):
        N = sum(len(l) for l in labels_gt)
        # Compute entropy for labels_gt
        pr_gt = torch.zeros(len(labels_gt), 1)
        for i, label in enumerate(labels_gt):
            pr_gt[i] = len(label) / N
        pr_gt_log = torch.log(pr_gt)
        H_gt = -torch.sum(pr_gt * pr_gt_log)

        # Compute entropy for labels_pre
        pr_pre = torch.zeros(len(labels_pre), 1)
        for i, label in enumerate(labels_pre):
            pr_pre[i] = len(label) / N
        pr_pre_log = torch.log(pr_pre)
        H_pre = -torch.sum(pr_pre * pr_pre_log)

        # Compute mutual information
        # Build M_gt
        M_gt = torch.zeros(N, len(labels_gt))
        for i, label in enumerate(labels_gt):
            for j in label:
                if j < N:  # Ensure the index is within bounds
                    M_gt[j, i] = 1  # Keep it zero-based

        # Build M_pre
        M_pre = torch.zeros(N, len(labels_pre))
        for i, label in enumerate(labels_pre):
            for j in label:
                if j < N:  # Ensure the index is within bounds
                    M_pre[j, i] = 1  # Keep it zero-based

        pr_gp = torch.mm(M_gt.t(), M_pre) / N
        pr_gp_log = torch.log(pr_gp + 1e-10)
        H_gp = -torch.sum(pr_gp * pr_gp_log)

        # Compute mutual information
        MI = H_gt + H_pre - H_gp
        NMI = MI / torch.sqrt(H_gt * H_pre)

        return NMI.item()

In [1120]:
import torch
import torch.nn as nn
import torch.optim as optim
import h5py
import numpy as np
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset
import random
import os
import argparse

In [1121]:
# 直接スクリプト内で設定するオプション
class Options:
    dataset = 'custom'
    eta = 0.2
    epoch_rnn = 1
    batchSize = 10
    learningRate = 0.01
    weightDecay = 5e-5
    momentum = 0.9
    gamma_lr = 0.0001
    power_lr = 0.75
    num_nets = 1
    epoch_pp = 20
    epoch_max = 1000
    K_s = 5
    K_c = 5
    gamma_tr = 1
    margin_tr = 0.2
    num_nsampling = 5
    use_fast = 1
    updateCNN = 1
    centralize_input = 0
    centralize_feature = 0
    normalize = 1

opt = Options()


In [1137]:
import pandas as pd
from datautils import _get_time_features,load_forecast_csv
data, train_slice, valid_slice, test_slice, scaler, pred_lens, data1=load_forecast_csv("phone/phone_data_10",False)

data
(7352, 10)
Index([   0.0,    1.0,    2.0,    3.0,    4.0,    5.0,    6.0,    7.0,    8.0,
          9.0,
       ...
       7342.0, 7343.0, 7344.0, 7345.0, 7346.0, 7347.0, 7348.0, 7349.0, 7350.0,
       7351.0],
      dtype='float64', name='date', length=7352)
(7352, 10)
data
(7352, 10)
slice(None, 4411, None)
slice(4411, 5881, None)
slice(5881, None, None)
(1, 7352, 10)


  data = pd.read_csv(f'datasets/{name}.csv', index_col='date', parse_dates=True)


In [1138]:
from ts2vec import TS2Vec
ts2_model = TS2Vec(
    input_dims=data.shape[-1],
    length_dim=275,
    device="cpu",
    output_dims=320,
    input_total=1,
    max_train_length=300,
    #output_dims=10
)
ts2_model.load('phone_600_wind1.pth')

1
10


In [1139]:
all_repr = ts2_model.encode(
        data,
        causal=False,
        sliding_length=1,
        sliding_padding= 200,
        batch_size=256,
        #encoding_window='multiscale'
    )

a


In [1140]:
test_2d = all_repr.reshape(7352, 320)
test_2d=test_2d[:1000]

In [1142]:
X=torch.from_numpy(test_2d).float()

In [1158]:
# ファイルのパス
file_path = 'datasets/phone/y_train.txt'

# ファイルを読み込む
with open(file_path, 'r') as file:
    lines = file.readlines()

# データをリストに変換
data = []
for line in lines:
    # 行の空白を削除し、カンマで分割して浮動小数点数に変換
    numbers = [float(num) for num in line.strip().split(',')]
    data.append(numbers)

# リストをPyTorchのテンソルに変換
tensor_data = torch.tensor(data)[:1000]
tensor_data=tensor_data.reshape(1000)
y=tensor_data
#print(tensor_data)

In [1178]:
# # ベクトルデータの定義
# input_size = 320  # 入力ベクトルの次元
# X = torch.randn(50, input_size)  # 50個の320次元ベクトル
# y = torch.randint(0, 5, (50,))  # 0から4までのランダムなラベル
print(X.size())
print(y.size())
print("NaN in X:", torch.isnan(X).any())
print("NaN in y:", torch.isnan(y).any())

class CustomVectorDataset(Dataset):
    def __init__(self, data, labels):
        self.data = data
        self.labels = labels

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

# データセットとデータローダーの作成
dataset = CustomVectorDataset(X, y)
data_loader = DataLoader(dataset, batch_size=opt.batchSize, shuffle=False)

torch.Size([1000, 320])
torch.Size([1000])
NaN in X: tensor(False)
NaN in y: tensor(False)


In [1179]:
# データのロード
def load_data(data_loader):
    data = []
    labels = []
    for vectors, lbls in data_loader:
        data.append(vectors)
        labels.append(lbls)
    data = torch.cat(data)
    labels = torch.cat(labels)
    return data, labels

train_data, train_labels = load_data(data_loader)

In [1180]:
# if opt.centralize_input == 1:
#     train_data -= train_data.mean(dim=0, keepdim=True)

test_data = train_data.clone()
test_labels = train_labels.clone()
print(test_labels)

tensor([5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
        5., 5., 5., 5., 5., 5., 5., 5., 5., 4., 4., 4., 4., 4., 4., 4., 4., 4.,
        4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 6., 6., 6.,
        6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6., 6.,
        6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 3.,
        3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3., 3.,
        3., 3., 3., 3., 3., 3., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
        2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 5., 5., 5., 5.,
        5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5., 5.,
        5., 5., 5., 5., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 4., 6., 6., 6.,
        6., 6., 6., 6., 6., 6., 6., 6., 

In [1162]:
# Initialize networks
def init_weights(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, 0, 0.01)
        if m.bias is not None:
            nn.init.zeros_(m.bias)

affinity = Affinity()
evaluate = Evaluate()
agg_clustering = AggClustering()

In [1163]:
def load_model(input_size):
    model = nn.Sequential(
        nn.Linear(input_size, 128),
        nn.ReLU(),
        nn.Linear(128, 10)
    )
    return model

In [1221]:
# Initialize CNN models and variables
print('==> configuring model')
num_networks = opt.num_nets
network_table = []
optimizer_table = []
criterion_triplet = TripletEmbeddingCriterion(opt.margin_tr, opt.gamma_tr)

==> configuring model


In [1222]:
for _ in range(num_networks):
    model = load_model(input_size)
    model.apply(init_weights)
    network_table.append(model)
    optimizer = optim.SGD(model.parameters(), lr=opt.learningRate, weight_decay=opt.weightDecay, momentum=opt.momentum)
    optimizer_table.append(optimizer)

In [1223]:
label_gt_table_table = []
label_pre_table_table = []
label_pre_tensor_table = []
target_nclusters_table = []

In [1224]:
def cvt2TableLabels(labels):
    unique_labels = torch.unique(labels)
    label_table = {label.item(): [] for label in unique_labels}
    for idx, label in enumerate(labels):
        label_table[label.item()].append(idx)
    return list(label_table.values())

print(cvt2TableLabels(test_labels))
for _ in range(num_networks):
    label_gt_table_table.append(cvt2TableLabels(test_labels))
    label_pre_table_table.append([])
    label_pre_tensor_table.append([])
    target_nclusters_table.append(len(label_gt_table_table[-1]))
print(target_nclusters_table)


[[78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 

In [1225]:

epoch_reset_labels = [0] * num_networks

def getnClusters(label_pre):
    nClusters = 0
    for cluster in label_pre:
        if len(cluster) > 0:
            nClusters += 1
    return nClusters

def update_labels(features, label_pre, target_clusters, iter):
    # print("compute affinity, ", features.size())
    d, ind, W = affinity.compute(features, opt.K_s)  # sigma_l not used here
    # sigma = sigma_l
    if iter == 0:
        print("initialize clusters...")
        # print("ind", ind.size())
        label_pre = agg_clustering.init(ind)
        # print("nclusters: ", getnClusters(label_pre))
        return label_pre

    print("nclusters: ", getnClusters(label_pre))
    A_us, A_s, label_pre = affinity.compute4cluster(features, W, label_pre, getnClusters(label_pre), target_clusters)
    # print("nclusters affinity_compute: ", getnClusters(label_pre))
    n_clusters = getnClusters(label_pre)
    # print("A_s", A_s.size())
    # print("new_n_clusters", n_clusters)
    #12
    print("run agglomerative clustering...")

    # Convert n_clusters to tensor
    n_clusters_tensor = torch.tensor(n_clusters, dtype=torch.float32)
    unfold_iter = torch.ceil(n_clusters_tensor * opt.eta).item()
    unfold_valid_iter = n_clusters - target_clusters
    iterations = min(unfold_iter, unfold_valid_iter)

    if iterations <= 0:
        print("nclusters:1Time: ", getnClusters(label_pre))
        return label_pre

    label_pre = agg_clustering.run(W, A_us, A_s, label_pre, iterations, opt.K_c, opt.use_fast)
    return label_pre

def extract_features(model, data):
    model.eval()
    with torch.no_grad():
        features = []
        for batch in DataLoader(CustomVectorDataset(data, torch.zeros(len(data))), batch_size=opt.batchSize, shuffle=False):
            inputs, _ = batch
            inputs = inputs
            outputs = model(inputs)
            features.append(outputs.cpu())
    return torch.cat(features)

def cvt2TensorLabels(labels):
    tensor_labels = torch.zeros(sum(len(l) for l in labels), dtype=torch.long)
    for cluster_id, cluster in enumerate(labels):
        tensor_labels[cluster] = cluster_id + 1
    return tensor_labels.unsqueeze(1)

def merge_labels(network_table, epoch_reset_labels, train_data):
    for i, model in enumerate(network_table):
        if epoch_reset_labels[i] == 0 or opt.updateCNN == 0:
            features = train_data
        else:
            features = extract_features(model, train_data)
        
        if opt.centralize_feature == 1:
            features -= features.mean(dim=0, keepdim=True)
        
        if opt.normalize == 1:
            features = nn.functional.normalize(features, p=2, dim=1)

        print("feature dims:", features.size())
        label_pre_table_table[i] = update_labels(features, label_pre_table_table[i], target_nclusters_table[i], epoch_reset_labels[i])
        epoch_reset_labels[i] += 1
        nclusters = len(label_pre_table_table[i])
        print("nclusters:", nclusters)
        label_pre_tensor_table[i] = cvt2TensorLabels(label_pre_table_table[i])

def merge_labels_final():
    for i, model in enumerate(network_table):
        features = extract_features(model, train_data)
        if opt.centralize_feature == 1:
            features -= features.mean(dim=0, keepdim=True)
        
        if opt.normalize == 1:
            features = nn.functional.normalize(features, p=2, dim=1)

        label_pre_table_table[i] = update_labels(features, label_pre_table_table[i], target_nclusters_table[i], epoch_reset_labels[i])
        epoch_reset_labels[i] += 1
        nclusters = len(label_pre_table_table[i])
        print("nclusters:", nclusters)
        label_pre_tensor_table[i] = cvt2TensorLabels(label_pre_table_table[i])

def organize_samples(X, y):
    num_s = X.size(0)
    y_table = cvt2TableLabels(y)
    nclusters = len(y_table)
    if nclusters == 1:
        return None, None
    num_neg_sampling = min(opt.num_nsampling, nclusters - 1)
    num_triplet = sum(len(cluster) * (len(cluster) - 1) * num_neg_sampling // 2 for cluster in y_table if len(cluster) > 1)
    if num_triplet == 0:
        return None, None

    A = torch.zeros(num_triplet, X.size(1), device=X.device)
    B = torch.zeros(num_triplet, X.size(1), device=X.device)
    C = torch.zeros(num_triplet, X.size(1), device=X.device)
    A_ind = torch.zeros(num_triplet, dtype=torch.long)
    B_ind = torch.zeros(num_triplet, dtype=torch.long)
    C_ind = torch.zeros(num_triplet, dtype=torch.long)
    id_triplet = 0

    for i, cluster in enumerate(y_table):
        if len(cluster) > 1:
            for m in range(len(cluster)):
                for n in range(m + 1, len(cluster)):
                    is_chosen = torch.zeros(num_s, dtype=torch.bool)
                    chosen_count = 0
                    while chosen_count < num_neg_sampling:
                        id_s = random.randint(0, num_s - 1)
                        if not is_chosen[id_s] and y[id_s] != y[cluster[m]]:
                            A_ind[id_triplet] = cluster[m]
                            B_ind[id_triplet] = cluster[n]
                            C_ind[id_triplet] = id_s
                            is_chosen[id_s] = True
                            chosen_count += 1
                            id_triplet += 1

    A.copy_(X[A_ind])
    B.copy_(X[B_ind])
    C.copy_(X[C_ind])
    return [A, B, C], [A_ind, B_ind, C_ind]

def cvt2df_do(df_do, df_dtriplets, triplets_ind):
    df_do.index_add_(0, triplets_ind[0], df_dtriplets[0])
    df_do.index_add_(0, triplets_ind[1], df_dtriplets[1])
    df_do.index_add_(0, triplets_ind[2], df_dtriplets[2])
    return df_do

# def update_CNN():
#     for model, optimizer in zip(network_table, optimizer_table):
#         model.train()
#     epoch = 1
#     print(f'==> online epoch # {epoch} [batchSize = {opt.batchSize}] [learningRate = {opt.learningRate}]')
#     indices = torch.randperm(len(train_data)).split(opt.batchSize)

#     for t, v in enumerate(indices, 1):
#         iter = epoch * len(indices) + t - 1
#         learning_rate = opt.learningRate * (1 + opt.gamma_lr * iter) ** -opt.power_lr
#         for param_group in optimizer.param_groups:
#             param_group['lr'] = learning_rate
        
#         inputs = train_data[v]
#         targets = label_pre_tensor_table[0][v].squeeze()  # Change this if you have multiple networks
        
#         for model, optimizer in zip(network_table, optimizer_table):
#             def closure():
#                 optimizer.zero_grad()
#                 outputs = model(inputs)
#                 triplets, triplets_ind = organize_samples(outputs, targets)
#                 loss = torch.tensor(0)
#                 if triplets:
#                     anchor, positive, negative = triplets
#                     loss = criterion_triplet(anchor, positive, negative)
#                     loss.backward()
#                 if t % 10 == 0:
#                     print("loss:", loss.item())
#                 return loss
            
#             optimizer.step(closure)
#     epoch += 1

def update_CNN():
    for model in network_table:
        model.train()
    
    global epoch
    epoch = epoch if 'epoch' in globals() else 1
    print(f'==> online epoch # {epoch} [batchSize = {opt.batchSize}] [learningRate = {opt.learningRate}]')
    
    indices = torch.randperm(len(train_data)).split(opt.batchSize)
    
    for t, v in enumerate(indices):
        iter = epoch * len(indices) + t
        learning_rate = opt.learningRate * (1 + opt.gamma_lr * iter) ** (-opt.power_lr)
        
        inputs = train_data[v]
        
        for i, (model, optimizer) in enumerate(zip(network_table, optimizer_table)):
            targets = label_pre_tensor_table[i][v]
            
            def closure():
                optimizer.zero_grad()
                outputs = model(inputs)
                
                triplets, triplets_ind = organize_samples(outputs, targets.float())
                #loss = torch.tensor(0.0)
                if triplets is not None:
                    anchor, positive, negative = triplets
                    loss = criterion_triplet(anchor, positive, negative)
                    loss.backward()
                
                    if t % 10 == 0:
                        print("loss:", loss.item())
                    return loss
                else:
                    return torch.tensor(0.0)
            
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate
            
            optimizer.step(closure)
    
    epoch += 1

def eval_perf():
    for model in network_table:
        model.eval()
    print('==> testing')
    for i, model in enumerate(network_table):
        nmi = Evaluate().NMI(label_gt_table_table[i], label_pre_table_table[i])
        print('NMI:', nmi)
        print(" ")

def is_allfinished():
    for label_pre, target_nclusters in zip(label_pre_table_table, target_nclusters_table):
        if len(label_pre) > target_nclusters:
            return False
    return True

In [1226]:
epoch_reset_labels = [0] * num_networks
optimState = {'learningRate': opt.learningRate}

In [1227]:
# Train multi-attribute discovery models
for _ in range(opt.epoch_rnn):
    for i in range(opt.epoch_max + 1):
        if i % opt.epoch_pp == 0:
            merge_labels(network_table, epoch_reset_labels, train_data)
            eval_perf()
            if is_allfinished():
                break
        if opt.updateCNN == 1:
            update_CNN()
    epoch_reset_labels = [0] * num_networks
    while True:
        merge_labels_final()
        eval_perf()
        if is_allfinished():
            break

feature dims: torch.Size([1000, 320])
sigma: tensor(0.6083, dtype=torch.float64)
initialize clusters...
nclusters: 220
==> testing
NMI: 0.544594407081604
 
==> online epoch # 321 [batchSize = 10] [learningRate = 0.01]
loss: 0.19998672604560852
loss: 0.1999921053647995
loss: 0.19999709725379944
==> online epoch # 322 [batchSize = 10] [learningRate = 0.01]
loss: 0.19997325539588928
==> online epoch # 323 [batchSize = 10] [learningRate = 0.01]
loss: 0.19999583065509796
loss: 0.1999949961900711
loss: 0.19999182224273682
loss: 0.1999925822019577
==> online epoch # 324 [batchSize = 10] [learningRate = 0.01]
loss: 0.19999924302101135
loss: 0.19998779892921448
loss: 0.19999729096889496
==> online epoch # 325 [batchSize = 10] [learningRate = 0.01]
==> online epoch # 326 [batchSize = 10] [learningRate = 0.01]
loss: 0.19999435544013977
loss: 0.199991375207901
loss: 0.1999967098236084
loss: 0.19999602437019348
==> online epoch # 327 [batchSize = 10] [learningRate = 0.01]
loss: 0.19999562203884125


In [1231]:
# 最終的なクラスタリング結果を取得
final_clusters = label_pre_table_table

# クラスタリング結果を表示
for cluster_id, cluster in enumerate(final_clusters):
    print(f"Cluster {cluster_id}: {cluster}")

Cluster 0: [[0, 177, 247, 350, 536, 773, 776, 768, 769, 771, 772, 774, 377, 998, 6, 358, 527, 609, 690, 694, 853, 992, 31, 180, 370, 700, 176, 995, 32, 67, 178, 246, 352, 357, 363, 369, 385, 693, 696, 996, 997, 179, 537, 689, 852, 353, 354, 355, 372, 553, 577, 15, 245, 381, 530, 533, 697, 712, 713, 849, 855, 367, 545, 718, 371, 541, 563, 858, 24, 34, 27, 380, 698, 889, 551, 701, 705, 869, 884, 887, 249, 434, 523, 564, 691, 778, 388, 539, 557, 558, 576, 526, 994, 183, 351, 368, 851, 25, 30, 73, 579, 703, 704, 714, 727, 730, 885, 62, 63, 74, 383, 384, 859, 863, 865, 72, 716, 71, 379, 386, 392, 724, 732, 864, 1, 7, 240, 528, 359, 854, 529, 695, 918, 2, 3, 241, 919, 360, 542, 856, 692, 848, 850, 993, 248, 349, 991, 4, 11, 75, 187, 204, 532, 182, 525, 565, 999, 396, 707, 876, 882, 20, 378, 397, 550, 723, 862, 877, 13, 181, 213, 524, 531, 775, 861, 47, 538, 741, 897, 191, 709, 356, 534, 546, 547, 568, 43, 48, 192, 742, 49, 586, 898, 190, 554, 373, 375, 562, 40, 428, 763, 195, 432, 752, 755, 

In [1236]:
#正解ラベルの表示
print(label_gt_table_table)

[[[78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940,

In [1234]:
sorted_data = []
#print(final_clusters)
for sublist in final_clusters[0]:
    #print(sublist)
    if isinstance(sublist, list):
        sorted_data.append(sorted(sublist))
#print(sorted_data)

#予測結果の表示
for i, sublist in enumerate(sorted_data):
    print(f"Sorted list {i}: {sublist}")

Sorted list 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 174, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408

In [1210]:
print(target_nclusters_table)

[6]
