## Initialization

In [None]:
import numpy as np
import cvxpy as cp
import scipy
from scipy.io import loadmat
from scipy.sparse import diags
from scipy.sparse import coo_matrix
from scipy.sparse import csr_matrix
from scipy.sparse import csc_matrix
from scipy.sparse.linalg import eigsh
from scipy.linalg import expm
import torch
from torch.utils.data import Dataset
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import pairwise_distances
from sklearn.cluster import KMeans, SpectralClustering
from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, accuracy_score
from sklearn.preprocessing import MinMaxScaler
import math
import time
import warnings

# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')
# Module-level MinMaxScaler instance (not referenced by the definitions
# visible in this part of the notebook).
scaler = MinMaxScaler()
# Main compute device: the first CUDA GPU when one is available, else the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
# Explicit handle on the CPU device.
device1 = torch.device('cpu')
# Turn on autograd anomaly detection globally.
# NOTE(review): this is a debugging aid and slows the backward pass; consider
# disabling it for production runs.
torch.autograd.set_detect_anomaly(True)

# Uncomment the three lines below to seed torch / CUDA / NumPy.
# torch.manual_seed(1)
# torch.cuda.manual_seed_all(1)
# np.random.seed(1)


def error_point(prep, real):
    """Match predicted cluster ids to true labels and relabel the prediction.

    A 0/1 assignment between the distinct true labels and the distinct
    predicted labels is found by maximising the total overlap (number of
    shared samples) with the ``GLPK_MI`` mixed-integer solver through cvxpy.
    Every predicted label is assigned to exactly one true label and every
    true label receives at most one predicted label.  Samples whose value is
    ``-1`` in either array are ignored while building the overlap matrix.

    Parameters
    ----------
    prep : numpy.ndarray
        1-D array of predicted labels.  It is NOT modified; the relabelled
        result is returned as a new array.
    real : numpy.ndarray
        1-D array of true labels, same length as ``prep``.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(wrong_points, relabelled)``: indices (into the original arrays) of
        samples whose true-label group is not covered by the matched
        predicted group, and a copy of ``prep`` rewritten with the matched
        true-label values.

    Notes
    -----
    Requires cvxpy with the GLPK_MI solver installed.
    NOTE(review): the equality constraint on the columns presumably makes the
    problem infeasible when there are more predicted labels than true labels
    -- callers in this file wrap the call in ``try/except``; confirm.
    """
    # Work on a private copy: the original aliased ``prep`` here, so the
    # relabelling at the end silently overwrote the caller's array.
    prep_0 = np.array(prep, copy=True)
    # Named differently from the function to avoid shadowing it.
    wrong_points = np.array([], dtype=int)
    prep_full_label = np.setdiff1d(np.unique(prep), np.array([-1]))
    real_full_label = np.setdiff1d(np.unique(real), np.array([-1]))
    # Keep only samples that are labelled (!= -1) in both arrays.
    nonnoise_index = np.intersect1d(
        np.where(prep != -1)[0],
        np.where(real != -1)[0])
    real = real[nonnoise_index]
    prep = prep[nonnoise_index]
    real_label = np.unique(real)
    prep_label = np.unique(prep)
    n = len(real_label)
    n_1 = len(prep_label)
    # One-hot membership matrices, shape (#labels, #samples).
    reallogic = (np.reshape(np.repeat(real, n), [len(real), n])
                 == real_label).T + 0
    preplogic = (np.reshape(np.repeat(prep, n_1), [len(prep), n_1])
                 == prep_label).T + 0
    # interset_matrix[a, b] = number of samples with true label a and
    # predicted label b.
    interset_matrix = reallogic @ preplogic.T
    x = cp.Variable((n, n_1), integer=True)
    # Maximise the overlap captured by the assignment.
    obj = cp.Minimize(-cp.sum(cp.multiply(interset_matrix, x)))
    con = [
        0 <= x, x <= 1,
        cp.sum(x, axis=0, keepdims=True) == 1,
        cp.sum(x, axis=1, keepdims=True) <= 1
    ]
    prob = cp.Problem(obj, con)
    prob.solve('GLPK_MI')
    # Row 0: positions in real_label, row 1: positions in prep_label.
    index = np.array(np.where(x.value == 1))
    # Labels left without a partner on either side.
    add_index = np.array([
        np.setdiff1d(real_full_label, real_label[index[0, :]]),
        np.setdiff1d(prep_full_label, prep_label[index[1, :]])
    ],
                         dtype=int)
    prep0 = np.setdiff1d(prep_full_label, prep_label[index[1, :]])
    index = np.concatenate((index, add_index), axis=1)

    related_index = []
    for i in range(n):
        if i < n_1:
            real_iter_index = np.where(real == np.unique(real)[index[0, i]])[0]
            prep_iter_index = np.where(prep == np.unique(prep)[index[1, i]])[0]
            pp_index = np.where(prep_0 == np.unique(prep)[index[1, i]])[0]
            related_index.append(pp_index)
        else:
            real_iter_index = np.where(real == np.unique(real)[index[0, i]])[0]
            prep_iter_index = np.where(prep == np.unique(prep0)[i - n_1])[0]
            pp_index = np.where(prep_0 == np.unique(prep0)[i - n_1])[0]
            related_index.append(pp_index)
        # Samples of this true label that the matched prediction misses.
        error_point_i = np.setdiff1d(real_iter_index, prep_iter_index)
        wrong_points = np.union1d(wrong_points, error_point_i)
    # All index sets were collected above, before any value is rewritten, so
    # the relabelling cannot cascade from one group into another.
    for i in range(n):
        prep_0[related_index[i]] = np.ones(len(
            related_index[i])) * np.unique(real)[index[0, i]]
    # Translate positions in the filtered arrays back to original positions.
    wrong_points = nonnoise_index[wrong_points]
    return wrong_points, prep_0


class load_data(Dataset):
    """Multi-view data set read from a MATLAB ``.mat`` file.

    The file must provide a ``data`` entry (its first row holds one matrix
    per view) and a ``truelabel`` entry.  After construction ``self.x`` is a
    list with one dense array per view and ``self.y`` is the flattened label
    vector.
    """

    def __init__(self, dataset):
        """Load ``dataset`` (a path accepted by ``scipy.io.loadmat``)."""
        mat = loadmat(dataset)
        views = mat['data'][0]
        self.y = mat['truelabel'][0][0].reshape(-1)
        self.x = []
        # The storage format of the first view decides how all views are read.
        sparse_kinds = (csr_matrix, csc_matrix, coo_matrix)
        if isinstance(views[0], sparse_kinds):
            self.x.extend(views[v].toarray() for v in range(len(views)))
        else:
            self.x.extend(views[v] for v in range(len(views)))

    def __len__(self):
        """Return the number of entries in ``self.x`` (one per view)."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return ``(x[idx], y[idx], idx)`` as tensors built from fresh copies."""
        view_tensor = torch.from_numpy(np.array(self.x[idx]))
        label_tensor = torch.from_numpy(np.array(self.y[idx]))
        index_tensor = torch.from_numpy(np.array(idx))
        return view_tensor, label_tensor, index_tensor


def torch_intersect1d(t1: torch.Tensor, t2: torch.Tensor):
    """Split two 1-D tensors into shared and exclusive elements.

    Both inputs must already contain unique values: an element is considered
    shared when it occurs exactly twice in the concatenation of ``t1`` and
    ``t2``.

    Returns ``(intersection, t1_exclusive, t2_exclusive)``; the intersection
    and ``t1_exclusive`` keep the element order of ``t1`` and
    ``t2_exclusive`` keeps the order of ``t2``.
    """
    n_first = t1.numel()
    merged = torch.cat([t1, t2])
    _, inverse, counts = torch.unique(merged,
                                      return_counts=True,
                                      return_inverse=True)

    # Occurrence count of every element of the concatenation, in input order.
    occurrences = counts[inverse]
    shared_mask = occurrences[:n_first] == 2
    second_only_mask = occurrences[n_first:] == 1

    return t1[shared_mask], t1[~shared_mask], t2[second_only_mask]


def NormalizeData(data):
    """Scale every row of ``data`` to unit Euclidean length.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one sample per row.

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose rows have L2 norm 1.  The squared norm
        is floored at ``1e-14`` before the division, so all-zero rows stay
        zero instead of producing NaN/inf.

    Notes
    -----
    The previous implementation carried an alternative L1 branch guarded by
    ``sum(data**2) < 0``, which can never be true; it has been removed.  The
    sparse diagonal product was replaced by the equivalent broadcast
    multiplication.
    """
    data = data.T
    # Squared L2 norm of each sample (samples are columns after the transpose).
    sq_norm = np.maximum(np.sum(data**2, axis=0), 10**-14)
    return (data * sq_norm**-0.5).T


def comnFun(K, sigma):
    """Return the weighted sum ``sum_i sigma[i] * K[i]`` of the kernels in ``K``.

    ``K`` is a sequence of equally sized square matrices and ``sigma``
    supplies one weight per matrix.  The result is a new dense array.
    """
    size = K[0].shape[0]
    combined = np.zeros([size, size])
    for view, kernel in enumerate(K):
        combined = combined + sigma[view] * kernel
    return combined


def kcenter(K):
    """Center the square matrix ``K`` and symmetrise the result.

    The column means are subtracted along both axes, the grand mean is added
    back, and the outcome is averaged with its transpose.
    """
    size = K.shape[0]
    col_mean = np.sum(K, axis=0) / size
    grand_mean = np.sum(col_mean) / size
    col_mean_column = col_mean.reshape([size, 1])
    centered = (K - col_mean_column - col_mean_column.T +
                grand_mean * np.ones([size, size]))
    return 0.5 * (centered + centered.T)


def kernel_regularization(K):
    """Project a square kernel matrix onto the positive semi-definite cone.

    The input is symmetrised, its eigenvalues that are not larger than
    ``1e-14`` are set to zero, and the matrix is rebuilt from the remaining
    eigenpairs.

    Parameters
    ----------
    K : torch.Tensor
        Square 2-D tensor.

    Returns
    -------
    torch.Tensor
        Symmetric PSD matrix placed on the module-level ``device``.

    Notes
    -----
    Fixes an operator-precedence bug: the original computed ``K + K.T / 2``
    instead of ``(K + K.T) / 2``, so the matrix handed to the eigensolver was
    neither symmetric nor correctly scaled (and ``eig`` could return complex
    values).  With a truly symmetric input ``eigh`` is the appropriate
    solver and always returns real eigenpairs.
    """
    G = (K + K.T) / 2
    G = G.detach().cpu().numpy()
    eigval, eigvec = np.linalg.eigh(G)
    # Drop negative / numerically zero eigenvalues.
    eigval = eigval * (eigval > 10**-14)
    # eigvec @ diag(eigval) @ eigvec.T, written with broadcasting.
    G = (eigvec * eigval) @ eigvec.T
    # Remove round-off asymmetry.
    G = (G + G.T) / 2
    return torch.tensor(G).to(device)


def generateNeighborhood(X, k):
    """Pick the ``k + 1`` largest entries of every row of ``X``.

    Returns ``(mask, knn)``: ``mask`` is a boolean array shaped like ``X``
    that is True at the selected entries, and ``knn`` is an int32 array with
    one row per sample listing the selected column indices in decreasing
    order of value.  ``X`` itself is left untouched.
    """
    work = np.copy(X)
    floor = np.min(work)
    rows = np.arange(work.shape[0])
    picked = []
    for _ in range(k + 1):
        best = np.argmax(work, axis=1)
        picked.append(best)
        # Overwrite the chosen entries with a value below every original
        # entry: they are never picked again, and ``work < floor`` marks them
        # (which also makes reverse neighbours readable from the mask).
        work[rows, best] = floor - 1
    return work < floor, np.array(picked, dtype=np.int32).T


def adapted_neighbor(X, min_k, k):
    """Grow a per-row neighbourhood until enough neighbours are mutual.

    In every round the largest not-yet-selected entry of each row of ``X``
    is added to that row's neighbourhood.  The search stops as soon as the
    average number of *mutual* neighbours (``i`` selected ``j`` and ``j``
    selected ``i``) exceeds ``min_k``, or after ``int(0.5 * n)`` rounds.

    Parameters
    ----------
    X : numpy.ndarray
        Square similarity matrix (larger means closer).  Not modified.
    min_k : float
        Required average number of mutual neighbours per row.
    k : Any
        Unused; kept so existing callers keep working.

    Returns
    -------
    numpy.ndarray
        Boolean mask shaped like ``X``, True at the selected entries (the
        one-directional selection, not the mutual one).

    Notes
    -----
    When the threshold is reached the average mutual-neighbour count and the
    round index are printed.  A second stop condition from the previous
    version (``knn_k >= max(int(.5 * n), 5)``) could never be true inside
    ``range(int(0.5 * n))`` and was removed.
    """
    X = np.copy(X)
    min_X = np.min(X)
    rows = np.arange(X.shape[0])
    for knn_k in range(int(0.5 * X.shape[0])):
        index = np.argmax(X, axis=1)
        # Mark the chosen entries with a value below every original entry.
        X[rows, index] = min_X - 1
        selected = X < min_X
        # Element-wise product of booleans == logical AND: mutual neighbours.
        mutual = selected * selected.T
        if np.mean(np.sum(mutual, axis=1)) > min_k:
            print((np.sum(np.sum(mutual, axis=1)) / X.shape[0]), knn_k)
            return selected
    return X < min_X


def knn_mean_X(X, knn):
    """Average the rows of ``X`` selected by ``knn``, ignoring non-positive entries.

    ``knn[i]`` lists the row indices gathered for sample ``i``.  For every
    feature the gathered values are summed and divided by the number of
    strictly positive values among them (at least 1, to avoid dividing by
    zero).
    """
    gathered = X[knn]
    totals = np.sum(gathered, axis=1)
    positive_counts = np.sum(gathered > 0, axis=1)
    return totals / np.maximum(positive_counts, np.ones(X[0].shape))


def compute_init_loss(K, view_K, X_bar, model):
    """Accumulate three loss terms over all views.

    For each view ``i`` the function adds

    * the mean squared error between ``view_K[i]`` and ``K[i]`` (flattened),
    * ``sum(log1p(X_bar[i]) + log1p(1 - X_bar[i]))``,
    * for every parameter of ``model[i]`` except ``kernel_weights``, its L1
      norm divided by its largest dimension.

    Returns ``[mse_total, log_total, param_total]``.
    """
    mse = nn.MSELoss()
    mse_total = 0
    log_total = 0
    param_total = 0
    for view, view_kernel in enumerate(view_K):
        net = model[view]
        for weight in net.parameters():
            # ``kernel_weights`` is deliberately left out of the penalty.
            if weight is not net.kernel_weights:
                param_total += torch.norm(weight, p=1) / max(weight.size())
        mse_total = mse_total + mse(view_kernel.view(-1), K[view].view(-1))
        log_term = torch.log1p(X_bar[view]) + torch.log1p(1 - X_bar[view])
        log_total = log_total + torch.sum(log_term)
    return [mse_total, log_total, param_total]


def kernelkmeans(K, n_clusters):
    """Return the eigenvectors of ``K`` for its ``n_clusters`` largest eigenvalues.

    Uses ``scipy.sparse.linalg.eigsh`` with ``which='LA'`` (largest
    algebraic); the eigenvalues themselves are discarded.
    """
    eigenpairs = eigsh(K, k=n_clusters, which='LA')
    return eigenpairs[1]


# Network definition
class connectivity(nn.Module):
    """Shared bias-free MLP applied to every view, plus a fused embedding.

    Attributes
    ----------
    matrix : torch.nn.Parameter
        ``(nSmp, n_clusters)`` parameter, initialised to zeros.
    layerencode : torch.nn.ModuleList
        Bias-free linear layers whose widths follow ``options.layer_width_c``;
        weights are drawn with Kaiming-normal initialisation.
    """

    def __init__(self, options):
        super().__init__()
        self.options = options
        self.activ = torch.nn.Sigmoid()
        self.activ1 = torch.nn.ReLU()
        self.activ2 = torch.nn.Tanh()
        zero_block = torch.zeros([options.nSmp, options.n_clusters],
                                 dtype=torch.float32)
        self.matrix = torch.nn.Parameter(1 / (options.nSmp)**.5 * zero_block)

        widths = options.layer_width_c
        n_layers = len(widths) - 1
        # Build every layer first and re-initialise afterwards so the random
        # number stream is consumed in the same order as before.
        self.layerencode = nn.ModuleList([
            nn.Linear(widths[depth],
                      widths[depth + 1],
                      bias=False,
                      dtype=torch.float32) for depth in range(n_layers)
        ])
        for layer in self.layerencode:
            nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')

    def forward(self, knn_list):
        """Encode ``knn_list[i]`` for each of the ``view_num`` views.

        ReLU follows every layer except the last one.  Returns a list with
        one output tensor per view.
        """
        last_depth = len(self.layerencode) - 1
        outputs = []
        for view in range(self.options.view_num):
            hidden = knn_list[view]
            for depth, layer in enumerate(self.layerencode):
                hidden = hidden @ layer.weight.T
                if depth < last_depth:
                    hidden = self.activ1(hidden)
            outputs.append(hidden)
        return outputs


def _score_embedding(H, y, options, best_accurancy, best_label, repeat=50):
    """Cluster the rows of ``H`` with k-means and score against ``y``.

    ``H`` is row-normalised, k-means is fitted ``repeat`` times, and ARI, NMI
    and accuracy are computed for every run (after aligning the cluster ids
    with ``error_point`` when that succeeds).

    ``best_label`` (shape ``(3, nSmp)``) is updated in place: row 0/1/2
    receives the labelling of the run with the best ARI/NMI/ACC seen so far.

    Returns ``(best_accurancy, best_inertia)``: the element-wise maximum of
    the incoming ``best_accurancy`` and this evaluation, and the per-metric
    maximum of this evaluation alone.
    """
    norm_floor = torch.ones(
        options.nSmp, device=H.device, dtype=torch.float32) * 1e-15
    H_normalized = (H / torch.maximum(torch.norm(H, dim=1),
                                      norm_floor).unsqueeze(1)).detach().to(
                                          torch.float32).cpu().numpy()
    kmeans_model = KMeans(n_clusters=options.n_clusters, n_init='auto')

    # Track the running best so that best_label always belongs to the run
    # that actually achieved the best score (previously any run beating the
    # *old* best overwrote the labels, so the last such run won).
    running_best = list(best_accurancy)
    scores = np.zeros([3, repeat])
    for rep in range(repeat):
        kmeans = kmeans_model.fit(H_normalized)
        try:
            y_prep = error_point(kmeans.labels_, y)[1]
        except Exception:
            # Best effort: if the label matching fails (e.g. solver missing
            # or infeasible assignment) fall back to the raw cluster ids.
            y_prep = kmeans.labels_
        y_prep = y_prep.astype('int')
        metrics = (adjusted_rand_score(y, y_prep),
                   normalized_mutual_info_score(y, y_prep),
                   accuracy_score(y, y_prep))
        for row, value in enumerate(metrics):
            if value > running_best[row]:
                running_best[row] = value
                best_label[row, :] = y_prep
        scores[:, rep] = np.array(metrics)

    best_inertia = (np.max(scores, axis=1)).tolist()
    best_accurancy = np.maximum(best_inertia, best_accurancy).tolist()
    return best_accurancy, best_inertia


def init_Kernel_train(y, view_K, options):
    """Train the multi-view connectivity model and evaluate its clustering.

    Parameters
    ----------
    y : numpy.ndarray
        Ground-truth labels, used only for evaluation.
    view_K : sequence
        One square kernel/similarity matrix per view (converted to float32
        tensors on ``device``).
    options : object
        Attribute bag.  Fields read here: ``maxIter``, ``init_time``,
        ``view_num``, ``n_clusters``, ``nSmp``, ``knn`` (per-view boolean
        neighbour masks), ``alpha``, ``beta``, ``gamma``, ``eta``, ``con``,
        ``regular``, ``peak_num`` and ``layer_width_c``.

    Returns
    -------
    tuple
        ``(best_accurancy, best_label, best_inertia, object_function)``:
        best ``[ARI, NMI, ACC]`` over all evaluations, the matching
        labellings (``(3, nSmp)`` int array), the ``[ARI, NMI, ACC]`` maxima
        of the most recent evaluation (``[0, 0, 0]`` if none ran), and the
        recorded objective values.

    Notes
    -----
    While ``epoch <= options.init_time`` only the per-view reconstruction
    losses are optimised and ``connect_kernel.matrix`` is reset each epoch
    to the mean of the scaled view embeddings.  Afterwards the LCP and
    fusion terms are added and the gradient of ``connect_kernel.matrix`` is
    set by hand.

    Fixes relative to the previous version:

    * the ``beta`` reconstruction term was weighted with ``options.alpha``;
    * ``alpha`` and ``beta`` terms shared one list, which made
      ``torch.stack`` fail when both were enabled;
    * ``best_inertia`` could be unbound when returning before the first
      evaluation;
    * ``best_label`` is now tied to the best-scoring k-means run.
    """
    knn_list = []
    optimizers = {}
    init_maxiter = options.maxIter

    for i in range(options.view_num):
        knn_list.append(
            torch.tensor(view_K[i], dtype=torch.float32, device=device))
    connect_kernel = connectivity(options).to(device)
    # Optimizers: Adam for the encoder weights and for the per-view scaling
    # vectors; an SGD optimizer for ``matrix`` is (re)created further below.
    optimizers["optimizer_connectivity"] = optim.Adam(
        [i.weight for i in connect_kernel.layerencode], lr=1e-3)
    sv = [0.5] * (options.n_clusters)
    singular_value = [
        torch.tensor(sv,
                     dtype=torch.float32,
                     device=device,
                     requires_grad=True) for i in range(options.view_num)
    ]
    optimizers["optimizer_singular"] = optim.Adam(singular_value, lr=1e-3)

    best_label = np.zeros([3, options.nSmp], dtype=np.int32)
    best_accurancy = [0] * 3
    # Defined up front so every return path has a value.
    best_inertia = [0] * 3
    knn_temp_list = []
    knn_temp_max_list = []
    knn_temp_min_list = []

    # Build and normalise the per-view target graphs.
    for i in range(options.view_num):
        logic_knn = torch.tensor(1. * options.knn[i],
                                 dtype=torch.float32,
                                 device=device)
        # Kernel restricted to the union of the neighbour relations.
        knn_temp_list.append(knn_list[i] * (1. *
                                            (logic_knn + logic_knn.T) > 0))

        # Kernel restricted to mutual neighbours.
        knn_temp_min_list.append(knn_list[i] * (1. *
                                                (logic_knn * logic_knn.T) > 0))

        del logic_knn
        torch.cuda.empty_cache()
        # Symmetric degree normalisation D^-1/2 * A * D^-1/2.
        D = ((torch.maximum(
            torch.abs(torch.sum(knn_temp_min_list[i], dim=1)),
            torch.ones(knn_temp_min_list[i].shape[0],
                       dtype=torch.float32).to(device) *
            1e-15)**-1)**.5).unsqueeze(1)
        knn_temp_min_list[i] = knn_temp_min_list[i] * D * D.T

        D = ((torch.maximum(
            torch.abs(torch.sum(knn_temp_list[i], dim=1)),
            torch.ones(knn_temp_list[i].shape[0],
                       dtype=torch.float32).to(device) *
            1e-15)**-1)**.5).unsqueeze(1)
        knn_temp_list[i] = knn_temp_list[i] * D * D.T

        # Two-step graph: square of the normalised union graph.
        knn_temp_max_list.append(knn_temp_list[i] @ knn_temp_list[i])

    del D, knn_list
    torch.cuda.empty_cache()
    # Free the graphs whose loss term is switched off.
    if options.alpha == 0:
        del knn_temp_min_list
        torch.cuda.empty_cache()
    if options.beta == 0:
        del knn_temp_max_list
        torch.cuda.empty_cache()

    old_loss = 1e15
    old_rank = 0
    old_inter_loss = -1e15
    object_function = np.zeros(options.maxIter - options.init_time)
    for epoch in range(init_maxiter):
        loss_init = 0
        loss_list = []
        reg_loss = 0
        # Per-view embeddings produced by the shared encoder.
        X_view = connect_kernel(knn_temp_list)

        # Squared-norm regulariser on the encoder weights.
        for j in range(len(options.layer_width_c) - 1):
            reg_loss += 0.5 * torch.norm(
                connect_kernel.layerencode[j].weight)**2

        orx_loss = []

        connected_matrix = (connect_kernel.matrix @ connect_kernel.matrix.T)

        view_loss = []
        connect_loss = []
        view_loss_temp = []
        view_loss_temp_1 = []
        # Separate list for the beta term so that each stacked tensor has
        # exactly one entry per view.
        view_loss_temp_2 = []

        # Hand-computed gradients; they are only used to derive the step
        # size ``eta`` below (the parameters are updated through autograd).
        X_view_orth_grad = torch.zeros_like(X_view[0],
                                            device=device,
                                            dtype=torch.float32)
        X_view_grad = torch.zeros_like(X_view[0],
                                       device=device,
                                       dtype=torch.float32)

        for i in range(options.view_num):
            connect_view_k = ((X_view[i] * singular_value[i]) @ X_view[i].T)

            # Gradient direction of the orthogonality penalty.
            X_view_orth_grad = 4 * (X_view[i] @ X_view[i].T @ X_view[i] -
                                    X_view[i])

            # Reconstruction loss against the union graph.
            view_loss_temp.append(
                1 / (1 + options.alpha + options.beta) *
                torch.trace(X_view[i].T @ (X_view[i] * singular_value[i])
                            @ X_view[i].T @ (X_view[i] * singular_value[i]) -
                            2 * X_view[i].T @ knn_temp_list[i] @ (
                                X_view[i] * singular_value[i])))

            X_view_grad = 2 / (1 + options.alpha + options.beta) * (
                2 * connect_view_k - knn_temp_list[i] -
                knn_temp_list[i].T) @ (X_view[i] * singular_value[i])

            if options.alpha > 0:
                # Reconstruction loss against the mutual-neighbour graph
                # (scaling vector raised to the power 1/2).
                view_loss_temp_1.append(
                    options.alpha / (1 + options.alpha + options.beta) *
                    torch.trace(
                        X_view[i].T @ (X_view[i] * singular_value[i]**.5)
                        @ X_view[i].T @ (X_view[i] * singular_value[i]**.5) -
                        2 * X_view[i].T @ knn_temp_min_list[i] @ (
                            X_view[i] * singular_value[i]**.5)))

                X_view_grad += 2 * options.alpha / (
                    1 + options.alpha + options.beta) * (
                        2 * (X_view[i] * singular_value[i]**.5) @ X_view[i].T -
                        knn_temp_min_list[i] - knn_temp_min_list[i].T) @ (
                            X_view[i] * singular_value[i]**.5)
            if options.beta > 0:
                # Reconstruction loss against the two-step graph (scaling
                # vector squared).  Weighted with beta, matching the
                # hand-computed gradient below.
                view_loss_temp_2.append(
                    options.beta / (1 + options.alpha + options.beta) *
                    torch.trace(
                        X_view[i].T @ (X_view[i] * singular_value[i]**2)
                        @ X_view[i].T @ (X_view[i] * singular_value[i]**2) -
                        2 * X_view[i].T @ knn_temp_max_list[i] @ (
                            X_view[i] * singular_value[i]**2)))

                X_view_grad += 2 * options.beta / (
                    1 + options.alpha + options.beta) * (
                        2 * (X_view[i] * singular_value[i]**2) @ X_view[i].T -
                        knn_temp_max_list[i] -
                        knn_temp_max_list[i].T) @ (X_view[i] *
                                                   (singular_value[i]**2))
            # LCP model: only active after the warm-up epochs.
            if (epoch > options.init_time):
                parents_index = (connect_view_k).detach().clone()

                diag = ((torch.maximum(
                    torch.diag(parents_index).detach(),
                    torch.ones(
                        options.nSmp, device=device, dtype=torch.float32) *
                    1e-15))).detach().unsqueeze(1)

                # Column-wise scaling by the (floored) diagonal.
                connectedness_matrix = parents_index / diag.T
                del diag
                torch.cuda.empty_cache()
                connectedness = torch.sum(connectedness_matrix, dim=1)
                # Floor the row sums at their smallest positive value.
                connectedness = (torch.maximum(
                    torch.ones(
                        options.nSmp, device=device, dtype=torch.float32) *
                    torch.min(connectedness[connectedness > 0]),
                    torch.sum(connectedness_matrix, dim=1))).unsqueeze(1)

                # neighbor_peak[a, b] = 1 when b has larger connectedness
                # than a.
                neighbor_peak = torch.tensor(
                    1, device=device,
                    dtype=torch.float32) * (connectedness.repeat(
                        1, options.nSmp) < connectedness.T)

                torch.cuda.empty_cache()

                # Keep only links stronger than a threshold taken from the
                # sorted per-row maxima at position ``peak_num - 1``.
                neighbor_peak = neighbor_peak * (
                    (connectedness_matrix) > torch.sort(
                        torch.max(neighbor_peak * connectedness_matrix,
                                  dim=1).values).values[options.peak_num - 1])

                # Rows left without any candidate point to themselves.
                neighbor_peak_index = torch.where(
                    torch.sum(neighbor_peak, dim=1) == 0)[0]
                neighbor_peak[neighbor_peak_index,
                              neighbor_peak_index] = torch.ones(
                                  len(neighbor_peak_index),
                                  device=device,
                                  dtype=torch.float32)

                parents_index = neighbor_peak * parents_index
                del neighbor_peak, connectedness_matrix, neighbor_peak_index

                torch.cuda.empty_cache()

                # Row-normalise (L1) the parent weights.
                D = ((torch.maximum(
                    torch.norm(parents_index, dim=1, p=1),
                    torch.ones(
                        options.nSmp, device=device, dtype=torch.float32) *
                    1e-15))).detach().unsqueeze(1)

                parents_index = (parents_index / D)

                del connectedness
                # LCP loss: distance of each embedding to the weighted
                # average of its parents.
                connect_loss.append(
                    torch.sum(
                        torch.norm(parents_index @ X_view[i].detach() -
                                   X_view[i],
                                   dim=1)**2))

                X_view_grad += 1 * 2 * options.gamma * (
                    X_view[i] + (parents_index.T @ parents_index -
                                 parents_index - parents_index.T) @ X_view[i])
                del parents_index
                # Fusion loss: distance of the scaled view embedding to the
                # shared matrix.
                connect_loss.append(
                    options.con * (1 + options.gamma) / options.gamma *
                    torch.norm(X_view[i] * singular_value[i] -
                               connect_kernel.matrix.detach())**2)

                X_view_grad += options.con * 2. * (
                    1 + options.gamma) * (X_view[i] * singular_value[i] -
                                          connect_kernel.matrix).detach()

                # Pull trace(connect_view_k) towards n_clusters.
                if torch.trace(connect_view_k) > options.n_clusters:
                    connect_loss.append((torch.trace(
                        X_view[i].T @ (X_view[i] * singular_value[i])) -
                                         options.n_clusters))
                    X_view_grad += 2 * (options.gamma) * (X_view[i] *
                                                          singular_value[i])
                elif torch.trace(connect_view_k) < options.n_clusters:
                    connect_loss.append((options.n_clusters - torch.trace(
                        X_view[i].T @ (X_view[i] * singular_value[i]))))
                    X_view_grad -= 2 * (options.gamma) * (X_view[i] *
                                                          singular_value[i])

                # Step size eta: column-wise cosine between the loss gradient
                # and the orthogonality direction, clipped to
                # [-options.eta, options.eta] and rescaled by the norm ratio.
                norm_1 = torch.norm(X_view_grad, dim=0).detach().clone()
                norm_2 = torch.norm(X_view_orth_grad, dim=0).detach().clone()
                cos_value = (torch.diag(X_view_orth_grad.T @ X_view_grad) /
                             norm_1 / norm_2).detach()
                eta = 1. * (cos_value > options.eta) * options.eta * (norm_1 /
                                                                      norm_2)
                eta -= 1. * (cos_value <
                             -options.eta) * options.eta * (norm_1 / norm_2)
                eta += 1. * (torch.abs(cos_value) <=
                             options.eta) * cos_value * (norm_1 / norm_2)
                orx_loss.append(
                    torch.trace(X_view[i].T @ X_view[i] @ X_view[i].T
                                @ X_view[i] @ torch.diag(eta) -
                                2 * X_view[i].T @ X_view[i] @ torch.diag(eta)))

        del X_view_grad, X_view_orth_grad
        torch.cuda.empty_cache()
        view_loss.append(torch.stack(view_loss_temp))
        if len(view_loss_temp_1) > 0:
            view_loss.append(torch.stack(view_loss_temp_1))
        if len(view_loss_temp_2) > 0:
            view_loss.append(torch.stack(view_loss_temp_2))

        del view_loss_temp, view_loss_temp_1, view_loss_temp_2
        torch.cuda.empty_cache()
        if len(connect_loss) > 0:

            orx_loss = torch.stack(orx_loss)
            view_loss = torch.stack(view_loss)
            connect_loss = torch.stack(connect_loss)
            if torch.isnan(torch.sum(view_loss) + torch.sum(connect_loss)):
                print(view_loss, connect_loss)
                break
            loss_init = torch.sum(orx_loss) + torch.sum(
                view_loss) + options.gamma * torch.sum(connect_loss)

            loss_list.append([
                orx_loss,
                view_loss,
                connect_loss,
            ])

            # Stop when the orthogonality term and the loss have both
            # stabilised, or on the last epoch.  ``float()`` accepts both the
            # initial Python float and a 0-dim tensor.
            if (((torch.sum(orx_loss) + options.n_clusters * options.view_num *
                  options.eta) < 1e-5 * options.view_num * options.n_clusters)
                    and
                (abs(float(old_loss) - loss_init.item()) < abs(
                    loss_init.item()) * 1e-5)) or epoch == options.maxIter - 1:
                print("converage")
                best_accurancy, best_inertia = _score_embedding(
                    connect_kernel.matrix.detach().clone(), y, options,
                    best_accurancy, best_label)

                return best_accurancy, best_label, best_inertia, np.array(
                    object_function)
        else:
            # Warm-up phase: only the reconstruction losses are active.
            view_loss = torch.stack(view_loss)
            loss_init = torch.sum(view_loss)
            loss_list.append([orx_loss, view_loss])

            # Re-seed the shared matrix with the mean scaled view embedding
            # and give it a fresh SGD optimizer.
            connect_kernel.matrix = torch.nn.Parameter(
                sum([(X_view[i] * singular_value[i]).detach().clone()
                     for i in range(options.view_num)]) / options.view_num)
            optimizers["optimizer_matrix"] = optim.SGD([connect_kernel.matrix],
                                                       lr=1e-2)
        if torch.isnan(loss_init):
            print(loss_list)
            print((torch.diag(connect_view_k + connected_matrix)**0.5
                   ).unsqueeze(1).detach().clone(), )
            print("error")
            break

        (loss_init + options.regular * reg_loss).backward()
        if len(connect_loss) > 0:
            object_function[epoch - options.init_time] = loss_init.item(
            ) + options.view_num * options.gamma * torch.trace(
                connected_matrix @ connected_matrix).item()
            # Hand-set gradient for the shared matrix: pull towards every
            # scaled view embedding, plus an orthogonality correction whose
            # per-column weight ``eta`` is derived as above.
            connect_kernel.matrix.grad = torch.zeros(
                [options.nSmp, options.n_clusters],
                device=device,
                dtype=torch.float32)
            for i in range(options.view_num):
                connect_kernel.matrix.grad += 2. * (1 + options.gamma) * (
                    connect_kernel.matrix -
                    X_view[i] * singular_value[i]).detach().clone()
            orx_direction = 2. * (1 + options.gamma) * (
                connect_kernel.matrix @ connect_kernel.matrix.T
                @ connect_kernel.matrix -
                connect_kernel.matrix).detach().clone()
            norm_1 = torch.norm(connect_kernel.matrix.grad,
                                dim=0).detach().clone()
            norm_2 = torch.norm(orx_direction, dim=0).detach().clone()
            cos_value = torch.diag(
                connect_kernel.matrix.grad.T @ orx_direction) / norm_1 / norm_2
            eta = 1. * (cos_value > options.eta) * options.eta * (norm_1 /
                                                                  norm_2)
            eta -= 1. * (cos_value < -options.eta) * options.eta * (norm_1 /
                                                                    norm_2)
            eta += 1. * (torch.abs(cos_value) <=
                         options.eta) * cos_value * (norm_1 / norm_2)
            connect_kernel.matrix.grad += orx_direction * eta

        old_loss = loss_init
        # Every 25 epochs: estimate the effective rank of the shared matrix.
        if (epoch) % 25 == 0:

            new_rank = torch.trace(connected_matrix).item() / torch.mean(
                torch.stack(singular_value)).item()

        if ((epoch) % 25 == 0) and epoch != 0:
            if new_rank > 0.9 * old_rank:
                best_accurancy, best_inertia = _score_embedding(
                    connect_kernel.matrix.detach().clone(), y, options,
                    best_accurancy, best_label)

            elif new_rank < 0.9 * old_rank and old_loss > old_inter_loss:
                # The rank estimate dropped noticeably: stop early.
                return best_accurancy, best_label, best_inertia, np.array(
                    object_function)
            old_rank = min(new_rank, options.n_clusters)

        # Apply the parameter updates and clear the gradients.
        for optimizer_key, optimizer_value in zip(optimizers.keys(),
                                                  optimizers.values()):
            optimizer_value.step()
            optimizer_value.zero_grad()
        connect_kernel.matrix.grad = None
        torch.cuda.empty_cache()
    return best_accurancy, best_label, best_inertia, np.array(object_function)

## Load data

In [None]:
import sys
import os


class HiddenPrints:
    """Context manager that silences ``print`` output inside a ``with`` block.

    ``sys.stdout`` is redirected to ``os.devnull`` on entry and restored on
    exit, also when the body raises.
    """

    def __enter__(self):
        self._original_stdout = sys.stdout
        # Keep our own handle so that exactly this file is closed on exit,
        # even if the body replaced sys.stdout in the meantime.
        self._devnull = open(os.devnull, 'w')
        sys.stdout = self._devnull
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Restore first, then release the handle this manager opened.
        sys.stdout = self._original_stdout
        self._devnull.close()

# Initial configuration: load the data set and fill the ``options`` bag that
# init_Kernel_train reads.
datasets_path = "./datasets"
file_name = "YALE.mat"
data = load_data(datasets_path + '/' + file_name)
torch.cuda.empty_cache()
# Minimal attribute container used as a settings object.
EmptyStruct = type('EmptyStruct', (), {})
options = EmptyStruct()
n_clusters = len(np.unique(data.y))
# Print every label together with its number of samples.
for i in range(n_clusters):
    print(
        np.unique(data.y)[i], len(np.where(data.y == np.unique(data.y)[i])[0]))

options.n_clusters = n_clusters
view_num = len(data.x)

options.view_num = view_num
nSmp = data.x[0].shape[0]
options.nSmp = nSmp
# Encoder widths: nSmp -> hidden -> n_clusters, where the hidden width is
# min(sqrt(nSmp), nSmp / n_clusters) but never below n_clusters.
options.layer_width_c = [
    nSmp,
    max([min([int(nSmp**0.5), int(nSmp / n_clusters)]), n_clusters]),
    n_clusters
]
# Number of warm-up epochs and total number of epochs.
options.init_time = 300
options.maxIter = 1000
Sigma = np.ones(view_num) / view_num
options.threshold = 0.45
# Weight of the encoder regulariser and of the fusion term.
options.regular = 1e-3
options.con = 1e0
options.peak_num = n_clusters * 2
print(view_num, data.x[0].shape)

options.knn = []
# Required average number of mutual neighbours (see adapted_neighbor).
options.min_k = 3

# Build one normalised kernel matrix and one neighbour mask per view.
K = []
normal_X = []
for i in range(view_num):
    # Branch 1: the fifth character from the end of the file name is 'K'
    # (the last character of the stem for a ".mat" name).
    # NOTE(review): presumably this marks files that already store kernel
    # matrices -- confirm the naming convention.
    if file_name[-5] == 'K':
        TempK = data.x[i]
        TempK = kcenter(TempK)
        # Scale to unit diagonal.
        TempK = TempK / (np.diag(TempK)**0.5).reshape([options.nSmp, 1]) / (
            np.diag(TempK)**0.5).reshape([options.nSmp, 1]).T
        TempD = np.abs(np.sum(TempK, axis=1, keepdims=True))**-.5
        # eigsh returns (eigenvalues, eigenvectors): despite the names, ``V``
        # holds the eigenvalues (ascending) and ``D`` the eigenvectors.
        V, D = eigsh(TempK * TempD * TempD.T, k=options.n_clusters, which='LA')
        print(i, V)
        # Divide by the largest eigenvalue of the normalised kernel.
        K.append(TempK / V[-1])
        # The third argument of adapted_neighbor is not used by it.
        options.knn.append(
            adapted_neighbor(TempK / V[-1], options.min_k, options.n_clusters))
        del TempK
    # Branch 2: a square view is treated as a pairwise distance matrix.
    elif data.x[i].shape[1] == data.x[i].shape[0]:
        print('distance_data')
        TempK = data.x[i]
        # Gaussian kernel whose bandwidth is the mean distance.
        t = np.mean(np.mean(TempK))
        TempK = np.exp(-TempK**2 / (2 * t**2))
        K.append(TempK)
        options.knn.append(
            adapted_neighbor(TempK, options.min_k, options.n_clusters))
        del TempK
    # Branch 3: raw features -> row normalisation -> Euclidean distances ->
    # Gaussian kernel with mean-distance bandwidth.
    else:
        temp_x = NormalizeData(data.x[i])
        TempK = pairwise_distances(temp_x, metric='euclidean')
        # logic_knn, knn = generateNeighborhood(-TempK, knn_k)
        t = np.mean(np.mean(TempK))
        TempK = np.exp(-TempK**2 / (2 * t**2))
        K.append(TempK)
        options.knn.append(
            adapted_neighbor(TempK, options.min_k, options.n_clusters))
        del TempK, temp_x  #mean_X

0 200
1 200
2 200
3 200
4 200
5 200
6 200
7 200
8 200
9 200
6 (2000, 240)
6.2 9
6.421 9
6.451 9
6.629 9
6.517 9
6.553 7


## Demo

In [None]:
# Loss weights read by init_Kernel_train: alpha (mutual-neighbour graph term),
# beta (two-step graph term; 0 disables it), eta (bound on the step-size
# cosine) and gamma (weight of the LCP / fusion terms).
options.alpha = 1
options.beta = 0
options.eta = 0.1
options.gamma = 1e-1
# Train and show element [0] of the result: the best [ARI, NMI, ACC] values.
init_Kernel_train(data.y, K, options)[0]

[tensor(7.4622, device='cuda:0'), tensor(7.4738, device='cuda:0'), tensor(7.5251, device='cuda:0'), tensor(7.4852, device='cuda:0'), tensor(7.4294, device='cuda:0'), tensor(6.7613, device='cuda:0')]
[tensor(77.0325, device='cuda:0'), tensor(57.9800, device='cuda:0'), tensor(65.1734, device='cuda:0'), tensor(67.0208, device='cuda:0'), tensor(76.0557, device='cuda:0'), tensor(63.1677, device='cuda:0')]
25: [0.4907018813681303, 0.5949449609861407, 0.667] [0.4907018813681303, 0.5949449609861407, 0.667] [0.47112985883866004, 0.5822210139822191, 0.6409999999999999]
[tensor(77.4816, device='cuda:0'), tensor(56.9153, device='cuda:0'), tensor(66.4789, device='cuda:0'), tensor(66.4746, device='cuda:0'), tensor(77.9712, device='cuda:0'), tensor(59.5685, device='cuda:0')]
50: [0.7099640912007266, 0.7898720891999796, 0.8185] [0.7099640912007266, 0.7898720891999796, 0.8185] [0.6507482465761316, 0.748751663029362, 0.7598999999999999]
[tensor(78.1504, device='cuda:0'), tensor(60.8692, device='cuda:0')

[0.8607050421088525, 0.9210260764362568, 0.9045]