In [4]:
import matplotlib.pyplot as plt
import numpy as np
from struct import unpack
from sklearn import cluster
import datetime
import hdbscan
import seaborn as sns
from sklearn.preprocessing import PowerTransformer, normalize, MinMaxScaler, StandardScaler
from tsnecuda import TSNE
from struct import pack
from sklearn_extra.cluster import KMedoids
from matplotlib import colors
from sklearn.metrics import silhouette_score, silhouette_samples
import matplotlib.cm as cm


# Discrete palette of 50 hex colours; passed as `cmap=_cmap` to the scatter
# plots below so that integer cluster labels are drawn in distinct colours.
_cmap = colors.ListedColormap(['#1A90F0', '#F93252', '#FEA250', '#276B29', '#362700', 
                               '#2C2572', '#D25ABE', '#4AB836', '#A859EA', '#65C459', 
                               '#C90B18', '#E02FD1', '#5FAFD4', '#DAF779', '#ECEE25', 
                               '#56B390', '#F3BBBE', '#8FC0AE', '#0F16F5', '#8A9EFE', 
                               '#A23965', '#03F70C', '#A8D520', '#952B77', '#2A493C', 
                               '#E8DB82', '#7C01AC', '#1938A3', '#3C4249', '#BC3D92', 
                               '#DEEDB1', '#3C673E', '#65F3D7', '#77110B', '#D16DD6', 
                               '#08EF68', '#CFFD6F', '#DC6B26', '#912D5D', '#8CA6F8', 
                               '#04EE96', '#54B0C1', '#6CBE38', '#24633B', '#DE41DD', 
                               '#5EF270', '#896991', '#E6D381', '#7B0681', '#D66C07'
                              ])

def transform(_in) :
    """Build 26 differently scaled feature matrices from a 3-column table.

    The three input columns are referred to as v, g and h (column 0, 1, 2).
    A fourth column log(h + 1) is derived, the 4-column table is scaled
    column-wise in six ways (L1 / L2 / max normalisation, standardisation,
    min-max scaling, Yeo-Johnson power transform), and several 3-column
    combinations are collected.

    Parameters
    ----------
    _in : 2-D numpy array with at least 3 columns.
        NOTE(review): the notebook passes rows of [i, j, histogram count];
        confirm that this is the intended meaning of v, g, h.

    Returns
    -------
    list of 26 arrays, in this order:
        0. the input itself (v, g, h)
        1. v, g, log h (unscaled)
        then, for each scaler in the order l1, l2, max, std, min-max,
        yeo-johnson, four arrays:
            a. scaled v, scaled g, raw h
            b. scaled v, scaled g, raw log h
            c. scaled v, scaled g, scaled h
            d. scaled v, scaled g, scaled log h

    The order of (c) and (d) follows the name list used by the notebook
    ("..._vg_h", "..._vg_logh", "..._vgh", "..._vglogh").  Previously (c) and
    (d) were emitted the other way round, so the "vgh" name was attached to
    the log-h data and vice versa.
    """
    np_vgh = _in
    # Third column (h) as an (n, 1) column vector so it can be hstack-ed.
    np_h = _in[:, 2].reshape(-1, 1)
    np_logh = np.log(np_h + 1)
    # Columns: 0 = v, 1 = g, 2 = h, 3 = log(h + 1)
    np_vgh_logh = np.hstack((np_vgh, np_logh))

    # Every entry is the full 4-column table after one column-wise scaling.
    scaled_tables = [
        normalize(np_vgh_logh, axis=0, norm='l1'),
        normalize(np_vgh_logh, axis=0, norm='l2'),
        normalize(np_vgh_logh, axis=0, norm='max'),
        StandardScaler().fit(np_vgh_logh).transform(np_vgh_logh),
        MinMaxScaler().fit(np_vgh_logh).transform(np_vgh_logh),
        PowerTransformer(method='yeo-johnson').fit(np_vgh_logh).transform(np_vgh_logh),
    ]

    ret = [_in, np.delete(np_vgh_logh, 2, axis=1)]
    for scaled in scaled_tables:
        scaled_vg = scaled[:, [0, 1]]
        ret.append(np.hstack((scaled_vg, np_h)))      # scaled v, g + raw h
        ret.append(np.hstack((scaled_vg, np_logh)))   # scaled v, g + raw log h
        ret.append(np.delete(scaled, 3, axis=1))      # drop log h -> scaled v, g, h
        ret.append(np.delete(scaled, 2, axis=1))      # drop h     -> scaled v, g, log h

    return ret

def binaryFileWrite2DHist(filename, hist_cluster) :
    """Write a 2-D grid of integers to a binary file.

    Every value is stored as a little-endian signed 32-bit integer, row by
    row (hist_cluster[0][0], hist_cluster[0][1], ..., then the next row).

    Parameters
    ----------
    filename : path of the file to create or overwrite.
    hist_cluster : sequence of rows (nested list or 2-D numpy array) holding
        integers that fit into 32 bits.

    Raises
    ------
    struct.error if a value cannot be packed as a 32-bit integer.

    The earlier implementation swapped the row and column counts, which only
    worked for square grids; rows and columns are now walked directly so that
    rectangular grids are written correctly as well.
    """
    with open(filename, 'wb') as fp:
        for row in hist_cluster:
            for value in row:
                fp.write(pack('<i', value))
                
def _TSNE(learning_rate, data) :
    """Embed `data` with TSNE and print how long the fit took.

    Parameters
    ----------
    learning_rate : forwarded to the TSNE constructor.
    data : array handed to TSNE.fit_transform.

    Returns
    -------
    The embedding returned by fit_transform.
    """
    embedder = TSNE(learning_rate=learning_rate)
    print("TSNE calc : ", end='')
    started = datetime.datetime.now()
    embedding = embedder.fit_transform(data)
    elapsed = datetime.datetime.now() - started
    # Wall-clock duration of the fit in milliseconds.
    print(elapsed.total_seconds() * 1000,'ms')
    return embedding

def kmeans(k, targetdata, originaldata, w, h, bfilename):
    """Cluster `targetdata` with K-Means, plot the result and dump it to disk.

    Parameters
    ----------
    k : number of clusters.
    targetdata : feature matrix that is clustered.
    originaldata : numpy array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in an
        h-by-w label grid and as the scatter-plot coordinates.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names.

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title, saves a PNG
    under "kmeans_png/" and a binary label grid under "kmeans_bin/"
    (presumably both directories must already exist), shows the figure and
    prints the timing.

    Returns
    -------
    The cluster label of every row of `targetdata`.
    """
    t_begin = datetime.datetime.now()
    fitted = cluster.KMeans(n_clusters=k, random_state=0).fit(targetdata)
    exeTime = (datetime.datetime.now() - t_begin).total_seconds() * 1000
    labels = fitted.labels_

    # Label grid; cells that no sample maps to keep the value -1.
    grid = [[-1] * w for _ in range(h)]
    for idx in range(len(targetdata)):
        row = originaldata[idx][0]
        col = originaldata[idx][1]
        grid[row][col] = labels[idx]

    model = bfilename
    etc = "n_clusters_%d"%(k)

    # Shift by one so that empty cells become 0 and cluster c becomes c + 1.
    H = np.array(grid) + 1

    plt.figure(figsize=(10,10))
    plt.title("%s K-Mean %d clusters, \ncalctime : %.2f ms"%(dataset, k, exeTime))
    plt.scatter(originaldata[:,1], originaldata[:,0],
                s=2, marker='s', cmap=_cmap, c=labels)
    plt.savefig("kmeans_png/%s_%s_1.png"%(model, etc), dpi=300)
    plt.show()

    print('KMEANS Time : ', exeTime,'ms', ', #clusters : ', k)
    print(exeTime,'ms')

    binaryFileWrite2DHist("kmeans_bin/%s_%s.bin"%(model, etc), H)

    return labels

def kmeans_draw_elbow_2to10(targetdata, originaldata, w, h, bfilename):
    """Run K-Means for k = 2..10 and draw silhouette and elbow plots.

    For every k a two-panel figure is created (left: per-sample silhouette
    values grouped by cluster, right: scatter of the first two columns of
    `targetdata` with the cluster centres), and the h-by-w label grid is
    written to "kmeans_bin/". After the loop all silhouette figures are shown,
    followed by an SSE-versus-k elbow plot.

    Parameters
    ----------
    targetdata : numpy feature matrix that is clustered.
    originaldata : array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in the grid.
    w, h : width and height of the label grid.
    bfilename : stem used for the binary output file names.

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title.

    Returns
    -------
    The cluster labels of the last run (k = 10).
    """
    sse = []
    for k in range(2,11) :
        startTime = datetime.datetime.now()
        clusterer = cluster.KMeans(n_clusters=k, random_state=0)
        fitted = clusterer.fit(targetdata)
        endTime = datetime.datetime.now()
        exeTime = (endTime - startTime).total_seconds() * 1000
        labels = fitted.labels_

        # Sum of squared distances to the closest centre, for the elbow plot.
        sse.append(fitted.inertia_)

        ########################################################################################
        # (start) for silhouette
        ########################################################################################
        fig, (ax1, ax2) = plt.subplots(1, 2)
        fig.set_size_inches(18, 7)

        # The silhouette coefficient can range from -1 to 1; only [-0.1, 1]
        # is displayed here.
        ax1.set_xlim([-0.1, 1])
        # The (k + 1) * 10 leaves blank space between the silhouette plots of
        # the individual clusters, to demarcate them clearly.
        ax1.set_ylim([0, len(targetdata) + (k + 1) * 10])

        # The silhouette computation is quadratic in the number of samples,
        # so it is done once: the average score is the mean of the per-sample
        # values (this is how silhouette_score is defined when no sub-sampling
        # is requested).
        sample_silhouette_values = silhouette_samples(targetdata, labels)
        silhouette_avg = np.mean(sample_silhouette_values)

        print("For n_clusters =", k,
              "The average silhouette_score is :", silhouette_avg)

        y_lower=10
        for i in range(k):
            # Silhouette values of the samples in cluster i, sorted ascending.
            # Boolean indexing returns a copy, so the in-place sort does not
            # touch sample_silhouette_values.
            ith_cluster_silhouette_values = sample_silhouette_values[labels == i]
            ith_cluster_silhouette_values.sort()

            size_cluster_i = ith_cluster_silhouette_values.shape[0]
            y_upper = y_lower + size_cluster_i

            color = cm.nipy_spectral(float(i) / k)
            ax1.fill_betweenx(np.arange(y_lower, y_upper),
                              0, ith_cluster_silhouette_values,
                              facecolor=color, edgecolor=color, alpha=0.7)

            # Label the silhouette plots with their cluster numbers at the middle
            ax1.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))

            # Compute the new y_lower for next plot
            y_lower = y_upper + 10  # 10 for the 0 samples

        ax1.set_title("The silhouette plot. silhouette_score is : %.6f"%(silhouette_avg))
        ax1.set_xlabel("The silhouette coefficient values")
        ax1.set_ylabel("Cluster label")

        # The vertical line for average silhouette score of all the values
        ax1.axvline(x=silhouette_avg, color="red", linestyle="--")

        ax1.set_yticks([])  # Clear the yaxis labels / ticks
        ax1.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])

        # 2nd plot showing the actual clusters formed. The local name is not
        # `colors`, so the imported matplotlib `colors` module is not shadowed.
        point_colors = cm.nipy_spectral(labels.astype(float) / k)
        ax2.scatter(targetdata[:, 1], targetdata[:, 0], marker='.', s=30, lw=0, alpha=0.7,
                    c=point_colors, edgecolor='k')

        # Labeling the clusters
        centers = clusterer.cluster_centers_
        # Draw white circles at cluster centers
        ax2.scatter(centers[:, 1], centers[:, 0], marker='o',
                    c="white", alpha=1, s=200, edgecolor='k')

        for i, c in enumerate(centers):
            ax2.scatter(c[1], c[0], marker='$%d$' % i, alpha=1,
                        s=50, edgecolor='k')

        ax2.set_title("%s K-Mean %d clusters, \ncalctime : %.2f ms"%(dataset, k, exeTime))
        ax2.set_xlabel("Feature space for the 1st feature")
        ax2.set_ylabel("Feature space for the 2nd feature")

        plt.suptitle(("Silhouette analysis for KMeans clustering on sample data "
                      "with n_clusters = %d" % k),
                     fontsize=14, fontweight='bold')

        ########################################################################################
        # (end) for silhouette
        ########################################################################################

        # Label grid; cells that no sample maps to keep the value -1.
        hist_cluster=[[-1 for x in range(w)] for x in range(h)]

        for idx in range(len(targetdata)):
            hist_cluster[originaldata[idx][0]][originaldata[idx][1]]=labels[idx]

        model = bfilename
        etc = "n_clusters_%d"%(k)

        # Shift by one so that empty cells become 0 and cluster c becomes c + 1.
        H = np.array(hist_cluster)
        H = H + 1

        print('KMEANS Time : ', exeTime,'ms', ', #clusters : ', k)

        binaryFileWrite2DHist("kmeans_bin/%s_%s.bin"%(model, etc), H)

    plt.show() ## for silhouette plot

    ## for elbow plot
    plt.plot(range(2,11), sse, marker='o')
    plt.xlabel('# of clusters')
    plt.ylabel('SSE')
    plt.show()

    return fitted.labels_

def kmeans_dr(k, targetdata, originaldata, w, h, bfilename):
    """K-Means on a reduced embedding, with an extra plot of the embedding.

    Works like a plain K-Means run (fit, scatter plot in the original
    coordinates, binary dump of the h-by-w label grid) but writes to
    "kmeans_dr_png/" and "kmeans_dr_bin/" and finally calls
    viewTSNEClustering on `targetdata`.

    Parameters
    ----------
    k : number of clusters.
    targetdata : matrix that is clustered (the name suggests a
        dimensionality-reduced embedding -- TODO confirm).
    originaldata : numpy array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) grid positions and as the
        scatter-plot coordinates.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names.

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title; relies on
    viewTSNEClustering being defined elsewhere in the notebook.

    Returns
    -------
    The cluster label of every row of `targetdata`.
    """
    t_begin = datetime.datetime.now()
    fitted = cluster.KMeans(n_clusters=k, random_state=0).fit(targetdata)
    exeTime = (datetime.datetime.now() - t_begin).total_seconds() * 1000
    labels = fitted.labels_

    # Label grid; cells that no sample maps to keep the value -1.
    grid = [[-1] * w for _ in range(h)]
    for idx in range(len(targetdata)):
        row = originaldata[idx][0]
        col = originaldata[idx][1]
        grid[row][col] = labels[idx]

    model = bfilename
    etc = "n_clusters_%d"%(k)

    # Shift by one so that empty cells become 0 and cluster c becomes c + 1.
    H = np.array(grid) + 1

    plt.figure(figsize=(10,10))
    plt.title("%s K-Mean %d clusters, \ncalctime : %.2f ms"%(dataset, k, exeTime))
    plt.scatter(originaldata[:,1], originaldata[:,0],
                s=2, marker='s', cmap=_cmap, c=labels)
    plt.savefig("kmeans_dr_png/%s_%s_1.png"%(model, etc), dpi=300)
    plt.show()

    print('KMEANS Time : ', exeTime,'ms', ', #clusters : ', k)
    print(exeTime,'ms')

    binaryFileWrite2DHist("kmeans_dr_bin/%s_%s.bin"%(model, etc), H)

    # Second picture: the clustered points drawn in the embedding itself.
    viewTSNEClustering(targetdata, labels, "kmeans_dr_png/%s_%s_2.png"%(model, etc) )

    return labels

def kmedoids(k, _metric, targetdata, originaldata, w, h, bfilename) :
    """Cluster `targetdata` with K-Medoids, draw the label grid and dump it.

    Parameters
    ----------
    k : number of clusters.
    _metric : distance metric name forwarded to KMedoids.
    targetdata : feature matrix that is clustered.
    originaldata : array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in the grid.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names.

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title. Draws onto the
    current matplotlib figure (no new figure is opened and nothing is shown),
    saves it under "kmedoids_png/" and writes the grid under "kmedoids_bin/".

    Returns
    -------
    The cluster label of every row of `targetdata`.
    """
    t_begin = datetime.datetime.now()
    fitted = KMedoids(metric=_metric, n_clusters=k).fit(targetdata)
    exeTime = (datetime.datetime.now() - t_begin).total_seconds() * 1000
    labels = fitted.labels_

    # Label grid; cells that no sample maps to keep the value -1.
    grid = [[-1] * w for _ in range(h)]
    for idx in range(len(targetdata)):
        row = originaldata[idx][0]
        col = originaldata[idx][1]
        grid[row][col] = labels[idx]

    model = bfilename
    etc = "metric_%s_n_clusters_%d"%(_metric, k)

    # Shift by one so that empty cells become 0 and cluster c becomes c + 1.
    H4 = np.array(grid) + 1

    # Rows are reversed so that grid row 0 is drawn at the bottom of the image.
    plt.imshow(H4[::-1])
    plt.title("%s KMedoids %d clusters, metric : %s, \ncalctime : %.2f ms"%(dataset, k, _metric, exeTime))
    print('KMedoids Time : ', exeTime,'ms', ', #clusters : ', k)
    plt.savefig("kmedoids_png/%s_%s_1.png"%(model, etc), dpi=300)

    print(exeTime,'ms')

    binaryFileWrite2DHist("kmedoids_bin/%s_%s.bin"%(model, etc), H4)

    return labels

def dbscan(_eps, targetdata, originaldata, w, h, bfilename):
    """Cluster `targetdata` with DBSCAN, plot the result and dump it to disk.

    Parameters
    ----------
    _eps : neighbourhood radius forwarded to DBSCAN as `eps`.
    targetdata : feature matrix that is clustered.
    originaldata : numpy array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in an
        h-by-w label grid and as the scatter-plot coordinates.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names.

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title, saves a PNG
    under "dbscan_png/" (the figure is not shown) and a binary label grid
    under "dbscan_bin/". Unlike the K-Means helpers the grid is written
    without the +1 shift, so empty cells and label -1 share the value -1.

    Returns
    -------
    The DBSCAN label of every row of `targetdata`.
    """
    t_begin = datetime.datetime.now()
    fitted = cluster.DBSCAN(eps=_eps).fit(targetdata)
    exeTime = (datetime.datetime.now() - t_begin).total_seconds() * 1000
    labels = fitted.labels_

    grid = [[-1] * w for _ in range(h)]
    for idx in range(len(originaldata)):
        point = originaldata[idx]
        grid[point[0]][point[1]] = labels[idx]

    model = bfilename
    etc = "eps_%.2f"%(_eps)

    H2 = np.array(grid)

    plt.figure(figsize=(10,10))
    plt.title("%s DBSCAN eps : %f, \ncalctime : %.2f ms"%(dataset, _eps, exeTime))
    plt.scatter(originaldata[:,1], originaldata[:,0],
                s=2, marker='s', cmap=_cmap, c=labels)
    plt.savefig("dbscan_png/%s_%s_1.png"%(model, etc), dpi=300)
    print('DBSCAN Time : ', exeTime,'ms', ', eps : ',_eps)

    # Number of distinct labels, not counting the label -1.
    n_clusters_ = len(set(labels))
    if -1 in labels:
        n_clusters_ -= 1
    print( "# of clusters : ", n_clusters_   )

    binaryFileWrite2DHist("dbscan_bin/%s_%s.bin"%(model, etc), H2)

    return labels

def dbscan_dr(_eps, targetdata, originaldata, w, h, bfilename, tsne_cnt):
    """DBSCAN on a reduced embedding, with an extra plot of the embedding.

    Parameters
    ----------
    _eps : neighbourhood radius forwarded to DBSCAN as `eps`.
    targetdata : matrix that is clustered (the name suggests a
        dimensionality-reduced embedding -- TODO confirm).
    originaldata : numpy array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in an
        h-by-w label grid and as the scatter-plot coordinates.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names.
    tsne_cnt : integer used as the sub-directory name below
        "dbscan_dr_png/" and "dbscan_dr_bin/".

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title, saves the
    scatter plot and the binary label grid, and calls viewTSNEClustering
    (defined elsewhere in the notebook) for a second picture.

    Returns
    -------
    The DBSCAN label of every row of `targetdata`.
    """
    startTime = datetime.datetime.now()
    dbscan = cluster.DBSCAN(eps=_eps).fit(targetdata)
    endTime = datetime.datetime.now()
    exeTime = (endTime - startTime).total_seconds() * 1000

    # Label grid; cells that no sample maps to keep the value -1.
    hist_dbscan_cluster=[[-1 for x in range(w)] for x in range(h)]

    for idx in range(len(originaldata)):
        _i = originaldata[idx][0]
        _j = originaldata[idx][1]
        hist_dbscan_cluster[_i][_j]=dbscan.labels_[idx]

    model = bfilename
    etc = "eps_%.2f"%(_eps)

    H2 = np.array(hist_dbscan_cluster)

    plt.figure(figsize=(10,10))
    plt.title("%s DBSCAN eps : %f, \ncalctime : %.2f ms"%(dataset, _eps, exeTime))
    plt.scatter(originaldata[:,1],originaldata[:,0], s=2,  marker='s',cmap=_cmap, c=dbscan.labels_)

    plt.savefig("dbscan_dr_png/%d/%s_%s_1.png"%(tsne_cnt,model, etc), dpi=300)
    print('DBSCAN Time : ', exeTime,'ms', ', eps : ',_eps)

    # Number of distinct labels, not counting the label -1.
    n_clusters_ = len(set(dbscan.labels_)) - (1 if -1 in dbscan.labels_ else 0)
    print( "# of clusters : ", n_clusters_   )

    binaryFileWrite2DHist("dbscan_dr_bin/%d/%s_%s.bin"%(tsne_cnt,model, etc), H2)

    # Second picture: the clustered points drawn in the embedding itself.
    viewTSNEClustering(targetdata, dbscan.labels_, "dbscan_dr_png/%d/%s_%s_2.png"%(tsne_cnt,model, etc) )

    # The K-Means plotting statements that used to follow this return could
    # never run (and referenced names that do not exist here); they were removed.
    return dbscan.labels_

def Hdbscan_dr(_min_cluster_size, _min_samples, _alpha, targetdata, originaldata, w, h, bfilename, tsne_cnt):
    """HDBSCAN on a reduced embedding, with plots and a binary label dump.

    Parameters
    ----------
    _min_cluster_size, _min_samples, _alpha : forwarded to hdbscan.HDBSCAN as
        min_cluster_size, min_samples and alpha.
    targetdata : matrix that is clustered (the name suggests a
        dimensionality-reduced embedding -- TODO confirm).
    originaldata : numpy array, row-aligned with `targetdata`; column 0 and
        column 1 are used as integer (row, column) positions in an
        h-by-w label grid and as the scatter-plot coordinates.
    w, h : width and height of the label grid.
    bfilename : stem used for the output file names (also printed at the end).
    tsne_cnt : integer used as the sub-directory name below
        "hdbscan_dr_png/" and "hdbscan_dr_bin/".

    Side effects
    ------------
    Reads the module-level name `dataset` for the plot title, saves and shows
    the scatter plot and the condensed-tree plot, calls viewTSNEClustering
    (defined elsewhere in the notebook) and writes the label grid.

    Returns
    -------
    The HDBSCAN label of every row of `targetdata`.
    """
    t_begin = datetime.datetime.now()
    clusterer = hdbscan.HDBSCAN(min_cluster_size=_min_cluster_size,
                                min_samples=_min_samples,
                                alpha=_alpha).fit(targetdata)
    exeTime = (datetime.datetime.now() - t_begin).total_seconds() * 1000
    labels = clusterer.labels_

    # Label grid; cells that no sample maps to keep the value -1.
    grid = [[-1] * w for _ in range(h)]
    for idx in range(len(originaldata)):
        point = originaldata[idx]
        grid[point[0]][point[1]] = labels[idx]

    model = bfilename
    etc = "minsize_%.1f_minsample_%.1f_alpha_%.1f"%(_min_cluster_size, _min_samples, _alpha)

    # Number of distinct labels, not counting the label -1.
    n_clusters_ = len(set(labels))
    if -1 in labels:
        n_clusters_ -= 1
    print(  "cluster num : ", n_clusters_   )

    H3 = np.array(grid)
    # NOTE(review): this image is drawn before a new figure is opened below,
    # so it ends up on a separate, unsaved figure -- confirm it is wanted.
    plt.imshow(H3[::-1])

    plt.figure(figsize=(10,10))
    plt.title("HDBSCAN [%s], min_cluster_size : %d, result cluster number : %d, \ncalc time: %.2f ms"%(dataset, _min_cluster_size, n_clusters_, exeTime))
    plt.scatter(originaldata[:,1], originaldata[:,0],
                s=2, marker='s', cmap=_cmap, c=labels)
    plt.savefig("hdbscan_dr_png/%d/%s_%s_1.png"%(tsne_cnt,model, etc), dpi=300)
    plt.show()
    print(exeTime,'ms')

    palette = sns.color_palette('deep', 8)
    clusterer.condensed_tree_.plot(select_clusters=True, selection_palette=palette)
    plt.savefig("hdbscan_dr_png/%d/%s_%s_1.5.png"%(tsne_cnt,model, etc), dpi=300)
    plt.show()

    # Additional picture: the clustered points drawn in the embedding itself.
    viewTSNEClustering(targetdata, labels, "hdbscan_dr_png/%d/%s_%s_2.png"%(tsne_cnt,model, etc) )

    binaryFileWrite2DHist("hdbscan_dr_bin/%d/%s_%s.bin"%(tsne_cnt,model, etc), H3)

    print(bfilename)

    return labels


ModuleNotFoundError: No module named 'hdbscan'

In [None]:
# Names of the feature variants; they are used positionally, i.e. index[i]
# is expected to describe element i of the list returned by transform().
index = ["vgh", "vglogh", 
         "n_l1_vg_h", "n_l1_vg_logh", "n_l1_vgh", "n_l1_vglogh", 
         "n_l2_vg_h", "n_l2_vg_logh", "n_l2_vgh", "n_l2_vglogh", 
         "n_max_vg_h", "n_max_vg_logh", "n_max_vgh", "n_max_vglogh", 
         "std_vg_h", "std_vg_logh", "std_vgh", "std_vglogh", 
         "mm_vg_h", "mm_vg_logh", "mm_vgh", "mm_vglogh", 
         "pt_vg_h", "pt_vg_logh", "pt_vgh", "pt_vglogh"
        ]

# Size of the 2-D histogram stored in the cache file.
w=256
h=256
size = w*h

#'VisMale_128x256x256','bonsai256X256X256B', 'Carp_256x256x512','XMasTree-LO_256x249x256' 
for dataset in ['VisMale_128x256x256'] :
    # Start from empty lists for every dataset; otherwise the rows of an
    # earlier dataset would be mixed into the arrays of the next one as soon
    # as more than one dataset is listed above.
    all_val_grad_hist = []
    sparse_val_grad_hist = []

    with open('../volumeCache/%s.raw.2DHistogram.TextureCache'%(dataset), 'rb') as fp:
        # File layout as read here: one little-endian float32 header value,
        # followed by h * w little-endian uint32 bin counts, row by row.
        Histogram2DYMax = unpack('<f', fp.read(4))[0] #Max of Grad_mag
        for i in range(h) :
            for j in range(w):
                readdata = unpack('<L', fp.read(4))[0]
                all_val_grad_hist.append([i, j, readdata])
                # The sparse table keeps non-empty bins only and skips row 0.
                if readdata>=1 and i!=0:
                    sparse_val_grad_hist.append([i, j, readdata])

        np_all_val_grad_hist = np.array(all_val_grad_hist)
        np_sparse_val_grad_hist = np.array(sparse_val_grad_hist)

        ret_all_array = transform(np_all_val_grad_hist)
        ret_sparse_array = transform(np_sparse_val_grad_hist)
        
        tsne_all_array = []
        tsne_sparse_array = []
        
        ###########################################################################################################
        
        #for i in range(len(ret_all_array)):
        #    print("%d_%s_%s"% (i, dataset, index[i]), end='\t')
        #    tsne_all_array.append(_TSNE(100,ret_all_array[i]))
        
        #for i in range(len(tsne_all_array)):
        #    save_tsne_result( tsne_all_array[i],  "tsneCache", "%d_%s_%s"% (i, dataset, index[i]) )        
        
        #for i in range(len(tsne_all_array)):
        #    for _k in [10,15,20]:
        #        kmeans(_k,  tsne_all_array[i], np_all_val_grad_hist, 256,256,  "%d_%s_%s"% (i, dataset, index[i]) )
        #    
        #    for _eps in [0.5, 1.0, 1.5, 3.0]:
        #        dbscan(_eps, tsne_all_array[i], np_all_val_grad_hist, 256,256,  "%d_%s_%s"% (i, dataset, index[i]))
        #    Hdbscan(300, 20, 1.0, tsne_all_array[i], np_all_val_grad_hist, 256,256,  "%d_%s_%s"% (i, dataset, index[i])  ) 
        
        ###########################################################################################################
        
        # Ten independent t-SNE runs; tsne_sparse_array[run][i] is the
        # embedding of feature variant i in that run.
        for tsne_cnt in range(10):
            tsne_sparse_array.append([])
            for i in range(len(ret_sparse_array)):
                print("%s_%d_sparse_%s"% ( dataset, i,index[i]), end='\t')
                tsne_sparse_array[tsne_cnt].append(_TSNE(3000,ret_sparse_array[i]))

            # save_tsne_result is not defined in the cells shown here; it has
            # to be available in the kernel before this cell is run.
            for i in range(len(tsne_sparse_array[tsne_cnt])):
                save_tsne_result( tsne_sparse_array[tsne_cnt][i],  "tsneCache", "%d_%s_%d_sparse_%s"% (tsne_cnt, dataset,i,  index[i]) )     

In [None]:
# Run HDBSCAN on every embedding stored in tsne_sparse_array
# (10 t-SNE runs, one embedding per feature variant).
for tsne_cnt in range(10):
    embeddings = tsne_sparse_array[tsne_cnt]
    for i in range(len(embeddings)):
        run_name = "%d_%d_%s_%s"% (tsne_cnt, i, dataset, index[i])
        Hdbscan_dr(80, 20, 1.0, embeddings[i], np_sparse_val_grad_hist,
                   256, 256, run_name, tsne_cnt)

In [2]:
!pip install hdbscan

Collecting cython>=0.27
  Using cached Cython-0.29.21-cp37-cp37m-win_amd64.whl (1.6 MB)
Installing collected packages: cython
Successfully installed cython-0.29.21
