In [23]:
import os
import re
import logging
from optparse import OptionParser
import sys
from time import time
import glob
from pathlib import Path
from collections import defaultdict
import numpy as np
import pandas as pd
from time import time
import sklearn
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans, DBSCAN
from sklearn.cluster import Birch
from sklearn.cluster import SpectralClustering
from sklearn.cluster import AgglomerativeClustering, FeatureAgglomeration
from sklearn import metrics
from sklearn.datasets import fetch_20newsgroups
from sklearn.feature_extraction.text import HashingVectorizer
from sklearn.pipeline import make_pipeline
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.preprocessing import Normalizer
from sklearn.decomposition import TruncatedSVD
from datetime import datetime

In [24]:
#change this to where you have the dataset in your local machine 

data_folder = 'C:\\Data\\DataSetForPaper2023\\'
#'C:\\Users\\boo34\\OneDrive - University of Brighton\\Desktop\\Research Papers\\Publication\\DataSetForPaper2023\\'

In [44]:
t0 = time()
collection_list = ["crisis3", "NG3", "crisis4", "R4", "NG5", "R5", "NG6", "R6"]
#collection_list = ["NG3"]

for collectionName in collection_list:

    container_path = Path(data_folder + collectionName)

    dataset = sklearn.datasets.load_files(container_path,  description=None, categories=None, load_content=True,
                                          shuffle=True, encoding='utf-8', decode_error='ignore', random_state=0, allowed_extensions=None)

    print("%d documents" % len(dataset.data))
    print("%d categories" % len(dataset.target_names))
    print()
    
     # Feature Extraction
    # ------------------

    labels = dataset.target
    true_k = np.unique(labels).shape[0]

    print("Extracting features from the training dataset using a sparse vectorizer")
 

    useHashing = False
    useIDF = False
    nFeature = 1000

    if useHashing:
        if useIDF:
            # Perform an IDF normalization on the output of HashingVectorizer
            hasher = HashingVectorizer(
                n_features=nFeature,
                stop_words="english",
                alternate_sign=False,
                norm=None,
            )
            vectorizer = make_pipeline(hasher, TfidfTransformer())
        else:
            vectorizer = HashingVectorizer(
                n_features=nFeature,
                stop_words="english",
                alternate_sign=False,
                norm="l2",
            )
    else:
        print("tfidf vectorizer")
        vectorizer = TfidfVectorizer(
            max_df=0.5,
            max_features= nFeature,
            min_df=2,
            stop_words="english",
            use_idf= True 
        )


    # K - means clustering runs
    for i in range(11):
        X = vectorizer.fit_transform(dataset.data)
        tvm = vectorizer.fit_transform(dataset.data).toarray()

        print("done in %fs" % (time() - t0))
        print("n_samples: %d, n_features: %d" % X.shape)
        numDocs = X.shape[0]

        print()

        km = KMeans(
            n_clusters=true_k,
            init="k-means++",
            max_iter=100,
            n_init=1,
            verbose= False
        )

        print("kMeans ++ run number: " + str(i))
        print("Clustering sparse data with %s" % km)
        #t0 = time()
        km.fit(X)
        print("done in %0.3fs" % (time() - t0))

        # %%
        # Performance metrics
        # -------------------

        km_v = metrics.v_measure_score(labels, km.labels_)
        km_h = metrics.homogeneity_score(labels, km.labels_)
        km_c = metrics.completeness_score(labels, km.labels_)
        km_adjustedRand = metrics.adjusted_rand_score(labels, km.labels_)        
        
        print(f"K Means V-measure: {km_v:.5f} Homogeneity: {km_h:.2f} Completeness: {km_c:.2f} Adjusted Rand-Index: {km_adjustedRand:.2f}")
        
        ag =  AgglomerativeClustering(n_clusters = None, distance_threshold= 1.70). fit(tvm) 
        
        ag_v = metrics.v_measure_score(labels, ag.labels_)
        ag_h = metrics.homogeneity_score(labels, ag.labels_)
        ag_c = metrics.completeness_score(labels, ag.labels_)
        ag_adjustedRand = metrics.adjusted_rand_score(labels, ag.labels_)
        
        labelLength = ag.labels_
        uniqueLabel=len(np.unique(labelLength))

        print(f"ag v {ag_v}  ag rand {ag_adjustedRand} ag unique label length {uniqueLabel}")
        
        sc= SpectralClustering(n_clusters = true_k, affinity ='nearest_neighbors').fit(tvm)

        sc_v = metrics.v_measure_score(labels, sc.labels_)
        sc_h = metrics.homogeneity_score(labels, sc.labels_)
        sc_c = metrics.completeness_score(labels, sc.labels_)
        sc_adjustedRand = metrics.adjusted_rand_score(labels, sc.labels_)

        print(f"spectral v {sc_v} spectral adjustred rand {sc_adjustedRand}")


       # filePath = "resultsKpython25.csv"
        #resultsFile = open(filePath, "a")
        
        filePath = "resultsKpython38.csv"
        resultsFile = open(filePath, "a")

        if os.path.getsize(filePath) == 0:
            resultsFile.write("index, km_v, km_h, km_c, km_adjustRand, ag_v, ag_adjustedRand, sc_v, sc_adjustedRand, nFeature, numDocs, useHashing, date \n")

        resultsFile.write(f"{collectionName}, {km_v}, {km_h}, {km_c}, {km_adjustedRand}, {ag_v}, {ag_adjustedRand}, {sc_v}, {sc_adjustedRand}, {nFeature}, {numDocs}, {useHashing}, {datetime.now()}  \n")    

        print()
        resultsFile.close()
        
        

    #    if os.path.getsize(filePath) == 0:
     #       resultsFile.write("index, v, h, c, adjustRand, nFeature, numDocs, useHashing, date \n")
        
      #  resultsFile.write(f"{collectionName}, {v}, {h}, {c}, {adjustedRand}, {nFeature}, {numDocs}, {useHashing}, {datetime.now()}  \n")    

        print()
        resultsFile.close()
 
print("total time %fs" % (time() - t0))
    

1500 documents
3 categories

Extracting features from the training dataset using a sparse vectorizer
done in 0.183495s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 0.208s
K Means V-measure: 0.20049 Homogeneity: 0.19 Completeness: 0.21 Adjusted Rand-Index: 0.15
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 1.407380s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 1.426s
K Means V-measure: 0.29188 Homogeneity: 0.28 Completeness: 0.30 Adjusted Rand-Index: 0.20
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 2.647505s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 2.680s
K Means V-measure: 0.27939 Homogeneity: 0.27 Completeness: 0.29 Adjusted Rand-Index: 0.19
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.49962141473894367 spectral adjustred rand 0.42561458659773943


done in 3.871143s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 3.887s
K Means V-measure: 0.26256 Homogeneity: 0.24 Completeness: 0.29 Adjusted Rand-Index: 0.16
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 5.114279s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 5.146s
K Means V-measure: 0.20028 Homogeneity: 0.19 Completeness: 0.21 Adjusted Rand-Index: 0.15
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 6.463285s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 6.495s
K Means V-measure: 0.36386 Homogeneity: 0.34 Completeness: 0.39 Adjusted Rand-Index: 0.27
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 7.828682s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 7.862s
K Means V-measure: 0.30597 Homogeneity: 0.26 Completeness: 0.37 Adjusted Rand-Index: 0.16
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.49962141473894367 spectral adjustred rand 0.42561458659773943


done in 9.114097s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 9.145s
K Means V-measure: 0.19109 Homogeneity: 0.19 Completeness: 0.19 Adjusted Rand-Index: 0.18
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 10.456359s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 10.492s
K Means V-measure: 0.26762 Homogeneity: 0.24 Completeness: 0.30 Adjusted Rand-Index: 0.21
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 11.874303s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 11.907s
K Means V-measure: 0.11566 Homogeneity: 0.10 Completeness: 0.13 Adjusted Rand-Index: 0.08
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


done in 13.238678s
n_samples: 1500, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 13.271s
K Means V-measure: 0.11055 Homogeneity: 0.11 Completeness: 0.11 Adjusted Rand-Index: 0.11
ag v 0.2931513214733286  ag rand 0.03783116239664239 ag unique label length 146




spectral v 0.4996214147389438 spectral adjustred rand 0.42561458659773943


1200 documents
3 categories

Extracting features from the training dataset using a sparse vectorizer
done in 14.913179s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 14.945s
K Means V-measure: 0.28649 Homogeneity: 0.28 Completeness: 0.29 Adjusted Rand-Index: 0.22
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 16.235809s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 16.271s
K Means V-measure: 0.31348 Homogeneity: 0.31 Completeness: 0.32 Adjusted Rand-Index: 0.28
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671221 spectral adjustred rand 0.8203336714028817


done in 17.536169s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 17.571s
K Means V-measure: 0.34573 Homogeneity: 0.34 Completeness: 0.35 Adjusted Rand-Index: 0.29
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 18.831268s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 18.861s
K Means V-measure: 0.28324 Homogeneity: 0.28 Completeness: 0.29 Adjusted Rand-Index: 0.21
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671221 spectral adjustred rand 0.8203336714028817


done in 20.193807s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 20.224s
K Means V-measure: 0.27945 Homogeneity: 0.27 Completeness: 0.29 Adjusted Rand-Index: 0.21
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 21.489172s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 21.519s
K Means V-measure: 0.27065 Homogeneity: 0.26 Completeness: 0.28 Adjusted Rand-Index: 0.20
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 22.959873s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 22.991s
K Means V-measure: 0.61657 Homogeneity: 0.60 Completeness: 0.63 Adjusted Rand-Index: 0.57
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 24.317968s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 24.346s
K Means V-measure: 0.69829 Homogeneity: 0.69 Completeness: 0.70 Adjusted Rand-Index: 0.74
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671221 spectral adjustred rand 0.8203336714028817


done in 25.618682s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 25.645s
K Means V-measure: 0.32772 Homogeneity: 0.32 Completeness: 0.34 Adjusted Rand-Index: 0.27
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


done in 26.903135s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 26.932s
K Means V-measure: 0.30695 Homogeneity: 0.30 Completeness: 0.31 Adjusted Rand-Index: 0.25
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671221 spectral adjustred rand 0.8203336714028817


done in 28.181859s
n_samples: 1200, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=3, n_init=1, verbose=False)
done in 28.210s
K Means V-measure: 0.28648 Homogeneity: 0.28 Completeness: 0.29 Adjusted Rand-Index: 0.22
ag v 0.3206571993339882  ag rand 0.02842937259523754 ag unique label length 148




spectral v 0.7630971529671222 spectral adjustred rand 0.8203336714028817


2000 documents
4 categories

Extracting features from the training dataset using a sparse vectorizer
done in 29.333395s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 29.361s
K Means V-measure: 0.41811 Homogeneity: 0.40 Completeness: 0.44 Adjusted Rand-Index: 0.33
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


done in 31.492922s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 31.528s
K Means V-measure: 0.18779 Homogeneity: 0.18 Completeness: 0.19 Adjusted Rand-Index: 0.14
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


done in 33.668145s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 33.692s
K Means V-measure: 0.20806 Homogeneity: 0.20 Completeness: 0.21 Adjusted Rand-Index: 0.14
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


done in 35.801094s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 35.839s
K Means V-measure: 0.47755 Homogeneity: 0.47 Completeness: 0.48 Adjusted Rand-Index: 0.38
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.3654481735070524 spectral adjustred rand 0.20682366483846568


done in 38.087897s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 38.130s
K Means V-measure: 0.18503 Homogeneity: 0.18 Completeness: 0.20 Adjusted Rand-Index: 0.12
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.3666313738132752 spectral adjustred rand 0.20796503951810053


done in 40.472974s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 40.504s
K Means V-measure: 0.43208 Homogeneity: 0.43 Completeness: 0.44 Adjusted Rand-Index: 0.37
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


done in 42.646949s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 42.694s
K Means V-measure: 0.40121 Homogeneity: 0.39 Completeness: 0.41 Adjusted Rand-Index: 0.31
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.3654481735070525 spectral adjustred rand 0.20682366483846568


done in 44.738550s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 44.777s
K Means V-measure: 0.54911 Homogeneity: 0.52 Completeness: 0.58 Adjusted Rand-Index: 0.42
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.3654481735070524 spectral adjustred rand 0.20682366483846568


done in 46.851786s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 46.879s
K Means V-measure: 0.37325 Homogeneity: 0.34 Completeness: 0.41 Adjusted Rand-Index: 0.25
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


done in 49.083248s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 49.114s
K Means V-measure: 0.31720 Homogeneity: 0.30 Completeness: 0.33 Adjusted Rand-Index: 0.28
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36556392846511154 spectral adjustred rand 0.20598097801740595


done in 51.285228s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 51.316s
K Means V-measure: 0.44055 Homogeneity: 0.43 Completeness: 0.45 Adjusted Rand-Index: 0.38
ag v 0.3419645099050147  ag rand 0.036864363223099675 ag unique label length 207




spectral v 0.36663137381327515 spectral adjustred rand 0.20796503951810053


800 documents
4 categories

Extracting features from the training dataset using a sparse vectorizer
done in 53.582304s
n_samples: 800, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 53.614s
K Means V-measure: 0.27498 Homogeneity: 0.25 Completeness: 0.30 Adjusted Rand-Index: 0.16
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 54.379853s
n_samples: 800, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 54.411s
K Means V-measure: 0.46900 Homogeneity: 0.46 Completeness: 0.48 Adjusted Rand-Index: 0.38
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.37190333559124256 spectral adjustred rand 0.16119446397649706


done in 55.176247s
n_samples: 800, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 55.211s
K Means V-measure: 0.34471 Homogeneity: 0.31 Completeness: 0.39 Adjusted Rand-Index: 0.19
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.37190333559124256 spectral adjustred rand 0.16119446397649706


done in 55.992199s
n_samples: 800, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 56.023s
K Means V-measure: 0.32998 Homogeneity: 0.30 Completeness: 0.37 Adjusted Rand-Index: 0.18
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 56.754647s
n_samples: 800, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 56.771s
K Means V-measure: 0.17015 Homogeneity: 0.14 Completeness: 0.21 Adjusted Rand-Index: 0.11
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 57.543926s
n_samples: 800, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 57.566s
K Means V-measure: 0.36518 Homogeneity: 0.33 Completeness: 0.41 Adjusted Rand-Index: 0.22
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 58.358649s
n_samples: 800, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 58.396s
K Means V-measure: 0.28337 Homogeneity: 0.25 Completeness: 0.33 Adjusted Rand-Index: 0.14
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.37190333559124256 spectral adjustred rand 0.16119446397649706


done in 59.177052s
n_samples: 800, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 59.208s
K Means V-measure: 0.31098 Homogeneity: 0.28 Completeness: 0.35 Adjusted Rand-Index: 0.17
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 59.967965s
n_samples: 800, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 59.996s
K Means V-measure: 0.42599 Homogeneity: 0.40 Completeness: 0.45 Adjusted Rand-Index: 0.37
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 60.737643s
n_samples: 800, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 60.770s
K Means V-measure: 0.28337 Homogeneity: 0.25 Completeness: 0.33 Adjusted Rand-Index: 0.14
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.3719033355912425 spectral adjustred rand 0.16119446397649706


done in 61.516581s
n_samples: 800, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=4, n_init=1, verbose=False)
done in 61.536s
K Means V-measure: 0.28096 Homogeneity: 0.24 Completeness: 0.33 Adjusted Rand-Index: 0.14
ag v 0.4422729501917088  ag rand 0.09857258803102673 ag unique label length 68




spectral v 0.37190333559124256 spectral adjustred rand 0.16119446397649706


2000 documents
5 categories

Extracting features from the training dataset using a sparse vectorizer
done in 62.956854s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 62.983s
K Means V-measure: 0.27960 Homogeneity: 0.28 Completeness: 0.28 Adjusted Rand-Index: 0.20
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815663 spectral adjustred rand 0.22565618071460306


done in 65.816595s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 65.866s
K Means V-measure: 0.32071 Homogeneity: 0.31 Completeness: 0.33 Adjusted Rand-Index: 0.22
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815663 spectral adjustred rand 0.22565618071460306


done in 68.519984s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 68.579s
K Means V-measure: 0.33857 Homogeneity: 0.33 Completeness: 0.35 Adjusted Rand-Index: 0.23
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.4575218423881565 spectral adjustred rand 0.22565618071460306


done in 71.457569s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 71.505s
K Means V-measure: 0.36895 Homogeneity: 0.36 Completeness: 0.38 Adjusted Rand-Index: 0.24
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.4575218423881565 spectral adjustred rand 0.22565618071460306


done in 74.224088s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 74.275s
K Means V-measure: 0.37726 Homogeneity: 0.37 Completeness: 0.39 Adjusted Rand-Index: 0.25
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.4575218423881565 spectral adjustred rand 0.22565618071460306


done in 76.988747s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 77.035s
K Means V-measure: 0.28848 Homogeneity: 0.28 Completeness: 0.30 Adjusted Rand-Index: 0.18
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.4575218423881565 spectral adjustred rand 0.22565618071460306


done in 79.863462s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 79.895s
K Means V-measure: 0.40811 Homogeneity: 0.40 Completeness: 0.42 Adjusted Rand-Index: 0.34
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815663 spectral adjustred rand 0.22565618071460306


done in 82.793085s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 82.834s
K Means V-measure: 0.33431 Homogeneity: 0.32 Completeness: 0.35 Adjusted Rand-Index: 0.19
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815646 spectral adjustred rand 0.22565618071460306


done in 85.695859s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 85.733s
K Means V-measure: 0.34102 Homogeneity: 0.34 Completeness: 0.35 Adjusted Rand-Index: 0.25
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815663 spectral adjustred rand 0.22565618071460306


done in 89.120314s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 89.156s
K Means V-measure: 0.26909 Homogeneity: 0.26 Completeness: 0.28 Adjusted Rand-Index: 0.17
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815663 spectral adjustred rand 0.22565618071460306


done in 91.847753s
n_samples: 2000, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 91.879s
K Means V-measure: 0.29551 Homogeneity: 0.29 Completeness: 0.30 Adjusted Rand-Index: 0.20
ag v 0.34844891544417994  ag rand 0.026296255757636302 ag unique label length 244




spectral v 0.45752184238815646 spectral adjustred rand 0.22565618071460306


500 documents
5 categories

Extracting features from the training dataset using a sparse vectorizer
done in 94.238744s
n_samples: 500, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 94.274s
K Means V-measure: 0.53623 Homogeneity: 0.52 Completeness: 0.55 Adjusted Rand-Index: 0.47
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 94.864144s
n_samples: 500, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 94.895s
K Means V-measure: 0.51913 Homogeneity: 0.50 Completeness: 0.54 Adjusted Rand-Index: 0.47
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 95.520720s
n_samples: 500, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 95.552s
K Means V-measure: 0.55410 Homogeneity: 0.54 Completeness: 0.57 Adjusted Rand-Index: 0.52
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 96.222429s
n_samples: 500, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 96.238s
K Means V-measure: 0.42346 Homogeneity: 0.37 Completeness: 0.50 Adjusted Rand-Index: 0.27
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 96.886145s
n_samples: 500, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 96.911s
K Means V-measure: 0.55885 Homogeneity: 0.54 Completeness: 0.58 Adjusted Rand-Index: 0.49
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 97.571661s
n_samples: 500, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 97.604s
K Means V-measure: 0.54168 Homogeneity: 0.52 Completeness: 0.57 Adjusted Rand-Index: 0.45
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 98.255294s
n_samples: 500, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 98.279s
K Means V-measure: 0.54772 Homogeneity: 0.53 Completeness: 0.57 Adjusted Rand-Index: 0.50
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 98.908092s
n_samples: 500, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 98.932s
K Means V-measure: 0.60671 Homogeneity: 0.61 Completeness: 0.61 Adjusted Rand-Index: 0.60
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 99.580067s
n_samples: 500, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 99.612s
K Means V-measure: 0.40785 Homogeneity: 0.38 Completeness: 0.44 Adjusted Rand-Index: 0.30
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 100.239454s
n_samples: 500, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 100.269s
K Means V-measure: 0.75691 Homogeneity: 0.76 Completeness: 0.76 Adjusted Rand-Index: 0.78
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


done in 100.884470s
n_samples: 500, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=5, n_init=1, verbose=False)
done in 100.910s
K Means V-measure: 0.56980 Homogeneity: 0.55 Completeness: 0.60 Adjusted Rand-Index: 0.49
ag v 0.4859187389615588  ag rand 0.14473806286863766 ag unique label length 48




spectral v 0.4348447230835253 spectral adjustred rand 0.26999553131405774


2400 documents
6 categories

Extracting features from the training dataset using a sparse vectorizer
done in 102.418449s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 102.498s
K Means V-measure: 0.30243 Homogeneity: 0.30 Completeness: 0.31 Adjusted Rand-Index: 0.20
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 106.388479s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 106.436s
K Means V-measure: 0.30426 Homogeneity: 0.30 Completeness: 0.31 Adjusted Rand-Index: 0.19
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830652 spectral adjustred rand 0.27582715050278406


done in 110.299747s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 110.349s
K Means V-measure: 0.25179 Homogeneity: 0.25 Completeness: 0.25 Adjusted Rand-Index: 0.19
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 114.343656s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 114.405s
K Means V-measure: 0.30306 Homogeneity: 0.30 Completeness: 0.31 Adjusted Rand-Index: 0.19
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830652 spectral adjustred rand 0.27582715050278406


done in 118.646669s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 118.695s
K Means V-measure: 0.31058 Homogeneity: 0.30 Completeness: 0.32 Adjusted Rand-Index: 0.19
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 122.673289s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 122.724s
K Means V-measure: 0.28216 Homogeneity: 0.27 Completeness: 0.30 Adjusted Rand-Index: 0.16
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 126.792473s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 126.854s
K Means V-measure: 0.30698 Homogeneity: 0.30 Completeness: 0.31 Adjusted Rand-Index: 0.20
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 130.914458s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 130.973s
K Means V-measure: 0.40062 Homogeneity: 0.39 Completeness: 0.41 Adjusted Rand-Index: 0.27
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830652 spectral adjustred rand 0.27582715050278406


done in 134.742926s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 134.800s
K Means V-measure: 0.28576 Homogeneity: 0.28 Completeness: 0.29 Adjusted Rand-Index: 0.20
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


done in 138.550708s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 138.598s
K Means V-measure: 0.25270 Homogeneity: 0.25 Completeness: 0.26 Adjusted Rand-Index: 0.14
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830652 spectral adjustred rand 0.27582715050278406


done in 142.321632s
n_samples: 2400, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 142.363s
K Means V-measure: 0.33763 Homogeneity: 0.33 Completeness: 0.35 Adjusted Rand-Index: 0.22
ag v 0.3860260438191469  ag rand 0.027404202097791736 ag unique label length 293




spectral v 0.5174385922830651 spectral adjustred rand 0.27582715050278406


1197 documents
6 categories

Extracting features from the training dataset using a sparse vectorizer
done in 145.676842s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 0
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 145.722s
K Means V-measure: 0.37705 Homogeneity: 0.36 Completeness: 0.39 Adjusted Rand-Index: 0.28
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 146.801976s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 1
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 146.837s
K Means V-measure: 0.38373 Homogeneity: 0.37 Completeness: 0.40 Adjusted Rand-Index: 0.27
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.27113360336692055 spectral adjustred rand 0.0636022107058925


done in 148.337084s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 2
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 148.378s
K Means V-measure: 0.37812 Homogeneity: 0.34 Completeness: 0.42 Adjusted Rand-Index: 0.21
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.27113360336692066 spectral adjustred rand 0.0636022107058925


done in 149.529493s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 3
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 149.561s
K Means V-measure: 0.38866 Homogeneity: 0.35 Completeness: 0.43 Adjusted Rand-Index: 0.20
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2716693952111827 spectral adjustred rand 0.0630821715677585


done in 150.769754s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 4
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 150.801s
K Means V-measure: 0.34966 Homogeneity: 0.31 Completeness: 0.41 Adjusted Rand-Index: 0.17
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 151.990240s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 5
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 152.025s
K Means V-measure: 0.35388 Homogeneity: 0.34 Completeness: 0.37 Adjusted Rand-Index: 0.21
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 153.362857s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 6
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 153.395s
K Means V-measure: 0.31457 Homogeneity: 0.29 Completeness: 0.34 Adjusted Rand-Index: 0.17
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 154.524802s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 7
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 154.557s
K Means V-measure: 0.36163 Homogeneity: 0.34 Completeness: 0.38 Adjusted Rand-Index: 0.21
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 155.665787s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 8
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 155.697s
K Means V-measure: 0.40652 Homogeneity: 0.36 Completeness: 0.47 Adjusted Rand-Index: 0.20
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


done in 156.854626s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 9
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 156.896s
K Means V-measure: 0.43177 Homogeneity: 0.40 Completeness: 0.46 Adjusted Rand-Index: 0.33
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.27113360336692066 spectral adjustred rand 0.0636022107058925


done in 158.120440s
n_samples: 1197, n_features: 1000

kMeans ++ run number: 10
Clustering sparse data with KMeans(max_iter=100, n_clusters=6, n_init=1, verbose=False)
done in 158.152s
K Means V-measure: 0.32548 Homogeneity: 0.30 Completeness: 0.36 Adjusted Rand-Index: 0.15
ag v 0.4415961712982154  ag rand 0.0884399259490647 ag unique label length 103




spectral v 0.2711336033669205 spectral adjustred rand 0.0636022107058925


total time 159.118971s
