In [1]:
import numpy as np
import pandas as pd

from sklearn import manifold

import plotly.offline as py
import plotly.graph_objs as go

from time import time

In [2]:
def read_arianta_csv(csv_path):
    
    df = pd.read_csv(
        csv_path,
        index_col=0,
        header=0,
        delimiter=';',       
        error_bad_lines=False, 
        dtype='unicode'
        
    )
    
    categories = [s.strip() for s in df.columns.tolist()]
    titles = [s.strip() for s in df.index.tolist()]
    data = df.as_matrix()
    
    return data, categories, titles   #   , titles

In [3]:
def scatterplot(data, labels, title):
    
    trace = go.Scatter(
        x = data[:,0],
        y = data[:,1],
        mode = 'markers',
        text = labels
    )

    layout = go.Layout(
        title = title,
        hovermode= 'closest'
    )
    
    return go.Figure(data=[trace], layout=layout)

In [4]:
methods = {
#  'Isomap': manifold.Isomap(n_neighbors=10, n_components=2),
#  'MDS': manifold.MDS(n_components=2),
#   'Spectral Clustering': manifold.SpectralEmbedding(n_components=2, n_neighbors=10),
    't-SNE': manifold.TSNE(n_components=2)
}

In [5]:
def handle(map_for, weights_type, X, labels):
    
    for method_name, method in methods.items():
    
        t1 = time()

        Y = method.fit_transform(X)

        t2 = time()
        dt = round(t2 - t1, 2)

        title = f'{map_for}, {weights_type}, {method_name}, {dt:.2f}s'
        filename = f'arianta-{map_for}-{weights_type}-{method_name}'.replace(' ', '-').lower()
        
        df = pd.DataFrame(data=Y, index=labels, columns=['x', 'y'])
        
        df.to_csv(f'{filename}.csv', encoding='utf-8')
        
        fig = scatterplot(Y, labels, title)
        
        py.plot(fig, filename=f"{filename}.html")

In [6]:
X, category_labels, title_labels = read_arianta_csv('param_new_disc44.csv')


Method .as_matrix will be removed in a future version. Use .values instead.



In [7]:
#handle('kategorie', 'nieważone', X.transpose(), category_labels)

In [8]:
handle('publikacje', 'nieważone', X, title_labels)
#handle('publikacje', 'ważone', X, title_labels)