In [None]:
import sTAD
import pandas as pd
import numpy as np
from colour import Color
from sklearn.metrics.pairwise import cosine_distances, euclidean_distances
import pydot
from IPython.display import Image, display
from Levenshtein import distance
import matplotlib as plt
from holoviews import opts

from importlib import reload

from bokeh.models import HoverTool
import matplotlib.pyplot as plt

In [None]:
# sTAD.debug = True

In [None]:
# Load HoloViews and select Bokeh as its rendering backend.
import holoviews as hv
hv.extension('bokeh')

# Print NumPy arrays with one decimal place and lines up to 220 characters wide.
np.set_printoptions(precision=1,linewidth=220)

# Load data

In [None]:
def levenshtein_matrix(sequences):
    """Return the symmetric matrix of pairwise Levenshtein distances.

    Parameters
    ----------
    sequences : iterable of str
        Strings to compare. The iterable is materialised into a list first,
        so a pandas Series works as well as a plain list.

    Returns
    -------
    numpy.ndarray
        Float array of shape (n, n); entry [i, j] is
        ``distance(sequences[i], sequences[j])`` and the diagonal is 0.
    """
    items = list(sequences)
    n_items = len(items)
    matrix = np.zeros((n_items, n_items))
    for i in range(n_items):
        # Edit distance is symmetric, so only the upper triangle is computed.
        for j in range(i + 1, n_items):
            matrix[i, j] = matrix[j, i] = distance(items[i], items[j])
    return matrix


# --- Configuration -----------------------------------------------------------
# Which dataset to load: 'sim', 'fork', 'iris', 'horse', 'eqtl' or 'circles'.
dataset = 'circles'
values = None
colours = []
dist_matrix = None

# Number of rows drawn from the 'horse' dataset, and the seed that makes that
# draw repeatable across kernel restarts.
sample_size = 200
random_seed = 42

# --- Load the selected dataset and build its pairwise distance matrix --------
if dataset == 'sim':
    data = pd.read_csv('data/simulated.csv', header=0)
    values = data[['x','y']]
    dist_matrix = euclidean_distances(values)
elif dataset == 'fork':
    # NOTE(review): this branch only loads the file; `values` and `dist_matrix`
    # stay None, yet a later cell passes dist_matrix to sTAD.normalize_matrix.
    # Compute them here once the column layout of this file is confirmed.
    data = pd.read_csv('data/simulated_fork.csv', header=0)
elif dataset == 'iris':
    data = pd.read_csv('data/iris.csv', header=0)
    # First four fields of every row are the coordinates, the fifth the colour key.
    values = [row[0:4] for row in data.values]
    colours = [row[4] for row in data.values]
    dist_matrix = euclidean_distances(values)
    data['lens'] = data['sepal_length']
    data = sTAD.assign_bins(data, 8)
elif dataset == 'horse':
    horse = pd.read_csv('data/horse.csv', header=None)
    # Seeded so that a fresh "Restart & Run All" draws the same rows.
    data = horse.sample(n=sample_size, random_state=random_seed)
    data.columns = ['x','y','z']

    # Map each point's x value to a hue via sTAD.normalize (target range 0..1).
    domain_min = np.min(data['x'])
    domain_max = np.max(data['x'])
    hues = list(sTAD.normalize(data['x'], domain_min, domain_max, 0, 1))

    colours = [Color(hsl=(hue, 1, 0.5)).hex_l for hue in hues]
    dist_matrix = euclidean_distances(data)
elif dataset == 'eqtl':
    # Only the first column of the file is kept.
    data = pd.read_csv('data/compressed_eqtl_ambiguity.csv', header=None)[0]
    dist_matrix = levenshtein_matrix(data)
#     values = data[['CDH1','CDH10','CDH11','CDH19','PCDH1','PCDH10','PCDH17','PCDH19','PCDH8','CDH2','CDH22','CDH5','CDH6','CDH7','CDH9']]
#     data['lens'] = data['CDH1']
#     data = sTAD.assign_bins(data,20)
elif dataset == 'circles':
    data = pd.read_csv('data/five_circles.csv', header=0)
    data['lens'] = data['hue']
    values = data[['x','y']]
    data = sTAD.assign_bins(data, 8)
    dist_matrix = euclidean_distances(values)
else:
    raise ValueError("Unknown dataset")

In [None]:
# Display the loaded dataset as this cell's output.
data

In [None]:
# Each tooltip row is a (label, '@column') pair; label and column share a name.
tooltip_fields = ('id', 'x', 'y', 'bin')
tooltips = [(field, '@' + field) for field in tooltip_fields]

# Hover tool object referenced by the %%opts line of the points plot.
hover = HoverTool(tooltips=tooltips)

In [None]:
%%opts Points [width=600, tools=[hover]]
# Plot the records using their 'x' and 'y' columns with fixed extents
# (0, 0, 1200, 1200). The cell magic above sets the plot width and attaches
# the `hover` tool object.
points = hv.Points(data, ['x','y'], extents=(0,0,1200,1200))
# Marker size 5; color is given as 'hue' — presumably the data column of that
# name, which only some of the datasets provide (verify before switching dataset).
points.opts(size=5, color='hue')
points

In [None]:
# Replace dist_matrix with the output of sTAD.normalize_matrix. The name is
# reused, so re-running this cell feeds the already-processed matrix back in.
dist_matrix = sTAD.normalize_matrix(dist_matrix)

# Without lens

In [None]:
# Unpack the five values returned by sTAD.create_mst for the distance matrix.
# `mst` and `non_mst` are handed to sTAD.create_network in a later cell.
mst_graph, mst, non_mst, cmdm, dm_distances = sTAD.create_mst(dist_matrix)

In [None]:
# Re-execute the sTAD module so edits to its source are picked up, then set
# its `debug` attribute to True.
reload(sTAD)
sTAD.debug = True

In [None]:
# Unpack the three values returned by sTAD.create_complete_plot (res=40).
# NOTE(review): the plotting cells that follow read sTAD.history_x and
# sTAD.history_y from the module, not these local names — confirm both hold
# the same data.
history_x, history_y, history_graph = sTAD.create_complete_plot(dist_matrix, res=40)

In [None]:
%%opts Scatter [width=600, tools=["hover"]]
# Scatter the module-level sTAD.history_x against sTAD.history_y with fixed
# extents (-100, 0.8, 5000, 1).
hv.Scatter((sTAD.history_x,sTAD.history_y), extents=(-100,0.8,5000,1))

In [None]:
# Reload sTAD once more, then run sTAD.find_stad_optimum on the distance matrix.
# result[0] is later passed to sTAD.create_network and used in export file names.
reload(sTAD)
result = sTAD.find_stad_optimum(dist_matrix)

In [None]:
# Draw the module-level sTAD.history_y as a curve with fixed extents.
hv.Curve(sTAD.history_y, extents=(-10,0.88,350,1))

In [None]:
# Build the network `g` from result[0] together with the `mst` and `non_mst`
# values unpacked from sTAD.create_mst.
g = sTAD.create_network(result[0], mst, non_mst)

In [None]:
import csv

# Attach per-vertex attributes taken from the loaded data: the 'hue' column as
# 'color' and the stringified 'bin' column as 'bin'.
g.vs['color'] = data['hue']
g.vs['bin'] = [str(bin_label) for bin_label in data['bin']]

# Both output files share the prefix "<dataset>_<result[0]>".
export_prefix = dataset + '_' + str(result[0])

# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' get an extra '\r' per row.
with open(export_prefix + '_nodes.csv', 'w', newline='') as f:
    file_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    file_writer.writerow(['id','name','color','bin'])
    # One row per vertex; the vertex index serves as both id and name.
    file_writer.writerows([v.index, v.index, v['color'], v['bin']] for v in g.vs)
with open(export_prefix + '_edges.csv', 'w', newline='') as f:
    file_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    file_writer.writerow(['source','target'])
    # One row per edge: the two endpoint entries of the edge's tuple.
    file_writer.writerows([e.tuple[0], e.tuple[1]] for e in g.es)

# With lens

In [None]:
# Derive a second distance matrix from dist_matrix and the loaded data.
# NOTE(review): presumably relies on the 'lens' / 'bin' columns added while
# loading — verify against sTAD.alter_dist_matrix_1step.
dist_matrix_with_lens = sTAD.alter_dist_matrix_1step(dist_matrix, data)

In [None]:
# Same sTAD.create_mst call as in the no-lens section, applied to the
# lens-adjusted matrix; results go into separate *_lens names.
mst_graph_lens, mst_lens, non_mst_lens, cmdm_lens, dm_distances_lens = sTAD.create_mst(dist_matrix_with_lens)

In [None]:
# Run sTAD.create_complete_plot (res=40) on the lens-adjusted matrix. This
# overwrites the history_x / history_y / history_graph names from the
# no-lens section.
history_x, history_y, history_graph = sTAD.create_complete_plot(dist_matrix_with_lens, res=40)

In [None]:
# Draw the module-level sTAD.history_y as a curve with fixed extents.
hv.Curve(sTAD.history_y, extents=(-10,0.1,50,1))

In [None]:
%%opts Scatter [width=600, tools=["hover"]]
# Scatter the module-level sTAD.history_x against sTAD.history_y with fixed
# extents (0, 0, 100000, 1).
hv.Scatter((sTAD.history_x, sTAD.history_y), extents=(0,0,100000,1))

In [None]:
# Run sTAD.find_stad_optimum on the lens-adjusted matrix. This overwrites the
# `result` computed in the no-lens section.
result = sTAD.find_stad_optimum(dist_matrix_with_lens)

In [None]:
# Draw the module-level sTAD.history_y as a curve with fixed extents.
hv.Curve(sTAD.history_y, extents=(-10,0,350,0.4))

In [None]:
# Print the value returned by sTAD.find_stad_optimum for the lens-adjusted matrix.
print(result)

In [None]:
# Build the network from result[0] and the lens-based `mst_lens` / `non_mst_lens`.
# This overwrites the `g` created in the no-lens section.
g = sTAD.create_network(result[0], mst_lens, non_mst_lens)

In [None]:
import csv

# Attach per-vertex attributes taken from the loaded data: the 'hue' column as
# 'color' and the stringified 'bin' column as 'bin'.
g.vs['color'] = data['hue']
g.vs['bin'] = [str(bin_label) for bin_label in data['bin']]

# Both output files share the prefix "<dataset>_<result[0]>"; the '_lens'
# infix in the file names marks them as the lens-based export.
export_prefix = dataset + '_' + str(result[0])

# newline='' hands line-ending control to the csv module, as its documentation
# requires; without it, platforms that translate '\n' get an extra '\r' per row.
with open(export_prefix + '_lens_nodes.csv', 'w', newline='') as f:
    file_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    file_writer.writerow(['id','name','color','bin'])
    # One row per vertex; the vertex index serves as both id and name.
    file_writer.writerows([v.index, v.index, v['color'], v['bin']] for v in g.vs)
with open(export_prefix + '_lens_edges.csv', 'w', newline='') as f:
    file_writer = csv.writer(f, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    file_writer.writerow(['source','target'])
    # One row per edge: the two endpoint entries of the edge's tuple.
    file_writer.writerows([e.tuple[0], e.tuple[1]] for e in g.es)