In [None]:
import pandas as pd
import numpy as np
import glob

from pathlib import Path, PurePath
from sklearn.manifold import TSNE
from bokeh.plotting import figure, show, output_file
from bokeh.models import ColumnDataSource
from smartula_sound import SmartulaSound

In [None]:
# Folder holding one MFCC CSV file per recording, relative to the notebook's
# working directory.
folder_name = "csv/mfcc-electromagnetic-field"
data_folder = Path(folder_name)
files_to_open = data_folder / "*.csv"

# glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# the sample order (and every row index derived from it) stable between runs.
# For a quick smoke test, slice this list (e.g. all_filenames[:10]).
all_filenames = sorted(glob.glob(str(files_to_open)))
if not all_filenames:
    # Fail here with a clear message instead of deep inside a later cell.
    raise FileNotFoundError(f"No CSV files matched {files_to_open}")


def _smartula_sound_from_csv(csv_path):
    """Build a SmartulaSound from a single MFCC CSV file.

    The file name is split on spaces: the second token (with ".csv" removed)
    is passed as the first positional argument and the first token as the
    second positional argument. The CSV content is read without a header row
    and flattened into a 1-D array that is passed as ``mfcc``.

    NOTE(review): presumably the tokens are "<field flag> <timestamp>.csv";
    confirm against the SmartulaSound constructor.
    """
    name_tokens = PurePath(csv_path).name.split(" ")
    mfcc_values = np.ravel(pd.read_csv(csv_path, header=None))
    return SmartulaSound(name_tokens[1].replace(".csv", ""),
                         name_tokens[0],
                         samples=None, mfcc=mfcc_values)


list_of_ss_mfcc = [_smartula_sound_from_csv(f) for f in all_filenames]

In [9]:
# t-SNE is stochastic: without a fixed seed every Restart & Run All produces a
# different layout, so the embedding is pinned to a constant seed.
TSNE_SEED = 42

# One row per recording, one column per flattened MFCC coefficient.
# NOTE(review): assumes every mfcc_feature_vector has the same length - confirm.
mfcc_feature_matrix = np.asarray([ss.mfcc_feature_vector for ss in list_of_ss_mfcc])

# NOTE: scikit-learn 1.5 renamed `n_iter` to `max_iter`; switch the keyword
# when the environment is upgraded.
tsne_model = TSNE(n_components=2, perplexity=5, learning_rate=300, n_iter=5000,
                  verbose=1, random_state=TSNE_SEED)
mfccs_embedded = tsne_model.fit_transform(mfcc_feature_matrix)

# Build the frame in one step; rows follow the order of list_of_ss_mfcc.
data_frame = pd.DataFrame({
    'x': mfccs_embedded[:, 0],
    'y': mfccs_embedded[:, 1],
    'elfield': [ss.electromagnetic_field_on for ss in list_of_ss_mfcc],
    'timestamp': [ss.timestamp for ss in list_of_ss_mfcc],
})


[t-SNE] Computing 16 nearest neighbors...
[t-SNE] Indexed 1071 samples in 0.002s...
[t-SNE] Computed neighbors for 1071 samples in 0.061s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1071
[t-SNE] Computed conditional probabilities for sample 1071 / 1071
[t-SNE] Mean sigma: 2.957399
[t-SNE] KL divergence after 250 iterations with early exaggeration: 87.690292
[t-SNE] KL divergence after 5000 iterations: 1.282760


## Data Visualization

In [11]:
# Marker colours for the two classes shown in the scatter plot.
FIELD_ON_COLOR = "#003399"
FIELD_OFF_COLOR = "#ff0000"

# Compare the textual form of the flag so the colouring works whether
# 'elfield' holds the string "True" or a real boolean True.
data_frame['colors'] = [FIELD_ON_COLOR if str(elfield) == "True" else FIELD_OFF_COLOR
                        for elfield in data_frame['elfield']]
source = ColumnDataSource(data=data_frame)

# Comma-separated Bokeh tool names, joined without a trailing separator so no
# empty tool entry is produced.
tools = ",".join([
    "hover", "pan", "wheel_zoom", "zoom_in", "zoom_out", "box_zoom",
    "undo", "redo", "reset", "tap", "save", "box_select",
    "poly_select", "lasso_select",
])
# Hover tooltip: show the recording timestamp and its class flag.
tooltips = [
    ("timestamp", "@timestamp"),
    ("class", "@elfield")
]
p = figure(tools=tools, tooltips=tooltips,
           title="t-SNE embedding of MFCC feature vectors")
# t-SNE axes carry no physical unit; label them so the figure stands alone.
p.xaxis.axis_label = "t-SNE dimension 1"
p.yaxis.axis_label = "t-SNE dimension 2"
p.scatter(x='x', y='y', fill_color='colors', fill_alpha=0.4, source=source, size=10, line_color=None)
# NOTE(review): the file name and title look like leftovers from a Bokeh
# gallery example - consider renaming them to describe this plot.
output_file("color_scatter.html", title="color_scatter.py example")
show(p)  # open a browser