In [1]:
import glob
from pathlib import Path, PurePath

import numpy as np
import pandas as pd
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_file
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

from smartula_sound import SmartulaSound

## MFCC 

In [None]:
# Feature set to analyse: folder holding the MFCC feature CSV files.
# Run either this cell or the LPC cell below; whichever runs last decides
# which folder the loading cell reads.
folder_name = "csv/mfcc-electromagnetic-field"


## LPC

In [2]:
# Feature set to analyse: folder holding the LPC feature CSV files.
# Overrides the MFCC folder selected in the earlier cell when both are run.
folder_name = "csv/lpc-electromagnetic-field"

In [3]:
# Collect every CSV file of the selected feature folder.
# glob returns paths in a file-system dependent order, so sort them to keep the
# sample order (and everything derived from it) stable between runs.
data_folder = Path(folder_name)
files_to_open = data_folder / "*.csv"
all_filenames = sorted(glob.glob(str(files_to_open)))
# Uncomment to iterate quickly on a small subset:
# all_filenames = all_filenames[:10]


def _load_sound(csv_path):
    """Build one SmartulaSound from a single feature CSV file.

    The file name is split on single spaces. The second token, with ".csv"
    removed, is passed as the first constructor argument and the first token
    as the second one (presumably timestamp and field on/off label -- confirm
    against SmartulaSound). The CSV is read without a header row and flattened
    into a 1-D feature vector.
    """
    name_parts = PurePath(csv_path).name.split(" ")
    return SmartulaSound(name_parts[1].replace(".csv", ""),
                         name_parts[0],
                         samples=None,
                         features=np.ravel(pd.read_csv(csv_path, header=None)))


list_of_features = [_load_sound(f) for f in all_filenames]

## Dimension reduction

In [4]:
# Project the feature vectors to 2-D with t-SNE.
# random_state pins the otherwise random optimisation so that a fresh
# Restart & Run All reproduces the same embedding and the same plot.
# NOTE(review): recent scikit-learn releases rename `n_iter` to `max_iter`;
# switch the keyword when the environment is upgraded.
tsne = TSNE(n_components=2, perplexity=10, learning_rate=300, n_iter=3000,
            random_state=42, verbose=2)
df_embedded = tsne.fit_transform([ss.features for ss in list_of_features])


[t-SNE] Computing 31 nearest neighbors...
[t-SNE] Indexed 1071 samples in 0.002s...
[t-SNE] Computed neighbors for 1071 samples in 0.053s...
[t-SNE] Computed conditional probabilities for sample 1000 / 1071
[t-SNE] Computed conditional probabilities for sample 1071 / 1071
[t-SNE] Mean sigma: 0.071363
[t-SNE] Computed conditional probabilities in 0.029s
[t-SNE] Iteration 50: error = 83.4197159, gradient norm = 0.3956720 (50 iterations in 0.547s)
[t-SNE] Iteration 100: error = 79.9394379, gradient norm = 0.3881147 (50 iterations in 0.508s)
[t-SNE] Iteration 150: error = 81.0683441, gradient norm = 0.3873584 (50 iterations in 0.480s)
[t-SNE] Iteration 200: error = 80.0780029, gradient norm = 0.3916182 (50 iterations in 0.542s)
[t-SNE] Iteration 250: error = 79.3510971, gradient norm = 0.3916323 (50 iterations in 0.469s)
[t-SNE] KL divergence after 250 iterations with early exaggeration: 79.351097
[t-SNE] Iteration 300: error = 1.6594059, gradient norm = 0.0017629 (50 iterations in 0.339s)

In [None]:
# Alternative to the t-SNE cell: linear 2-D projection with PCA.
# Running this cell replaces `df_embedded`, so execute only one of the two
# reduction cells before building the plotting frame.
# PCA is imported together with the other dependencies at the top of the
# notebook.
pca = PCA(n_components=2)
df_embedded = pca.fit_transform([ss.features for ss in list_of_features])

In [5]:
# Assemble the plotting table in one step: the two embedding coordinates plus
# the per-sample field flag and timestamp read from the loaded sound objects.
data_frame = pd.DataFrame({
    'x': df_embedded[:, 0],
    'y': df_embedded[:, 1],
    'elfield': [sound.electromagnetic_field_on for sound in list_of_features],
    'timestamp': [sound.timestamp for sound in list_of_features],
})

## Data Visualization

In [6]:
# Colour by class: blue when the electromagnetic-field flag reads "True", red
# otherwise. The flag is compared through its string form so the mapping holds
# whether the attribute stores the raw file-name token or a real bool (a bool
# never equals the string "True", which would paint every point red).
field_on = data_frame['elfield'].astype(str) == "True"
data_frame['colors'] = np.where(field_on, "#003399", "#ff0000")
source = ColumnDataSource(data=data_frame)

tools = "hover,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select," \
        "poly_select,lasso_select, "
# Hover shows the sample timestamp and its class label.
tooltips = [
    ("timestamp", "@timestamp"),
    ("class", "@elfield")
]
# Axis labels make the figure readable on its own; the axes are the two
# components of the 2-D embedding stored in columns x and y.
p = figure(tools=tools, tooltips=tooltips,
           x_axis_label="embedding component 1",
           y_axis_label="embedding component 2")
p.scatter(x='x', y='y', fill_color='colors', fill_alpha=0.4, source=source, size=10, line_color=None)
# Writes a standalone HTML page and opens it.
output_file("color_scatter.html", title="color_scatter.py example")
show(p)