In [None]:
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    ! wget -O sensor.zip https://www.dropbox.com/sh/2umy3ajl9clwpyx/AAD1wZ2lRtvwFeZLQcMFeSMea?dl=1
    ! unzip sensor.zip
    ! pip install pandas numpy plotly matplotlib
    
    data_path = '.'
else:
    try:
        from local_settings import data_path  # noqa
    except ImportError:
        data_path = '.'

## Load dataframe

import os

import numpy as np
import pandas as pd

import plotly
import plotly.graph_objects as go
import torch
from torch.nn.functional import conv1d
from sklearn.decomposition import PCA

# Set up plotly's offline notebook mode before any figure is displayed.
plotly.offline.init_notebook_mode()


def load_files(path, file_names=None, return_file_names=False):
    """Load tab-separated sensor logs from ``path`` into one DataFrame.

    Parameters
    ----------
    path : str
        Directory that contains the log files.
    file_names : iterable of str, optional
        Names (relative to ``path``) of the files to read. When omitted,
        every file in ``path`` whose name ends with ``.txt`` is used, in
        sorted order.
    return_file_names : bool, default False
        When true, return the list of file names that would be read instead
        of loading them.

    Returns
    -------
    list of str
        The selected file names, if ``return_file_names`` is true.
    pandas.DataFrame
        Otherwise, the concatenated rows of all files with columns
        ``timestamp``, the eight ``MQ*`` sensors, ``Humidity`` and
        ``Temperature``; rows without a timestamp are dropped, the rest are
        sorted by timestamp and re-indexed from 0.

    Raises
    ------
    RuntimeError
        If there is no file to read, or no row with a timestamp remains.
    """
    if file_names is None:
        # os.listdir returns entries in arbitrary order; sort so the listing
        # (and the concatenation order) is reproducible.
        file_names = sorted(fn for fn in os.listdir(path) if fn.endswith('.txt'))
    else:
        file_names = list(file_names)

    if return_file_names:
        return file_names

    if not file_names:
        # pd.concat would otherwise fail with an unrelated ValueError.
        raise RuntimeError('No data')

    columns = ['timestamp', 'MQ2', 'MQ3', 'MQ4', 'MQ5', 'MQ7', 'MQ8', 'MQ9', 'MQ135',
               'Humidity', 'Temperature']
    # The files have no header row; malformed lines are skipped by the parser.
    frames = [
        pd.read_csv(os.path.join(path, fn), delimiter="\t", on_bad_lines='skip',
                    names=columns, header=None)
        for fn in file_names]
    df = pd.concat(frames, ignore_index=True)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df = df[df['timestamp'].notna()].sort_values('timestamp').reset_index(drop=True)
    if df.empty:
        raise RuntimeError('No data')
    return df


# Names of the .txt recordings found in data_path (handy for picking a subset).
fns = load_files(data_path, return_file_names=True)

# Load every recording. The commented-out argument shows how to restrict the
# load to files whose name starts with '2022'.
df = load_files(data_path)  # , [fn for fn in fns if fn.startswith('2022')])

def scatter(data_frame, coordinate_functions, color_function=None, size_function=None, **kwargs):
    """Build a 2-D or 3-D plotly scatter trace from a DataFrame.

    Parameters
    ----------
    data_frame : pandas.DataFrame
        Source data. Every column is attached as ``customdata`` and listed in
        the hover text.
    coordinate_functions : sequence of callables
        Two (x, y) or three (x, y, z) functions; each receives ``data_frame``
        and returns the values for its axis.
    color_function : callable or constant, optional
        Marker colour: either a function of ``data_frame`` or a value used
        as-is. Defaults to the ``timestamp`` column converted to a number and
        floor-divided by 1e9.
    size_function : callable or constant, optional
        Marker size: either a function of ``data_frame`` or a value used
        as-is. Defaults to 2.
    **kwargs
        Forwarded to the trace constructor. ``marker``, ``customdata`` and
        ``hovertemplate`` are always overwritten by this function.

    Returns
    -------
    plotly.graph_objects.Scatter or plotly.graph_objects.Scatter3d

    Raises
    ------
    ValueError
        If ``coordinate_functions`` does not hold exactly 2 or 3 functions.
    """
    # Validate up front instead of silently returning None.
    axis_labels = {2: ('x', 'y'), 3: ('x', 'y', 'z')}.get(len(coordinate_functions))
    if axis_labels is None:
        raise ValueError(
            'coordinate_functions must contain 2 or 3 functions, got %d'
            % len(coordinate_functions))

    # Compare against None explicitly: truth-testing an array or Series
    # passed as a constant colour/size would raise.
    if color_function is None:
        color = pd.to_numeric(data_frame.timestamp) // 1e9
    elif callable(color_function):
        color = color_function(data_frame)
    else:
        color = color_function

    if size_function is None:
        size = 2
    elif callable(size_function):
        size = size_function(data_frame)
    else:
        size = size_function

    columns = data_frame.columns
    kwargs.update({
        'marker': dict(color=color, size=size),
        'customdata': data_frame[columns],
        # One "<column>:<value>" line per column in the hover box.
        'hovertemplate': '<br>'.join([str(k) + ':%{customdata[' + str(i) + ']}'
                                      for i, k in enumerate(columns)])
    })

    coordinates = {label: f(data_frame)
                   for label, f in zip(axis_labels, coordinate_functions)}
    trace_class = go.Scatter if len(axis_labels) == 2 else go.Scatter3d
    return trace_class(**coordinates, **kwargs)

In [None]:
# dataframe for rose sealed in the box
# `&` is the element-wise AND for boolean masks.
df1 = df[(df.timestamp > "2022-07-08T09:18") & (df.timestamp < "2022-07-13T09:18")]

# Sensor columns only (everything except the timestamp), one row per sample.
data = np.array(df1.drop('timestamp', axis=1))
# Bank of 19 filters of length 1000: cosines with n = 0..9 periods and sines
# with n = 1..9 periods, each weighted by sqrt(1 + n). Stacking them into one
# 2-D array avoids building a tensor from a Python list of arrays.
filters = np.stack(
    [np.sqrt(1+n) * np.cos(n * np.arange(1000) / 1000 * 2 * np.pi) for n in range(10)] +
    [np.sqrt(1+n) * np.sin(n * np.arange(1000) / 1000 * 2 * np.pi) for n in range(1, 10)])
# Convolve every sensor channel with every filter; the result is indexed as
# (channel, filter, sample) and keeps the input length via padding='same'.
ft = conv1d(torch.Tensor(data.transpose())[:, None, :], torch.Tensor(filters)[:, None, :], padding='same').numpy()

# random_state pins the outcome in case scikit-learn selects a randomized solver.
pca_ft = PCA(10, random_state=0)
# Merge the channel and filter axes into one feature axis, samples on rows.
components = pca_ft.fit_transform(ft.reshape(ft.shape[0]*ft.shape[1], ft.shape[2]).transpose())
components = pd.DataFrame(components)
components = components.assign(timestamp=np.array((df1.timestamp)))

In [None]:
# Keep every 100th row of the components, then drop the first and last five
# of those rows.
q = components[::100][5:-5]

# One line per principal component, plotted against time.
fig = go.Figure()
for i in range(10):
    # `i=i` binds the current component index into the lambda; a plain
    # closure would read the loop variable only when it is eventually called.
    # Each component is scaled to unit standard deviation and shifted by 2*i
    # so the lines are stacked instead of overlapping.
    coordinate_functions = [lambda x: x.timestamp,
                            lambda x, i=i: 2*i + x[i] / np.std(x[i])]
    fig.add_trace(scatter(q, coordinate_functions, size_function=2, mode='lines'))

fig['layout'].update(margin=dict(l=0, r=0, b=0, t=10))
fig.show()

In [None]:
# dataframe for rose sealed in the box
# NOTE(review): this cell recomputes the same `df1`, `ft` and `components` as
# the earlier "rose sealed in the box" cell; consider keeping only one copy.
# `&` is the element-wise AND for boolean masks.
df1 = df[(df.timestamp > "2022-07-08T09:18") & (df.timestamp < "2022-07-13T09:18")]

# Sensor columns only (everything except the timestamp), one row per sample.
data = np.array(df1.drop('timestamp', axis=1))
# Bank of 19 filters of length 1000: cosines with n = 0..9 periods and sines
# with n = 1..9 periods, each weighted by sqrt(1 + n). Stacking them into one
# 2-D array avoids building a tensor from a Python list of arrays.
filters = np.stack(
    [np.sqrt(1+n) * np.cos(n * np.arange(1000) / 1000 * 2 * np.pi) for n in range(10)] +
    [np.sqrt(1+n) * np.sin(n * np.arange(1000) / 1000 * 2 * np.pi) for n in range(1, 10)])
# Convolve every sensor channel with every filter; the result is indexed as
# (channel, filter, sample) and keeps the input length via padding='same'.
ft = conv1d(torch.Tensor(data.transpose())[:, None, :], torch.Tensor(filters)[:, None, :], padding='same').numpy()

# random_state pins the outcome in case scikit-learn selects a randomized solver.
pca_ft = PCA(10, random_state=0)
# Merge the channel and filter axes into one feature axis, samples on rows.
components = pca_ft.fit_transform(ft.reshape(ft.shape[0]*ft.shape[1], ft.shape[2]).transpose())
components = pd.DataFrame(components)
components = components.assign(timestamp=np.array((df1.timestamp)))

In [None]:
# Drop 500 rows at each end of the components, then keep every 10th row.
q = components[500:-500][::10]

# Component 4 on the x axis, component 3 on the y axis.
coordinate_functions = [lambda x: x[4],
                        lambda x: x[3]]

# Spacing that yields at most ten colour-bar ticks.
tick = len(q) // 10 + 1
# Timestamps as numbers; used both as marker colour and as tick positions.
color = pd.to_numeric(q.timestamp)
# Whole hours elapsed since the first plotted sample. The reference is taken
# by position (`iloc[0]`) so it does not depend on which row label the slice
# above happens to start at.
# NOTE(review): the 1.01 factor stretches elapsed time by 1% before flooring;
# confirm this is intended, since it also shifts labels by about 1h per 100h.
hours = (color - color.iloc[0]) * 1.01 / 1e9 // 3600
hours = hours.astype(int).astype(str) + 'h'

marker = dict(
    color=color,
    size=5,
    colorbar=dict(
        title='',
        tickmode="array",
        tickvals=color[::tick],
        ticktext=hours[::tick],  # q.timestamp[::tick].dt.strftime('%H:%M %B %d'),
        ticks="outside",
        orientation='h',
        thickness=5,
        tickfont={'size':30}
    ),
    colorscale='Turbo')
# One "<column>:<value>" line per column of q in the hover box.
hovertemplate = '<br>'.join([str(k) + ':%{customdata[' + str(i) + ']}'
                             for i, k in enumerate(q.columns)])
base_scatter = go.Scatter(x=coordinate_functions[0](q),
                          y=coordinate_functions[1](q),
                          marker=marker,
                          mode='markers',
                          customdata=q,
                          hovertemplate=hovertemplate)

base_fig = go.Figure()
base_fig.add_trace(base_scatter)
base_fig['layout'].update(
    margin=dict(l=0, r=0, b=0, t=10),
    showlegend=False)

base_fig