In [1]:
import numpy as np
import pandas as pd
import yaml
from brand import initializeRedisFromYAML
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

from utils import decode_field, get_lagged_features, smooth_data

In [2]:
# Connect to Redis
r = initializeRedisFromYAML('replayTest.yaml')

connecting to Redis using: replayTest.yaml
Redis Socket Path /var/run/redis.sock
Initialized Redis


In [3]:
with open('stream_spec.yaml', 'r') as f:
    stream_spec = yaml.safe_load(f)

In [4]:
# Load data from Redis
# taskInput
all_entries = r.xread({b'taskInput': 0})
stream_entries = all_entries[0][1]
stream_data = [entry[1] for entry in stream_entries]

task_input = pd.DataFrame(stream_data)
for field in ['timestamps', 'samples']:
    task_input[field] = task_input[field.encode()].apply(
        decode_field, stream='taskInput', field=field, stream_spec=stream_spec)
task_input = task_input.set_index('timestamps')

# thresholdCrossings
all_entries = r.xread({b'thresholdCrossings': 0})
stream_entries = all_entries[0][1]
stream_data = [entry[1] for entry in stream_entries]

threshold_crossings = pd.DataFrame(stream_data)
for field in ['timestamps', 'crossings']:
    threshold_crossings[field] = threshold_crossings[field.encode()].apply(
        decode_field,
        stream='thresholdCrossings',
        field=field,
        stream_spec=stream_spec)

# Separate channels into their own columns
tc_timestamps = threshold_crossings['timestamps'].values
crossings = np.stack(threshold_crossings['crossings'])
n_chans = crossings.shape[1]

channel_labels = [f'ch{i :03d}' for i in range(n_chans)]
tc_df = pd.DataFrame(crossings,
                     index=tc_timestamps + 13,
                     columns=channel_labels)

In [5]:
joined_df = task_input.join(tc_df, how='inner')
joined_df.index = pd.to_timedelta(joined_df.index / 30, unit='ms')
joined_df

Unnamed: 0,b'timestamps',b'BRANDS_time',b'udp_recv_time',b'samples',samples,ch000,ch001,ch002,ch003,ch004,...,ch086,ch087,ch088,ch089,ch090,ch091,ch092,ch093,ch094,ch095
00:45:15.803000,b'*2\xdb\x04',b'\\\x0f\x84a\x00\x00\x00\x00\n\xc0\x0e\x00\x0...,b'\\\x0f\x84a\x00\x00\x00\x00\x05\xc0\x0e\x00\...,b'\n\x01\x0f\x00\xf7\xff',"[266, 15, -9]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:45:15.804000,b'H2\xdb\x04',b'\\\x0f\x84a\x00\x00\x00\x00\x91\xc3\x0e\x00\...,b'\\\x0f\x84a\x00\x00\x00\x00\x8c\xc3\x0e\x00\...,b'\x0b\x01\r\x00\xfa\xff',"[267, 13, -6]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:45:15.805000,b'f2\xdb\x04',b'\\\x0f\x84a\x00\x00\x00\x00{\xc7\x0e\x00\x00...,b'\\\x0f\x84a\x00\x00\x00\x00u\xc7\x0e\x00\x00...,b'\t\x01\x0f\x00\xf7\xff',"[265, 15, -9]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:45:15.806000,b'\x842\xdb\x04',b'\\\x0f\x84a\x00\x00\x00\x00c\xcb\x0e\x00\x00...,b'\\\x0f\x84a\x00\x00\x00\x00^\xcb\x0e\x00\x00...,b'\t\x01\x12\x00\xf8\xff',"[265, 18, -8]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:45:15.807000,b'\xa22\xdb\x04',b'\\\x0f\x84a\x00\x00\x00\x00L\xcf\x0e\x00\x00...,b'\\\x0f\x84a\x00\x00\x00\x00G\xcf\x0e\x00\x00...,b'\x0b\x01\x0e\x00\xf9\xff',"[267, 14, -7]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
00:46:43.638000,b'Tg\x03\x05',b'\xb4\x0f\x84a\x00\x00\x00\x00\t@\x0c\x00\x00...,b'\xb4\x0f\x84a\x00\x00\x00\x00\x05@\x0c\x00\x...,b'\t\x01\x1e\x00\xc7\x05',"[265, 30, 1479]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:46:43.639000,b'rg\x03\x05',b'\xb4\x0f\x84a\x00\x00\x00\x00\xefC\x0c\x00\x...,b'\xb4\x0f\x84a\x00\x00\x00\x00\xebC\x0c\x00\x...,b'\x03\x01\x1f\x00\xc0\x05',"[259, 31, 1472]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:46:43.640000,b'\x90g\x03\x05',b'\xb4\x0f\x84a\x00\x00\x00\x00\xd8G\x0c\x00\x...,b'\xb4\x0f\x84a\x00\x00\x00\x00\xd4G\x0c\x00\x...,b'\t\x01\x1f\x00\xb7\x05',"[265, 31, 1463]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
00:46:43.641000,b'\xaeg\x03\x05',b'\xb4\x0f\x84a\x00\x00\x00\x00\xbfK\x0c\x00\x...,b'\xb4\x0f\x84a\x00\x00\x00\x00\xbcK\x0c\x00\x...,b'\x0c\x01#\x00\xca\x05',"[268, 35, 1482]",0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
samples = np.stack(joined_df['samples'])
joined_df['touch'] = samples[:, 0]
joined_df['x'] = samples[:, 1]
joined_df['y'] = samples[:, 2]

In [7]:
bin_size_ms = 5  # ms
gauss_width_ms = 20  # ms, for smoothing
binned_data = joined_df.resample(f'{bin_size_ms :d}ms').sum()

In [8]:
smoothed_data = smooth_data(binned_data[channel_labels].values,
                            bin_size=bin_size_ms,
                            gauss_width=gauss_width_ms)

X = get_lagged_features(smoothed_data, n_history=3)
y = binned_data[['x', 'y']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

mdl = Ridge()
mdl.fit(X_train, y_train)

print(f'Train R^2: {mdl.score(X_train, y_train)}')
print(f'Test R^2: {mdl.score(X_test, y_test)}')

Train R^2: 0.39499259999416664
Test R^2: 0.38659503110922183
