In [45]:
import numpy as np
from scipy import signal
import scipy.io as sio
from math import modf
from itertools import combinations

First we define global constants for the sample period and sample rate of the UniMiB-SHAR dataset, and for the number of features we extract per window (to be changed later!)

In [46]:
# Sample period used for the accelerometer streams; FS is its reciprocal
# (the sample rate passed to the scipy.signal routines below).
DT = 0.02
FS = 1 / DT

# Number of entries produced by get_feature_vector(); also the column
# count of every feature / coherence matrix.
NUM_FEATURES = 7

Then we need to create functions to extract features from an accelerometer stream

In [47]:
def get_mean(sig):
    """Return the arithmetic mean of all samples in ``sig`` (via ``np.mean``)."""
    average = np.mean(sig)
    return average

def get_std(sig):
    """Return the population standard deviation of ``sig`` (``np.std``, ddof=0)."""
    deviation = np.std(sig)
    return deviation

def get_variance(sig):
    """Return the variance of ``sig``, computed as the square of ``get_std(sig)``."""
    std = get_std(sig)
    return std ** 2

def get_mad(sig):
    """Return the mean absolute deviation of ``sig`` around its mean.

    Each sample's absolute distance from ``get_mean(sig)`` is taken and the
    mean of those distances is returned.
    """
    center = get_mean(sig)
    deviations = [abs(sample - center) for sample in sig]
    return get_mean(deviations)

def get_iqr(sig):
    """Return the interquartile range of ``sig`` (75th minus 25th percentile)."""
    upper_quartile, lower_quartile = np.percentile(sig, [75, 25])
    spread = upper_quartile - lower_quartile
    return spread

def get_energy(sig):
    """Return an energy estimate of ``sig`` derived from its Welch spectrum.

    The Welch power spectral density is summed over all frequency bins and
    scaled by ``(1 / dt) * (df_welch / df_fft)``, where ``df_welch`` is the
    Welch bin spacing and ``df_fft`` is the bin spacing of a full-length FFT
    of ``sig`` sampled at ``FS``.
    """
    # NOTE(review): the segment length (half the signal) was flagged by the
    # original author as unverified.
    freqs, psd = signal.welch(sig, fs=FS, nperseg=len(sig) // 2)
    welch_bin_width = freqs[1] - freqs[0]

    sample_period = 1 / FS
    fft_freqs = np.fft.fftfreq(len(sig), d=sample_period)
    fft_bin_width = fft_freqs[1] - fft_freqs[0]

    return (1. / sample_period) * (welch_bin_width / fft_bin_width) * np.sum(psd)

def get_power(sig):
    """Return a power estimate of ``sig``: the Welch PSD summed over all bins
    and multiplied by the Welch frequency-bin spacing.
    """
    # NOTE(review): the segment length (half the signal) was flagged by the
    # original author as unverified.
    freqs, psd = signal.welch(sig, fs=FS, nperseg=len(sig) // 2)
    bin_width = freqs[1] - freqs[0]
    return np.sum(psd) * bin_width

Then we write a function to return a vector of these features, given a window (a section of an accelerometer stream).

And a function to create a matrix of these feature vectors for an accelerometer stream

In [48]:
def get_feature_vector(sig):
    """Return the feature vector of one window as a 1-D array.

    The entries are, in order: mean, standard deviation, variance, mean
    absolute deviation, interquartile range, energy and power.
    """
    extractors = (
        get_mean,
        get_std,
        get_variance,
        get_mad,
        get_iqr,
        get_energy,
        get_power,
    )
    return np.array([extract(sig) for extract in extractors])

def get_feature_matrix(sig, w):
    """Return the feature matrix of ``sig``: one row per window of ``w``
    samples, one column per feature (``NUM_FEATURES`` columns).

    Windows come from ``split(sig, w)``; each row is the result of
    ``get_feature_vector`` on the corresponding window.
    """
    window_count = int(len(sig) / w)
    chunks = split(sig, w)

    features = np.empty([window_count, NUM_FEATURES])
    for row, chunk in enumerate(chunks):
        features[row] = get_feature_vector(chunk)
    return features

A few helper functions...

In [49]:
def mag(sig):
    """Return the norm of ``sig`` as computed by ``np.linalg.norm`` with its
    default settings.
    """
    magnitude = np.linalg.norm(sig)
    return magnitude

def split(sig, w):
    """Split ``sig`` into consecutive, equally sized windows of ``w`` samples.

    Trailing samples that do not fill a complete window are discarded.

    Parameters
    ----------
    sig : array
        The sample stream to cut up (indexed along its first axis).
    w : int
        Number of samples per window.

    Returns
    -------
    list of arrays
        ``len(sig) // w`` windows, in order. An empty list is returned when
        ``sig`` is shorter than a single window.
    """
    # Integer arithmetic avoids floating-point rounding when working out how
    # many samples to drop, and gives np.split an integer section count.
    num_windows = len(sig) // w
    if num_windows == 0:
        # Not enough samples for even one window.
        return []
    usable = num_windows * w
    return np.split(sig[:usable], num_windows)

def get_c(w, coherence_window):
    """Return how many windows of ``w`` samples fit in ``coherence_window``.

    One window lasts ``w * DT``; the ratio is truncated to an int.
    NOTE(review): ``coherence_window`` is presumably expressed in the same
    time unit as ``DT`` (seconds) - confirm with the caller.
    """
    window_duration = w * DT
    return int(coherence_window / window_duration)

Then the signal processing functions to get the coherence of two vectors, the normalized coherence of two signals, and the normalized coherence matrix of two feature matrices

In [50]:
def coherence(sig1, sig2):
    """Return ``scipy.signal.coherence`` of ``sig1`` and ``sig2``.

    The result is the ``(frequencies, coherence)`` pair produced by SciPy,
    using a sample rate of ``FS`` and segments half as long as ``sig1``.
    """
    # NOTE(review): the original author doubted whether FS is the right
    # sample rate here, because the inputs are sequences of per-window
    # feature values rather than raw samples; the segment length is likewise
    # unverified.
    segment_length = len(sig1) // 2
    result = signal.coherence(sig1, sig2, fs=FS, nperseg=segment_length)
    return result

def N_signal(sig1, sig2, phi_max, max_freq=10):
    """Return the normalized coherence of two signals.

    The coherence spectrum of ``sig1`` and ``sig2`` is restricted to the
    frequency bins strictly below ``max_freq``, summed, and divided by
    ``phi_max``.

    Parameters
    ----------
    sig1, sig2 : sequences of equal length
        The two signals to compare.
    phi_max : number
        Normalization divisor applied to the summed coherence.
    max_freq : number, optional
        Upper frequency bound (exclusive) of the bins included in the sum.
        Defaults to 10, the cut-off the original code intended to apply.

    Returns
    -------
    float
        ``sum(C_xy[f < max_freq]) / phi_max``.
    """
    f, C_xy = coherence(sig1, sig2)
    # Select the low-frequency bins with a boolean mask. A bare
    # ``f[f < 10]`` expression would discard its result and leave every
    # frequency bin in the sum.
    in_band = f < max_freq
    return 1 / float(phi_max) * np.sum(C_xy[in_band])

def N_matrix(A, B, c, phi_max):
    """Return the normalized coherence matrix of two feature matrices.

    For every feature column and every start row ``k``, the ``c`` consecutive
    values ``A[k:k+c, f]`` and ``B[k:k+c, f]`` are compared with
    ``N_signal`` and the result is stored at ``[k, f]``.

    Parameters
    ----------
    A, B : 2-D arrays with ``NUM_FEATURES`` columns
        Feature matrices (one row per window) of the two streams.
    c : int
        Number of consecutive windows in one coherence window.
    phi_max : number
        Normalization divisor forwarded to ``N_signal``.

    Returns
    -------
    ndarray of shape ``(len(A) - (c - 1), NUM_FEATURES)``

    Raises
    ------
    ValueError
        If ``A`` has fewer than ``c - 1`` rows, so the row count would be
        negative.
    """
    A = np.asarray(A)
    B = np.asarray(B)

    num_windows = len(A)
    rows = num_windows - (c - 1)
    if rows < 0:
        raise ValueError(
            'coherence window of %d feature windows is longer than the '
            '%d available' % (c, num_windows))
    matrix = np.empty([rows, NUM_FEATURES])

    # Iterate over NUM_FEATURES so the loop always matches the width the
    # output matrix was allocated with.
    for f in range(NUM_FEATURES):
        A_feature = A[:, f]
        B_feature = B[:, f]

        for k in range(rows):
            matrix[k, f] = N_signal(A_feature[k:k + c],
                                    B_feature[k:k + c],
                                    phi_max)

    return matrix

Now we write code to go through the dataset and choose every combination of two accelerometer streams

(REALLY NEED TO ADD VISUALIZATIONS HERE)

First a function to split each person's trials into separate arrays

In [51]:
def get_all_trials(data):
    """Flatten per-person trial lists into one list of ``[person_index, trial]``.

    ``data[p]`` holds the trials of person ``p``; the output keeps the
    person order and, within a person, the trial order.
    """
    labelled_trials = []
    for person_index, person_trials in enumerate(data):
        for stream in person_trials:
            labelled_trials.append([person_index, stream])
    return labelled_trials

Then a function that returns every possible pair of trials from an array of all the trials

In [52]:
def get_pairs(trials):
    """Return every unordered pair of entries of ``trials`` as a list of
    2-tuples, in ``itertools.combinations`` order.
    """
    pair_iterator = combinations(trials, 2)
    return list(pair_iterator)

Then a function to get the coherence matrix given two trials and a function to loop through all the pairs and get the coherence matrix and label it

In [53]:
def get_coherence_matrix(w, c, trial1, trial2, phi_max=10):
    """Return the normalized coherence matrix of two trials.

    Both signals are truncated to the length of the shorter one, converted
    to feature matrices with windows of ``w`` samples, and compared with
    ``N_matrix``.

    Parameters
    ----------
    w : int
        Window length in samples.
    c : int
        Number of feature windows in one coherence window.
    trial1, trial2 : sequences
        Trials whose element at index 1 is the signal (index 0 is not used
        here).
    phi_max : number, optional
        Normalization divisor forwarded to ``N_matrix``. Defaults to 10.

    Returns
    -------
    ndarray
        The matrix produced by ``N_matrix``.
    """
    sig1 = trial1[1]
    sig2 = trial2[1]

    # Compare equal-length streams only.
    short = min(len(sig1), len(sig2))

    A = get_feature_matrix(sig1[:short], w)
    B = get_feature_matrix(sig2[:short], w)
    return N_matrix(A, B, c, phi_max)

def process(pairs, w, c):
    """Compute a labelled coherence matrix for every pair of trials.

    For each pair the two person indices are printed, then
    ``[coherence_matrix, same_person]`` is appended to the result, where
    ``same_person`` is True when both trials carry the same person index.
    """
    results = []
    for pair in pairs:
        first_person = pair[0][0]
        second_person = pair[1][0]
        print(str(first_person) + " " + str(second_person))

        coherence_matrix = get_coherence_matrix(w, c, *pair)
        same_person = (first_person == second_person)
        results.append([coherence_matrix, same_person])
    return results

From here, we need to split each labelled coherence matrix by its rows and label them as well, in this function

In [54]:
def get_all_rows(labelled):
    """Explode labelled matrices into labelled rows.

    Each entry of ``labelled`` is ``[matrix, label]``; the result holds one
    ``[list(row), label]`` item per matrix row, in the original order.
    """
    flattened = []
    for entry in labelled:
        coherence_matrix, is_match = entry[0], entry[1]
        flattened.extend([list(fc), is_match] for fc in coherence_matrix)
    return flattened

Then we can finally generate labelled data that we can use with sklearn classifiers!

To do so, we first load the raw dataset. Then we specify the parameters for the dataset generation (number of people, window length, and coherence window length). Finally, through several loops, we generate and save each data file with a name describing the parameters used to create it.

In [55]:
# Load the raw dataset and generate one labelled data file per combination of
# (number of people, window length, coherence window length).
mat = sio.loadmat('./data/UniMiB-SHAR/data/full_data.mat')
full_data = mat['full_data']

# Generation parameters.
people = [30]               # how many people (leading rows of full_data) to use
activities = [3]            # activity indices to take trials from
windows = [4, 8]            # window lengths w, in samples
coherence_windows = [7, 9]  # coherence window lengths passed to get_c()

for p in people:
    data = full_data[:p]
    # Index 19 only exists in the slice when it holds at least 20 people;
    # guarding with p > 19 keeps np.delete from going out of bounds at p == 19.
    if p > 19:
        data = np.delete(data, (19), axis=0) # delete person 19 because short stream

    new_data = []
    # Loop through dataset and append trials to new_data
    for person_index in range(len(data)):
        accel_data = data[person_index][0][0][0]

        # Loop through trials and append magnitude streams to trial list
        trials = []
        for activity_index in activities:
            activity = accel_data[activity_index]
            for t in range(len(activity)):
                trial = activity[t][0]
                magnitude = trial[5]
                trials.append(magnitude)

        new_data.append(trials)

    print(len(new_data))
    print(len(new_data[0]))

    trials = get_all_trials(new_data)
    pairs = get_pairs(trials)

    for w in windows:
        for cw in coherence_windows:
            filename = 'p' + str(p) + '_a' + str(activities) + '_w' + str(w) + '_cw' + str(cw) + '.npy'
            try:
                print(filename)

                c = get_c(w, cw)

                labelled = process(pairs, w, c)

                rows = get_all_rows(labelled)

                # Each row is [feature_list, label], i.e. ragged. Build the
                # object array explicitly, because recent NumPy versions
                # refuse to infer one from ragged nested sequences.
                np.save('./data/running/nperseg2/' + filename,
                        np.array(rows, dtype=object))
            except Exception as e:
                # Best effort: report the failure and move on to the next
                # parameter combination. KeyboardInterrupt is not an
                # Exception subclass, so it still propagates.
                print(e)
                print('- ' + filename + ' skipped')
            else:
                # Only report success when nothing went wrong.
                print('- ' + filename + ' finished')

29
2
p30_a[3]_w4_cw7.npy
0 0
0 1
0 1
0 2
0 2
0 3
0 3
0 4
0 4
0 5
0 5
0 6
0 6
0 7
0 7
0 8
0 8
0 9
0 9
0 10
0 10
0 11
0 11
0 12
0 12
0 13
0 13
0 14
0 14
0 15
0 15
0 16
0 16
0 17
0 17
0 18
0 18
0 19
0 19
0 20
0 20
0 21
0 21
0 22
0 22
0 23
0 23
0 24
0 24
0 25
0 25
0 26
0 26
0 27
0 27
0 28
0 28
0 1
0 1
0 2
0 2
0 3
0 3
0 4
0 4
0 5
0 5
0 6
0 6
0 7
0 7
0 8
0 8
0 9
0 9
0 10
0 10
0 11
0 11
0 12
0 12
0 13
0 13
0 14
0 14
0 15
0 15
0 16
0 16
0 17
0 17
0 18
0 18
0 19
0 19
0 20
0 20
0 21
0 21
0 22
0 22
0 23
0 23
0 24
0 24
0 25
0 25
0 26
0 26
0 27
0 27
0 28
0 28
1 1
1 2
1 2
1 3
1 3
1 4
1 4
1 5
1 5
1 6
1 6
1 7
1 7
1 8
1 8
1 9
1 9
1 10
1 10
1 11
1 11
1 12
1 12
1 13
1 13
1 14
1 14
1 15
1 15
1 16
1 16
1 17
1 17
1 18
1 18
1 19
1 19
1 20
1 20
1 21
1 21
1 22
1 22
1 23
1 23
1 24
1 24
1 25
1 25
1 26
1 26
1 27
1 27
1 28
1 28
1 2
1 2
1 3
1 3
1 4
1 4
1 5
1 5
1 6
1 6
1 7
1 7
1 8
1 8
1 9
1 9
1 10
1 10
1 11
1 11
1 12
1 12
1 13
1 13
1 14
1 14
1 15
1 15
1 16
1 16
1 17
1 17
1 18
1 18
1 19
1 19
1 20
1 20
1 21
1 21
1 22


21 24
21 24
21 25
21 25
21 26
21 26
21 27
21 27
21 28
21 28
22 22
22 23
22 23
22 24
22 24
22 25
22 25
22 26
22 26
22 27
22 27
22 28
22 28
22 23
22 23
22 24
22 24
22 25
22 25
22 26
22 26
22 27
22 27
22 28
22 28
23 23
23 24
23 24
23 25
23 25
23 26
23 26
23 27
23 27
23 28
23 28
23 24
23 24
23 25
23 25
23 26
23 26
23 27
23 27
23 28
23 28
24 24
24 25
24 25
24 26
24 26
24 27
24 27
24 28
24 28
24 25
24 25
24 26
24 26
24 27
24 27
24 28
24 28
25 25
25 26
25 26
25 27
25 27
25 28
25 28
25 26
25 26
25 27
25 27
25 28
25 28
26 26
26 27
26 27
26 28
26 28
26 27
26 27
26 28
26 28
27 27
27 28
27 28
27 28
27 28
28 28
- p30_a[3]_w4_cw7.npy finished
p30_a[3]_w4_cw9.npy
0 0
0 1
0 1
0 2
0 2
0 3
0 3
0 4
0 4
0 5
0 5
0 6
0 6
0 7
0 7
0 8
0 8
0 9
0 9
0 10
0 10
0 11
0 11
0 12
0 12
0 13
0 13
0 14
0 14
0 15
0 15
0 16
0 16
0 17
0 17
0 18
0 18
0 19
0 19
0 20
0 20
0 21
0 21
0 22
0 22
0 23
0 23
0 24
0 24
0 25
0 25
0 26
0 26
0 27
0 27
0 28
0 28
0 1
0 1
0 2
0 2
0 3
0 3
0 4
0 4
0 5
0 5
0 6
0 6
0 7
0 7
0 8
0 8
0 9
0 9
0 10


18 20
18 21
18 21
18 22
18 22
18 23
18 23
18 24
18 24
18 25
18 25
18 26
18 26
18 27
18 27
18 28
18 28
19 19
19 20
19 20
19 21
19 21
19 22
19 22
19 23
19 23
19 24
19 24
19 25
19 25
19 26
19 26
19 27
19 27
19 28
19 28
19 20
19 20
19 21
19 21
19 22
19 22
19 23
19 23
19 24
19 24
19 25
19 25
19 26
19 26
19 27
19 27
19 28
19 28
20 20
20 21
20 21
20 22
20 22
20 23
20 23
20 24
20 24
20 25
20 25
20 26
20 26
20 27
20 27
20 28
20 28
20 21
20 21
20 22
20 22
20 23
20 23
20 24
20 24
20 25
20 25
20 26
20 26
20 27
20 27
20 28
20 28
21 21
21 22
21 22
21 23
21 23
21 24
21 24
21 25
21 25
21 26
21 26
21 27
21 27
21 28
21 28
21 22
21 22
21 23
21 23
21 24
21 24
21 25
21 25
21 26
21 26
21 27
21 27
21 28
21 28
22 22
22 23
22 23
22 24
22 24
22 25
22 25
22 26
22 26
22 27
22 27
22 28
22 28
22 23
22 23
22 24
22 24
22 25
22 25
22 26
22 26
22 27
22 27
22 28
22 28
23 23
23 24
23 24
23 25
23 25
23 26
23 26
23 27
23 27
23 28
23 28
23 24
23 24
23 25
23 25
23 26
23 26
23 27
23 27
23 28
23 28
24 24
24 25
24 25
24 26
24 2

16 22
16 23
16 23
16 24
16 24
16 25
16 25
16 26
16 26
16 27
16 27
16 28
16 28
16 17
16 17
16 18
16 18
16 19
16 19
16 20
16 20
16 21
16 21
16 22
16 22
16 23
16 23
16 24
16 24
16 25
16 25
16 26
16 26
16 27
16 27
16 28
16 28
17 17
17 18
17 18
17 19
17 19
17 20
17 20
17 21
17 21
17 22
17 22
17 23
17 23
17 24
17 24
17 25
17 25
17 26
17 26
17 27
17 27
17 28
17 28
17 18
17 18
17 19
17 19
17 20
17 20
17 21
17 21
17 22
17 22
17 23
17 23
17 24
17 24
17 25
17 25
17 26
17 26
17 27
17 27
17 28
17 28
18 18
18 19
18 19
18 20
18 20
18 21
18 21
18 22
18 22
18 23
18 23
18 24
18 24
18 25
18 25
18 26
18 26
18 27
18 27
18 28
18 28
18 19
18 19
18 20
18 20
18 21
18 21
18 22
18 22
18 23
18 23
18 24
18 24
18 25
18 25
18 26
18 26
18 27
18 27
18 28
18 28
19 19
19 20
19 20
19 21
19 21
19 22
19 22
19 23
19 23
19 24
19 24
19 25
19 25
19 26
19 26
19 27
19 27
19 28
19 28
19 20
19 20
19 21
19 21
19 22
19 22
19 23
19 23
19 24
19 24
19 25
19 25
19 26
19 26
19 27
19 27
19 28
19 28
20 20
20 21
20 21
20 22
20 22
20 23
20 2

14 22
14 23
14 23
14 24
14 24
14 25
14 25
14 26
14 26
14 27
14 27
14 28
14 28
14 15
14 15
14 16
14 16
14 17
14 17
14 18
14 18
14 19
14 19
14 20
14 20
14 21
14 21
14 22
14 22
14 23
14 23
14 24
14 24
14 25
14 25
14 26
14 26
14 27
14 27
14 28
14 28
15 15
15 16
15 16
15 17
15 17
15 18
15 18
15 19
15 19
15 20
15 20
15 21
15 21
15 22
15 22
15 23
15 23
15 24
15 24
15 25
15 25
15 26
15 26
15 27
15 27
15 28
15 28
15 16
15 16
15 17
15 17
15 18
15 18
15 19
15 19
15 20
15 20
15 21
15 21
15 22
15 22
15 23
15 23
15 24
15 24
15 25
15 25
15 26
15 26
15 27
15 27
15 28
15 28
16 16
16 17
16 17
16 18
16 18
16 19
16 19
16 20
16 20
16 21
16 21
16 22
16 22
16 23
16 23
16 24
16 24
16 25
16 25
16 26
16 26
16 27
16 27
16 28
16 28
16 17
16 17
16 18
16 18
16 19
16 19
16 20
16 20
16 21
16 21
16 22
16 22
16 23
16 23
16 24
16 24
16 25
16 25
16 26
16 26
16 27
16 27
16 28
16 28
17 17
17 18
17 18
17 19
17 19
17 20
17 20
17 21
17 21
17 22
17 22
17 23
17 23
17 24
17 24
17 25
17 25
17 26
17 26
17 27
17 27
17 28
17 28
17 1