In [279]:
# This is the template for the submission. If you want, you can develop your algorithm in a regular Python script and copy the code here for submission.

# Team members (e-mail, legi):
# zhisun@ethz.ch, 22-958-227
# enjcao@ethz.ch, 22-942-700
# yifzhou@ethz.ch, 22-940-381

In [280]:
import numpy as np
import pandas as pd

from Lilygo.Recording import Recording
from Lilygo.Dataset import Dataset

import os
from os import listdir
from os.path import isfile, join


import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from scipy.stats import entropy
from scipy import signal
from scipy.signal import welch
from scipy.fftpack import fft
from scipy.stats import entropy, kurtosis, skew
from scipy.interpolate import interp1d

from Lilygo.Recording import Recording, data_integrity
from Lilygo.Dataset import Dataset

import xgboost as xgb
from xgboost import XGBClassifier
from imblearn.over_sampling import SMOTE

import joblib
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

## Local dirs

In [281]:
# Get the path of all traces
dir_data = 'E:\\Sunzhichao\\ETHz\\2223Spring\\Mobile_Health\\data\\'
dir_traces_train = dir_data + 'train\\'
dir_traces_test = dir_data + 'test\\'
dir_labels = dir_data + 'labels\\'
dir_loaded = dir_data + 'Loaded_data\\'
dir_models = dir_data + 'models\\'
# recorded
dir_recorded = 'data/recorded'

In [282]:
# Get the path of all traces
# dir_traces = '/kaggle/input/mobile-health-2023-path-detection/data/test'
dir_traces = dir_traces_test
filenames = [join(dir_traces, f) for f in listdir(dir_traces) if (isfile(join(dir_traces, f)) and f[-5:] == '.json')]
filenames.sort()

## Data Pre-processing

In [283]:
# This function aims to find the component caused by gravity from data, which means the signal around 0 Hz
def get_gravity(data):
    filtered_data = np.zeros_like(data)
    # Parameters in IIR filter
    alpla = [1, -1.979133761292768, 0.979521463540373]
    beta = [0.000086384997973502, 0.00012769995947004, 0.000086384997973502]
    # Formula of IIR filter
    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

# This function aims to realize a low-pass filter with cutoff frequency = 1 Hz. Because according to massive amounts of data, the general 
# minimum frequency of human walking is about 1 Hz
def get_highpass(data):
    filtered_data = np.zeros_like(data)  # filtered_data
    alpla = [1, -1.905384612118461, 0.910092542787947]
    beta = [0.953986986993339, -1.907503180919730, 0.953986986993339]
    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

# This funciton aims to realize a high-pass filter with cutoff frequency = 5 Hz. Because according to massive amounts of data, the general 
# maximum frequency of human walking is about 5 Hz
def get_lowpass(data):
    filtered_data = np.zeros_like(data)  # filtered_data
    alpla = [1, -1.80898117793047, 0.827224480562408]
    beta = [0.096665967120306, -0.172688631608676, 0.095465967120306]
    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

In [284]:
# compute energy
def compute_energy(data):
    window_length = len(data)  # length of each window
    # Define the window function (e.g., Hanning or Hamming)
    window = np.hanning(window_length)
    # Apply the window function to the data
    windowed_data = data * window
    # Compute the FFT of the windowed data
    fft_result = np.fft.fft(windowed_data)
    # Compute the squared magnitudes of the FFT components (excluding DC)
    mag_squared = np.abs(fft_result[1:window_length // 2])**2
    # Sum the squared magnitudes and normalize by the window length
    energy = np.sum(mag_squared) / window_length
    # fft_data now contains the energy feature for each window of data
    return energy

# compute entropy
def compute_entropy(data):
    # Assume we have accelerometer data in a 3D array called "data", where each row
    # represents a window of data and each column represents a sensor axis (x, y, z or magnitute)
    window_length = len(data) # length of each window
    # Define the window function (e.g., Hanning or Hamming)
    window = np.hanning(window_length)
    # Apply the window function to the data
    windowed_data = data * window
    # Compute the FFT of the windowed data
    fft_result = np.fft.fft(windowed_data)
    # Compute the magnitudes of the FFT components (excluding DC)
    mag = np.abs(fft_result[1:window_length // 2])
    # Normalize the magnitudes to obtain a probability distribution
    mag_norm = mag / np.sum(mag)
    # Compute the information entropy of the probability distribution
    entropy = -np.sum(mag_norm * np.log2(mag_norm))
    # fft_data now contains the frequency-domain entropy feature for each window of data
    return entropy

def get_features_3(data):
    # get features from sliding windows
    dc = np.mean(data)
    energy = compute_energy(np.asarray(data))
    entropy = compute_entropy(np.asarray(data))
    return dc, energy, entropy

In [285]:
# add new features
def reshape_to_windows(x, wl, ol):
  """
  Segments signal into windows by reshaping it into 2D
  x: 1-d array to reshape
  wl: no. of samples in window
  ol: no. of samples overlap
  """
  assert wl>ol, 'Window must be longer than overlap'
  step=int(wl-ol)
  nrows = int(1+(x.size-wl)//step)
  n = int(x.strides[0])
  return np.lib.stride_tricks.as_strided(x, shape=(nrows,int(wl)),
                                        strides=(step*n,n))

def get_AC_DC(data, sr, order = 1, crit_freq = 2):
  """
  returns high-pass and low-pass filtered signals using butterworth filter
  data: original windowed signal
  sr = sampling frequency
  order: order of filter
  crit_freq: critical frequency of filter
  """

  sos_low = signal.butter(order, crit_freq, 'lp', fs = sr, output = 'sos')
  sos_high = signal.butter(order, crit_freq, 'hp', fs = sr, output = 'sos')
  AC = signal.sosfilt(sos_low, data)
  DC = signal.sosfilt(sos_high, data)
  return AC, DC


def extract_temporal_features(data):
  """
  returns time domain features (20) from windowed raw signal:
   - mean, standard deviation, kurtosis, skewness;
   - RMS, zero-crossing
   - The following percentiles: [0, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100] (Q)
   - Range: max(x) - min(x)
  data: windowed signal
  """
  m = np.mean(data)
  sd = np.std(data)
  kurt = kurtosis(data)
  sk = skew(data)
  rms = np.sqrt(np.mean(data**2))
  zc = np.sum(np.diff(data>=m))
  q = np.array([0, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 1])
  Q = np.quantile(data, q)
  range = Q[-1]-Q[0]             
  return m, sd, kurt, sk, rms, zc, range, Q[0], Q[1], Q[2], Q[3], Q[4], Q[5], Q[6], Q[7], Q[8], Q[9], Q[10], Q[11], Q[12]


def extract_frequential_features(data, sr):
  """
  exctracting frequncy domain features (5) from windowed Fourier transform:
   - energy, entropy, centroid, bandwidth, max_freq
  data: windowed signal
  sf: sampling rate
  """
  window_size = len(data)
  data -= np.mean(data)
  ft = np.fft.fft(data)/window_size
  sr = int(sr)
  # get window length
  
  #discarding mirror part
  ft = ft[:window_size//2]
  #frequencies of the transofm
  freqs = np.fft.fftfreq(window_size, 1/sr)[1:window_size//2]
  #the spectral density is the squared of the absolute
  Spec = np.abs(ft)**2
  #Energy
  E = np.sum(Spec)/(window_size//2)
  #density
  P = Spec[1:]/np.sum(Spec[1:])
  #entropy
  H = -np.sum(P*np.log2(P))/np.log2((window_size//2))
  #centriod 
  C = np.sum(P*freqs)
  #Absolute distance  of frequencies from from Centroid
  distC = np.abs((C-freqs))
  #bandwidth is the weighted mean of the distance
  BW = np.sum(distC*P)
  #maximum frequency 
  max_fr = freqs[np.argmax(Spec[1:])]
  return E, H, C, BW, max_fr

# 25 features
def get_features_25(data):
  # AC, DC = get_AC_DC(data, 200)
  m, sd, kurt, sk, rms, zc, range, q_0, q_1, q_2, q_3, q_4, q_5, q_6, q_7, q_8, q_9, q_10, q_11, q_12 = extract_temporal_features(data)
  E, H, C, BW, max_fr = extract_frequential_features(data, 200)
  all_features = [m, sd, kurt, sk, rms, zc, range, q_0, q_1, q_2, q_3, q_4, q_5, q_6, q_7, q_8, q_9, q_10, q_11, q_12, E, H, C, BW, max_fr]
  return all_features

In [286]:
# data pre-processing
# This function aims to find the component caused by gravity from data, which means the signal around 0 Hz
def get_gravity(data):
    filtered_data = np.zeros_like(data)
    # Parameters in IIR filter
    alpla = [1, -1.979133761292768, 0.979521463540373]
    beta = [0.000086384997973502, 0.00012769995947004, 0.000086384997973502]
    # Formula of IIR filter
    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

def get_highpass(data):
    filtered_data = np.zeros_like(data)  # filtered_data
    alpla = [1, -1.905384612118461, 0.910092542787947]
    beta = [0.953986986993339, -1.907503180919730, 0.953986986993339]

    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

# This funciton aims to realize a high-pass filter with cutoff frequency = 5 Hz. Because according to massive amounts of data, the general 
# maximum frequency of human walking is about 5 Hz
def get_lowpass(data):
    filtered_data = np.zeros_like(data)  # filtered_data
    alpla = [1, -1.80898117793047, 0.827224480562408]
    beta = [0.096665967120306, -0.172688631608676, 0.095465967120306]

    for i in range(2, len(data)):
        filtered_data[i] = alpla[0] * (data[i] * beta[0] + data[i-1] * beta[1] + data[i-2] * beta[2] - filtered_data[i-1] * alpla[1] - filtered_data[i-2] * alpla[2])
    return filtered_data

def pre_process(data):
    # Find the component caused by gravity from data and remove it from the singanl
    data_gravity = get_gravity(data)
    data_user = data - data_gravity
    # Get user's acceleration along the gravity direction by dot product
    data_acc = data_user * data_gravity
    # Add low pass and high pass filter to reduce noise in signal (possible human walking rate:1 - 5Hz)
    data_filtered = get_highpass(data_acc)
    data_filtered = get_lowpass(data_filtered)
    return data_filtered

def preprocess_and_extract_features(trace, window_size=15, sampling_rate=200):
    """
    Preprocess the data and extract features from the 3D accelerometer, gyroscope, and magnetometer data.

    Args:
    trace (Lilygo.Recording.Recording): Object containing the raw data with accelerometer data stored in lists (e.g. trace.data['ax'])
    window_size (int): The window size in seconds for splitting the data
    sampling_rate (int): The sampling rate of the data in Hz

    Returns:
    pd.DataFrame: A DataFrame with the extracted features and location labels
    """
    # Read data from trace
    ax = pre_process(trace.data['ax'].values)
    ay = pre_process(trace.data['ay'].values)
    az = pre_process(trace.data['az'].values)
    
    gx = pre_process(trace.data['gx'].values)
    gy = pre_process(trace.data['gy'].values)
    gz = pre_process(trace.data['gz'].values)
    
    '''mx = trace.data['mx'].values
    my = trace.data['my'].values
    mz = trace.data['mz'].values'''
    
    # Compute the length of each window in samples
    window_samples = window_size * sampling_rate
    
    # Compute the number of windows in the recording
    num_windows = len(ax) // window_samples

    # Initialize lists for storing extracted features and location labels
    features = []
    loc_labels = []

    # Helper function to compute the magnitude of a vector
    magnitude = lambda vec: np.sqrt(np.sum(vec**2, axis=1))

    for i in range(num_windows):
       
        # Extract the accelerometer, gyroscope, and magnetometer data for the current window
        acc_data = np.array([ax[i*window_samples:(i+1)*window_samples],
                             ay[i*window_samples:(i+1)*window_samples],
                             az[i*window_samples:(i+1)*window_samples]]).T
        gyro_data = np.array([gx[i*window_samples:(i+1)*window_samples],
                              gy[i*window_samples:(i+1)*window_samples],
                              gz[i*window_samples:(i+1)*window_samples]]).T
        '''mag_data = np.array([mx[i*window_samples:(i+1)*window_samples],
                             my[i*window_samples:(i+1)*window_samples],
                             mz[i*window_samples:(i+1)*window_samples]]).T'''
        
        '''figure,ax = plt.subplots(3, 1, figsize=(10, 6))
        ax[0].plot(acc_data[:,0])
        ax[0].set_ylabel('ax')
        ax[1].plot(acc_data[:,1])
        ax[1].set_ylabel('ay')
        ax[2].plot(acc_data[:,2])
        ax[2].set_ylabel('az')'''

        # Compute magnitudes
        acc_magnitude = np.sqrt(np.sum(acc_data**2, axis=1))
        acc_mag_mean = abs(np.mean(acc_magnitude))
        acc_mag_std = np.std(acc_magnitude)
        #gyro_magnitude = magnitude(gyro_data)
        #mag_magnitude = magnitude(mag_data)


        # ----ACCELERATOR TIME DOMAIN----
        ax_mean = abs(np.mean(acc_data[:,0]))
        ay_mean = abs(np.mean(acc_data[:,1]))
        az_mean = abs(np.mean(acc_data[:,2]))
        a_mean_list = [ax_mean, ay_mean, az_mean]
        a_mean_list.sort() # Sorting list of numbers in ascending
        Am = a_mean_list[2] # Feature Am: the maximum mean among all dimensions (represents motion range for location)
        Bm = a_mean_list[2]/a_mean_list[1] # Feature Bm and Cm: ratio of the maximum mean in different axes (represents DoF in movement for location)
        Cm = a_mean_list[2]/a_mean_list[0] 

        ax_range = acc_data[np.argmax(acc_data[:,0]), 0] - acc_data[np.argmin(acc_data[:,0]), 0]
        ay_range = acc_data[np.argmax(acc_data[:,1]), 1] - acc_data[np.argmin(acc_data[:,1]), 1]
        az_range = acc_data[np.argmax(acc_data[:,2]), 2] - acc_data[np.argmin(acc_data[:,2]), 2]
        a_range_list = [ax_range, ay_range, az_range]
        a_range_list.sort() # Sorting list of numbers in ascending
        A = a_range_list[2] # Feature A: the maximum range among all dimensions (represents motion range for location)
        B = a_range_list[2]/a_range_list[1] # Feature B and C: ratio of the maximum ranges in different axes (represents DoF in movement for location)
        C = a_range_list[2]/a_range_list[0] 
        
        
        # ----GYROSCOPE TIME DOMAIN----
        gx_mean = abs(np.mean(gyro_data[:,0]))
        gy_mean = abs(np.mean(gyro_data[:,1]))
        gz_mean = abs(np.mean(gyro_data[:,2]))
        g_mean_list = [gx_mean, gy_mean, gz_mean]
        g_mean_list.sort() # Sorting list of numbers in ascending
        Gm = g_mean_list[2] 
        Hm = g_mean_list[2]/g_mean_list[1] 
        Im = g_mean_list[2]/g_mean_list[0] 

        gx_range = gyro_data[np.argmax(gyro_data[:,0]), 0] - gyro_data[np.argmin(gyro_data[:,0]), 0]
        gy_range = gyro_data[np.argmax(gyro_data[:,1]), 1] - gyro_data[np.argmin(gyro_data[:,1]), 1]
        gz_range = gyro_data[np.argmax(gyro_data[:,2]), 2] - gyro_data[np.argmin(gyro_data[:,2]), 2]
        g_range_list = [gx_range, gy_range, gz_range]
        g_range_list.sort() # Sorting list of numbers in ascending
        G = g_range_list[2] 
        H = g_range_list[2]/g_range_list[1] 
        I = g_range_list[2]/g_range_list[0]


        # ----ACCELERATOR FREQUENCY DOMAIN----
        freq, Pxx = welch(acc_magnitude, fs=sampling_rate) # use of the fast Fourier transform for the estimation of power spectra
        freq_band = np.logical_and(freq >= 0.3, freq <= 15) 
        power_in_band = Pxx[freq_band] 
        freq_in_band = freq[freq_band] 
        #plt.plot(freq,Pxx)

        # D and F reflects impact of strides on acceleration
        D = np.max(power_in_band) # Feature D: the maximum energy captured by the accelerator, 
        total_power = np.sum(power_in_band) # Feature F: total power in the frequencies between 0.3 and 15 Hz:

        norm_Pxx = Pxx / total_power # normalize the power spectrum
        E = entropy(norm_Pxx) # Feature E: normalized information entropy of the discrete FFT component magnitudes

        

        sorted_idx = np.argsort(power_in_band)[::-1] 
        first_freq = freq_in_band[sorted_idx[0]] 
        second_freq = freq_in_band[sorted_idx[1]] 
        first_power = power_in_band[sorted_idx[0]] 
        second_power = power_in_band[sorted_idx[1]]

        R1 = np.sum(power_in_band[freq_in_band  < 3]) / total_power
        R3 = np.sum(Pxx[(freq >= 1.5) & (freq <= 2.5)]) / total_power 

        

        # ----MOVING VS: STANDING----
        moving = False
        # Append the features to the list
        if acc_mag_mean > 0.07:
            moving = True
        
        # Calculate the timestamp for the current window as the median of the timestamps (not necessary for location)
        # timestamp = np.median(trace.timestamp[i*window_samples:(i+1)*window_samples]) 
        # try:
        #     # Determine the location label for the current window based on the timestamp
        #     loc_label = trace.labels.get('board_loc')
        #     # Append the label to the labels list
        #     loc_labels.append(loc_label)
        # except Exception as error:
        #     #print("!-This might be testing trace and does not have labels. Error: ",error)
        #     pass
        features.append([moving, A, B, C, Am,Bm,Cm, acc_mag_mean , 
                         D, E, total_power, first_freq, first_power,
                         G,H,I,Gm,Hm,Im,acc_mag_std])

    # Create a DataFrame with the extracted features and location labels
    features_df = pd.DataFrame(features, columns=['moving','A', 'B', 'C','Am', 'Bm', 'Cm','acc_mag' ,
                                                  'D', 'E', 'total_power', 'first_freq', 'first_power',
                                                  'G','H','I','Gm','Hm','Im','acc_std'])
    # features_df['loc_label'] = loc_labels
    #print(features_df)

    return features_df

In [287]:
# This function aims to find peak locations and corresponding values in the signal with the function signal.find_peaks
def get_peaks(input_signal, prominence):
    peak_locations, _ = signal.find_peaks(input_signal, prominence=prominence)
    peak_values = input_signal[peak_locations]
    return peak_locations, peak_values

In [288]:
# resample data to size: fixed length
def resample_data(data, fixed_length):
    
    
    num_data_points = len(data)
    x_old = np.linspace(0, 1, num_data_points)
    x_new = np.linspace(0, 1, fixed_length)
    
    # 1D interpolation for the magnitude data
    interpolator = interp1d(x_old, data, axis=0, kind='linear')
    resampled_data = interpolator(x_new)
    return resampled_data

## Load trained model

In [289]:
# Define the CNN model for magnitute data
class CNN1D(nn.Module):
    def __init__(self, num_classes):
        super(CNN1D, self).__init__()
        self.conv1 = nn.Conv1d(1, 32, kernel_size=5)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool1d(kernel_size=2)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool1d(kernel_size=2)
        self.dropout = nn.Dropout(p=0.5)  # Add dropout layer with 50% dropout probability
        self.fc1 = nn.Linear(64 * 247, 255)
        self.relu3 = nn.ReLU()
        self.fc2 = nn.Linear(255, num_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = self.dropout(x)  # Apply dropout
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu3(x)
        x = self.fc2(x)
        return x

In [290]:
# Location------------------------------------------
# location_model = joblib.load('./trained_models/location_svm_model.joblib')
location_xgboost_model = joblib.load('./trained_models/location_xgboost_model_feature_25.joblib')
# activity------------------------------------------
activity_xgb_model = joblib.load('trained_models/activity_xgboost_model_feature_3.joblib')
# path---------------------------------------------
path_cnn_model = torch.load('trained_models/path_index_cnn_model.pt')


In [291]:
def get_segment_overlap(data,sampling_rate, std_win):
    # Calculate raw magnitude of accelerometer signal
    # amagn_acc = [np.sqrt(a**2+trace.data['ay'].values[i]**2+trace.data['az'].values[i]**2)for i, a in enumerate(trace.data['ay'].values)]
    # Pre-process data
    data_seg = pre_process(data)
    # Calculate window size
    window_size = round(std_win*sampling_rate)
    segment_trace = [data_seg[s:s+window_size] for s in range(0, len(data_seg)-window_size, round(window_size/2))]
    return segment_trace


In [292]:
def get_segment_nonoverlap(data,sampling_rate, std_win):
    # Calculate window size
    window_size = round(std_win*sampling_rate)
    segment_trace = [data[s:s+window_size] for s in range(0, len(data)-window_size, window_size)]
    return segment_trace

In [293]:
def check_moving(magn_data):
    acc_mag_mean = np.mean(abs(magn_data))
    moving = False
    # Append the features to the list
    if acc_mag_mean > 0.027:
        moving = True
    return moving

In [294]:
def normalize(data):
    min_val = min(data)
    max_val = max(data)
    normalized_data = [(x - min_val) / (max_val - min_val)  - 0.5 for x in data]
    return normalized_data

In [295]:
def get_watch_loc(trace):
    # # 0 accuracy for testing
    # y_final = 3

    # # XGBoost zyf
    # # Get features of data
    # features_df = preprocess_and_extract_features(trace)
    # features_df = features_df[features_df['moving']] # Keep only moving windows
    # X_trace = features_df.drop(['Gm', 'Hm', 'Im'], axis=1)
    # if np.shape(X_trace)[0] != 0:

    #     # Prepare data for classification
    #     scaler = StandardScaler()
    #     X_test = scaler.fit_transform(X_trace)

    #     # Create the XGBoost DMatrix object for the test data
    #     dtest = xgb.DMatrix(X_test)

    #     # Make predictions on the test set and evaluate the model
    #     y_pred = location_xgboost_model.predict(dtest)
    #     # y_pred = np.squeeze(y_pred)
    #     y_final = np.argmax(np.bincount(y_pred.astype(int)))

    # XGboost szc
    # get segment data
    amagn = [np.sqrt(a**2+trace.data['ay'].values[i]**2+trace.data['az'].values[i]**2)for i, a in enumerate(trace.data['ax'].values)]
    std_win = 3 # length of window in seconds
    sampling_rate = 200
    segment_trace = get_segment_overlap(amagn, sampling_rate, std_win)
    # add feature extraction
    num_features = 25
    featured_trace = np.zeros((np.shape(segment_trace)[0], num_features))
    for i in range(np.shape(segment_trace)[0]):
        featured_trace[i,] = get_features_25(segment_trace[i])
    # Create the XGBoost DMatrix object for the test data
    dtest = xgb.DMatrix(featured_trace)

    # Make predictions on the test set and evaluate the model
    y_pred = location_xgboost_model.predict(dtest)
    y_final = np.argmax(np.bincount(y_pred.astype(int)))
    
    return y_final

In [296]:
def get_path_idx(trace):
    # # 0 accuracy for testing
    # path_idx = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    magn_mag = [np.sqrt(m**2+trace.data['my'].values[i]**2+trace.data['mz'].values[i]**2)for i, m in enumerate(trace.data['mx'].values)]
    X_trace = torch.tensor(np.expand_dims(resample_data(magn_mag, 1000), axis=(0, -1)), dtype=torch.float32)
    X_trace = X_trace.permute(0, 2, 1).to(device)
    
    path_cnn_model.to(device)
    path_cnn_model.eval()
    path_idx = int(np.argmax(path_cnn_model(X_trace).cpu().detach().numpy()))
    return path_idx

In [297]:
def get_step_count(trace):
    # # 0 accuracy for testing
    # stepCount = 0

    # Calculate raw magnitude of accelerometer signal
    amagn = [np.sqrt(a**2+trace.data['ay'].values[i]**2+trace.data['az'].values[i]**2)for i, a in enumerate(trace.data['ax'].values)]
    # Filter the signal to get more accurate results -----------------------------------------------------------
  
    data_filtered = pre_process(amagn)
    # Use convolution to reduce noise in signal again
    filter_window_size = 40
    data_filtered = np.convolve(data_filtered, np.ones((filter_window_size,))/filter_window_size, mode='valid')
    # Find peaks in the filtered signal and realize our stepcount -----------------------------------------------
    # TODO: modify prominence
    # Segment data into windows --------------------------------------------------------------------------------
    std_win = 1 # length of window in seconds
    sampling_rate = 200
    data_segmented = get_segment_nonoverlap(data_filtered, sampling_rate, std_win)
    # Normalize data in each windows ---------------------------------------------------------------------------
    win_size = round(std_win * sampling_rate)
    normalized_data = data_filtered.copy()
    for i, seg in enumerate(data_segmented):
        if check_moving(seg):
            normalized_data[i*win_size:i*win_size + win_size] = normalize(seg)
        else:
            normalized_data[i*win_size:i*win_size + win_size] = 0
    # check data after last window
    if np.shape(data_segmented)[0]*win_size < len(data_filtered):
        seg = data_filtered[np.shape(data_segmented)[0]*win_size:]
        if check_moving(seg):
                normalized_data[i*win_size:i*win_size + len(seg)] = normalize(seg)
        else:
            normalized_data[i*win_size:i*win_size + len(seg)] = 0

    # Find peaks in the filtered signal and realize our stepcount -----------------------------------------------
    prominence = 0.4
    peak_locations, _ = get_peaks(normalized_data, prominence)
    stepCount = len(peak_locations)

    # # plot normalized data
    # fig, axes = plt.subplots(2,1, figsize=(60, 5)) #figsize is width, height
    # # axes[0].set_title(title)
    # axes[0].plot(data_filtered, alpha=1, label="Filtered mag")
    # axes[1].plot(normalized_data, alpha=1, label="Normalaized mag")
    # # colors = ['r', 'g']
    # # for s in  range(0, len(acc_magnitude), win_size): # adding vertical lines for the peak detection windows
    # #     axes[1].axvline(s, color = 'r')
    # axes[1].plot(peak_locations, normalized_data[peak_locations], 'y+', color="red", label="Peak Locations")
    
    return stepCount

In [298]:
def get_activity(trace):
    # # 0 accuracy for testing
    # stand, walk, run, cycle = 2, 2, 2, 2

    # algorithm here
    stand, walk, run, cycle = 0, 0, 0, 0
    
    # get segment data
    amagn = [np.sqrt(a**2+trace.data['ay'].values[i]**2+trace.data['az'].values[i]**2)for i, a in enumerate(trace.data['ax'].values)]
    std_win = 3 # length of window in seconds
    sampling_rate = 200
    segment_trace = get_segment_overlap(amagn, sampling_rate, std_win)
    # add feature extraction
    num_features = 3
    featured_trace = np.zeros((np.shape(segment_trace)[0], num_features))
    for i in range(np.shape(segment_trace)[0]):
        featured_trace[i,] = get_features_3(segment_trace[i])
    # Create the XGBoost DMatrix object for the test data
    dtest = xgb.DMatrix(featured_trace)

    # Make predictions on the test set and evaluate the model
    y_pred = activity_xgb_model.predict(dtest)
    # filter prediction by 60s
    # Sliding window: 60s
    std_win = 10
    n = round (60 / std_win * 2 - 1)
    y_pred_count = np.zeros(4)
    
    y_pred_60 = y_pred.copy()
    for s in range(0, len(y_pred) - n, int(n/2+1)):
        # window = 60s 
        windowed_label = y_pred[s : s+n]
        for j in range(n): 
            # Find the label that appears the most
            for k in range(4):
                if windowed_label[j] == k:
                    y_pred_count[k]+=1
        label_argmax = np.where(y_pred_count == np.max(y_pred_count))
        # print(label_argmax)
        if len(label_argmax)==1:
            y_pred_60[s : s+n] = np.argmax(y_pred_count)
    
    # remove duplicated elements
    predicted = list(set(y_pred_60))
    if 0 in predicted:
        stand = 1
    if 1 in predicted:
        walk = 1
    if 2 in predicted:
        run = 1
    if 3 in predicted:
        cycle = 1    
    return stand, walk, run, cycle

In [299]:
# Loop through all traces and calculate the step count for each trace
solution_file = []
for idx, filename in enumerate(filenames):
    # print(filename)
    trace = Recording(filename, no_labels=True, mute=True)
    categorization_results = {'watch_loc': 0, 'path_idx': 0, 'step_count': 0, 'stand': 0, 'walk': 0, 'run': 0, 'cycle': 0}

    #
    # Your algorithm goes here
    # Make sure, you do not use the gps data and are tolerant for missing data (see task set).
    # Your program must not crash when single smartphone data traces are missing.
    #

    categorization_results['watch_loc'] = get_watch_loc(trace)
    categorization_results['path_idx'] = get_path_idx(trace)
    categorization_results['step_count'] = get_step_count(trace)
    categorization_results['stand'], categorization_results['walk'], categorization_results['run'], categorization_results['cycle'] = get_activity(trace)


    # Append your calculated results and the id of each trace and category to the solution file
    trace_id = ''.join([*filename][-8:-5])
    for counter_label, category in enumerate(categorization_results):
        solution_file.append([trace_id + f'_{counter_label+1}', categorization_results[category]])
    # show progress
    if (idx+1)%10 == 0:
        print("Process traces: ", idx+1, '/', len(filenames))


RuntimeError: Given groups=1, weight of size [32, 3, 5], expected input[1, 1, 1000] to have 3 channels, but got 1 channels instead

In [None]:
# Write the detected step counts into a .csv file to then upload the .csv file to Kaggle
# When cross-checking the .csv file on your computer, we recommend using the text editor and NOT excel so that the results are displayed correctly
# IMPORTANT: Do NOT change the name of the columns ('Id' and 'Category') of the .csv file
submission_file_df = pd.DataFrame(np.asarray(solution_file), columns=['Id', 'Category'])
submission_file_df.to_csv('results/submission_12.csv', header=['Id', 'Category'], index=False)