In [2]:
import pandas as pd
import numpy as np
import sqlite3
import math

import plotly.express as px
import plotly.graph_objects as go

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Interim Assignment - Sensor Data

In [4]:
df = pd.read_csv("/content/drive/MyDrive/sensors/GeoAI_assignment_data.csv")

### Part 1

1. Read in the .csv file named 'GeoAI_assignment_data'
2. Select Participant with ID 5 and take a subset containing only the following columns ('time_iso', 'IBI', 'stress', 'ID')
3. Visualize the Interbeat Interval signal over time and add Stress situations as vertical lines in red color
4. Get a summary of the IBI signal values and check the distribution via a histogram and/or boxplot
5. Trim / filter the first minute of the data and the last minute
6. "Come up" with rule(s) to differentiate between stress and non-stress situations
    - try to add a vertical line to the plot which shows your rule


In [8]:
# Select Participant with ID 5 and take a subset containing only the following columns ('time_iso', 'IBI', 'stress', 'ID')
subset = df.loc[df['ID'] == 5, ['time_iso', 'IBI', 'stress', 'ID']]

In [10]:
subset

Unnamed: 0,time_iso,IBI,stress,ID
0,2021-04-21 10:48:32.500,0.0,0,5
1,2021-04-21 10:48:32.750,0.0,0,5
2,2021-04-21 10:48:33.000,563.0,0,5
3,2021-04-21 10:48:33.250,563.0,0,5
4,2021-04-21 10:48:33.500,563.0,0,5
...,...,...,...,...
3578,2021-04-21 11:03:27.000,394.0,0,5
3579,2021-04-21 11:03:27.250,394.0,0,5
3580,2021-04-21 11:03:27.500,394.0,0,5
3581,2021-04-21 11:03:27.750,394.0,0,5


In [14]:
# Check if there is a timestamp problem
subset['time_iso'].duplicated().any() # True

False

In [11]:
# Visualize Interbeat Interval signal over time with Stress situations as vertical lines
fig = go.Figure()
fig.add_trace(go.Scatter(x=subset['time_iso'], y=subset['IBI'], mode='lines', name='Interbeat Interval'))
fig.update_layout(title='Interbeat Interval Over Time',
                  xaxis_title='Time',
                  yaxis_title='Interbeat Interval')

In [13]:
# Add Stress situations as vertical lines in red color
stress_times = subset.loc[subset['stress'] == 1, 'time_iso']
for time in stress_times:
    fig.add_shape(type='line', x0=time, y0=0, x1=time, y1=2000, line=dict(color='red'))

fig.show()

In [15]:
# Get summary of IBI signal values and check distribution via histogram
summary = subset['IBI'].describe()
print("Summary of IBI signal values:")
print(summary)

Summary of IBI signal values:
count    3583.000000
mean      598.585543
std        41.176168
min         0.000000
25%       584.000000
50%       597.000000
75%       612.000000
max      1111.000000
Name: IBI, dtype: float64


In [16]:
# Create histogram to visualize IBI signal distribution
fig_hist = px.histogram(subset, x='IBI', nbins=30, title='Distribution of IBI Signal')
fig_hist.show()


In [26]:
# Convert 'time_iso' column to datetime
subset['time_iso'] = pd.to_datetime(subset['time_iso'])

In [35]:
# Filter the first minute and the last minute of the data
subset_filtered = subset[(subset['time_iso'] >= subset['time_iso'].min()) &
                        (subset['time_iso'] <= subset['time_iso'].min() + pd.Timedelta(minutes=1)) |
                        (subset['time_iso'] >= subset['time_iso'].max() - pd.Timedelta(minutes=1)) &
                        (subset['time_iso'] <= subset['time_iso'].max())]


In [36]:
# Define your rule(s) to differentiate between stress and non-stress situations
# For example, if IBI value is below a certain threshold, consider it as a stress situation
threshold = 650
subset_filtered = subset_filtered.copy()
subset_filtered['rule'] = np.where(subset_filtered['IBI'] < threshold, 'Stress', 'Non-Stress')

In [37]:
# Add a vertical line to the plot to show your rule
fig = px.line(subset_filtered, x='time_iso', y='IBI')
fig.add_trace(go.Scatter(x=subset_filtered['time_iso'], y=[threshold] * len(subset_filtered),
                         mode='lines', name='Threshold', line=dict(color='red')))
fig.update_layout(title='Interbeat Interval Signal Over Time - Filtered first and last minute',
                  xaxis_title='Time', yaxis_title='IBI')
fig.show()

In [41]:
# Trim the first minute and the last minute of the data
subset_trimmed = subset[(subset['time_iso'] >= subset['time_iso'].min() + pd.Timedelta(minutes=1)) &
                        (subset['time_iso'] <= subset['time_iso'].max() - pd.Timedelta(minutes=1))]

In [44]:
# Define your rule(s) to differentiate between stress and non-stress situations
# For example, if IBI value is below a certain threshold, consider it as a stress situation
threshold = 680
subset_trimmed = subset_trimmed.copy()
subset_trimmed['rule'] = np.where(subset_trimmed['IBI'] < threshold, 'Stress', 'Non-Stress')

In [45]:
# Add a vertical line to the plot to show your rule
fig = px.line(subset_trimmed, x='time_iso', y='IBI')
fig.add_trace(go.Scatter(x=subset_trimmed['time_iso'], y=[threshold] * len(subset_trimmed),
                         mode='lines', name='Threshold', line=dict(color='red')))
fig.update_layout(title='Interbeat Interval Signal Over Time - Trimmed first and last minute',
                  xaxis_title='Time', yaxis_title='IBI')
fig.show()

### Part 2

1. Import the .sqlite file 'GeoAI_assignment_data'
2. Retrieve the Skin Temperature Data (4 Hz, platform_id = 3, sensor_id = 3, column = 'value_real')
3. Visualize Skin Temperature Data
4. Filter out the first 5 minutes
5. Apply frequency filtering to the time-filtered data
    - try to use different filter parameters (orders, cutoff frequencies) and interpret how your results change
    - start with a second order butterworth filter with a lowpass cutoff frequency of 0.1 Hz and a high cutoff frequency of 0.01 Hz
    - Hint: the given values work well for a stationary setting. However, the given .sqlite file was recorded during a real-world field study when the participant was moving, hence the cutoff values are most likely too strict


In [4]:
def load_raw_signal_data_from_file(sqlite_file, platform_id, sensor_id, column_name):
    conn = sqlite3.connect(sqlite_file)
    query_raw_data = f'''SELECT time_iso, time_millis, {column_name} FROM sensordata
                         WHERE platform_id = {platform_id} and sensor_id = {sensor_id}'''
    raw_data = pd.read_sql_query(query_raw_data, conn)
    return raw_data

# Specify the path to your .sqlite file
file_path = '/content/drive/MyDrive/sensors/GeoAI_assignment_data.sqlite'

# Load the Skin Temperature data from the .sqlite file
skin_temp_data = load_raw_signal_data_from_file(file_path, platform_id=3, sensor_id=3, column_name='value_real')

# Visualize the Skin Temperature data
fig = go.Figure()
fig.add_trace(go.Scatter(x=skin_temp_data['time_iso'], y=skin_temp_data['value_real'], mode='lines'))
fig.update_layout(title='Skin Temperature Data', xaxis_title='Time', yaxis_title='Skin Temperature')
fig.show()


In [5]:
skin_temp_data

Unnamed: 0,time_iso,time_millis,value_real
0,2021-07-06T07:59:00.270,1625551140270,31.910000
1,2021-07-06T07:59:00.270,1625551140270,31.910000
2,2021-07-06T07:59:00.271,1625551140271,31.910000
3,2021-07-06T07:59:00.271,1625551140271,31.910000
4,2021-07-06T07:59:00.272,1625551140272,31.889999
...,...,...,...
8616,2021-07-06T08:34:54.047,1625553294047,30.450001
8617,2021-07-06T08:34:54.047,1625553294047,30.450001
8618,2021-07-06T08:34:54.047,1625553294047,30.450001
8619,2021-07-06T08:34:54.047,1625553294047,30.450001


In [6]:
# Check number of unique timestamps

len(skin_temp_data['time_iso'].unique())

2194

In [7]:
# Check for duplicates

skin_temp_data['time_iso'].duplicated().any()

True

In [8]:
sampling_frequency_Hz = 4

In [9]:
# get first timestamp in unix format
start_time_unix = skin_temp_data['time_millis'][0] # check if this is in seconds (10 digits) or milliseconds (13 digits)
start_time = pd.to_datetime(start_time_unix, unit="ms") # convert to ISO standard time format
length_GSR = math.ceil(len(skin_temp_data) / sampling_frequency_Hz) # divide length of sensor recordings by the sampling frequency (fs)

sampling_frequency_milliseconds = 1000 / sampling_frequency_Hz # determine millisecond interval for timestamp generation

end_time = start_time + pd.to_timedelta(length_GSR, unit = 's') # determine end time based on signal length (considers fs)

fourHz_ts = pd.date_range(start=start_time, end = end_time, freq=f"{sampling_frequency_milliseconds}ms") # generate the 4 Hz timestamps

length_difference = len(fourHz_ts) - len(skin_temp_data) # check if there is a difference between the lenght of the generated timestamps and the sensor measurements

skin_temp_data['datetime'] = fourHz_ts[:-length_difference] # remove the difference from the timestamps

skin_temp_data.rename(columns={"value_real": "skin_temp_data"}, inplace = True)  # rename column

skin_temp_data_subset = skin_temp_data[['datetime', 'skin_temp_data']] # return the two relevant columns

skin_temp_data_subset

Unnamed: 0,datetime,skin_temp_data
0,2021-07-06 05:59:00.270,31.910000
1,2021-07-06 05:59:00.520,31.910000
2,2021-07-06 05:59:00.770,31.910000
3,2021-07-06 05:59:01.020,31.910000
4,2021-07-06 05:59:01.270,31.889999
...,...,...
8616,2021-07-06 06:34:54.270,30.450001
8617,2021-07-06 06:34:54.520,30.450001
8618,2021-07-06 06:34:54.770,30.450001
8619,2021-07-06 06:34:55.020,30.450001


In [12]:
# Visualize the Skin Temperature data
fig = go.Figure()
fig.add_trace(go.Scatter(x=skin_temp_data_subset['datetime'], y=skin_temp_data_subset['skin_temp_data'], mode='lines'))
fig.update_layout(title='Skin Temperature Data Subset', xaxis_title='Time', yaxis_title='Skin Temperature')
fig.show()

In [15]:
# Check for duplicates again

skin_temp_data['datetime'].duplicated().any()

False

In [21]:
# retrieve original unix timestamp in correct format
skin_temp_data['unix_time_ms'] = (pd.to_datetime(skin_temp_data['datetime']) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1ms')
skin_temp_data['unix_time_s'] = (pd.to_datetime(skin_temp_data['datetime']) - pd.Timestamp("1970-01-01")) // pd.Timedelta('1s')
skin_temp_data

Unnamed: 0,time_iso,time_millis,skin_temp_data,datetime,unix_time_ms,unix_time_s
0,2021-07-06T07:59:00.270,1625551140270,31.910000,2021-07-06 05:59:00.270,1625551140270,1625551140
1,2021-07-06T07:59:00.270,1625551140270,31.910000,2021-07-06 05:59:00.520,1625551140520,1625551140
2,2021-07-06T07:59:00.271,1625551140271,31.910000,2021-07-06 05:59:00.770,1625551140770,1625551140
3,2021-07-06T07:59:00.271,1625551140271,31.910000,2021-07-06 05:59:01.020,1625551141020,1625551141
4,2021-07-06T07:59:00.272,1625551140272,31.889999,2021-07-06 05:59:01.270,1625551141270,1625551141
...,...,...,...,...,...,...
8616,2021-07-06T08:34:54.047,1625553294047,30.450001,2021-07-06 06:34:54.270,1625553294270,1625553294
8617,2021-07-06T08:34:54.047,1625553294047,30.450001,2021-07-06 06:34:54.520,1625553294520,1625553294
8618,2021-07-06T08:34:54.047,1625553294047,30.450001,2021-07-06 06:34:54.770,1625553294770,1625553294
8619,2021-07-06T08:34:54.047,1625553294047,30.450001,2021-07-06 06:34:55.020,1625553295020,1625553295


In [23]:
# Filter out the first 5 minutes
start_time = skin_temp_data['datetime'].min()
end_time = start_time + pd.Timedelta(minutes=5)
skin_temp_data_filtered = skin_temp_data[(skin_temp_data['datetime'] >= start_time) & (skin_temp_data['datetime'] <= end_time)]

skin_temp_data_filtered

Unnamed: 0,time_iso,time_millis,skin_temp_data,datetime,unix_time_ms,unix_time_s
0,2021-07-06T07:59:00.270,1625551140270,31.910000,2021-07-06 05:59:00.270,1625551140270,1625551140
1,2021-07-06T07:59:00.270,1625551140270,31.910000,2021-07-06 05:59:00.520,1625551140520,1625551140
2,2021-07-06T07:59:00.271,1625551140271,31.910000,2021-07-06 05:59:00.770,1625551140770,1625551140
3,2021-07-06T07:59:00.271,1625551140271,31.910000,2021-07-06 05:59:01.020,1625551141020,1625551141
4,2021-07-06T07:59:00.272,1625551140272,31.889999,2021-07-06 05:59:01.270,1625551141270,1625551141
...,...,...,...,...,...,...
1196,2021-07-06T08:03:58.246,1625551438246,31.590000,2021-07-06 06:03:59.270,1625551439270,1625551439
1197,2021-07-06T08:03:58.246,1625551438246,31.590000,2021-07-06 06:03:59.520,1625551439520,1625551439
1198,2021-07-06T08:03:58.246,1625551438246,31.590000,2021-07-06 06:03:59.770,1625551439770,1625551439
1199,2021-07-06T08:03:58.246,1625551438246,31.590000,2021-07-06 06:04:00.020,1625551440020,1625551440


In [24]:
import plotly.express as px

px.line(data_frame=skin_temp_data_filtered, x = "time_iso", y = "skin_temp_data")

## First attempt

In [41]:
from scipy.signal import butter, filtfilt

# Define filter parameters
order = 2
lowpass_cutoff = 0.1
highpass_cutoff = 0.01
fs = 4  # Sampling frequency in Hz

# Compute the normalized cutoff frequencies
lowpass_cutoff_norm = lowpass_cutoff / (fs / 2)
highpass_cutoff_norm = highpass_cutoff / (fs / 2)

# Design the Butterworth filter
b, a = butter(order, [highpass_cutoff_norm, lowpass_cutoff_norm], btype='band')  # Modify the cutoff frequencies

# Apply the filter to the skin temperature data
filtered_skin_temp_data = filtfilt(b, a, skin_temp_data_filtered['skin_temp_data'])

# Create a new DataFrame with the filtered data
filtered_skin_temp_data_subset = pd.DataFrame({
    'datetime': skin_temp_data_filtered['datetime'],
    'filtered_skin_temp_data': filtered_skin_temp_data
})

filtered_skin_temp_data_subset

Unnamed: 0,datetime,filtered_skin_temp_data
0,2021-07-06 05:59:00.270,0.009596
1,2021-07-06 05:59:00.520,0.009650
2,2021-07-06 05:59:00.770,0.009754
3,2021-07-06 05:59:01.020,0.009926
4,2021-07-06 05:59:01.270,0.010184
...,...,...
1196,2021-07-06 06:03:59.270,0.008601
1197,2021-07-06 06:03:59.520,0.007726
1198,2021-07-06 06:03:59.770,0.006838
1199,2021-07-06 06:04:00.020,0.005950


In [42]:
# Visualize the Skin Temperature data
fig = go.Figure()
fig.add_trace(go.Scatter(x=filtered_skin_temp_data_subset['datetime'], y=filtered_skin_temp_data_subset['filtered_skin_temp_data'], mode='lines'))
fig.update_layout(title='Skin Temperature Data Subset', xaxis_title='Time', yaxis_title='Skin Temperature')
fig.show()

## Second attempt

In [43]:
from scipy.signal import butter, filtfilt

# Define filter parameters
order = 2
lowpass_cutoff = 0.2
highpass_cutoff = 0.05
fs = 4  # Sampling frequency in Hz

# Compute the normalized cutoff frequencies
lowpass_cutoff_norm = lowpass_cutoff / (fs / 2)
highpass_cutoff_norm = highpass_cutoff / (fs / 2)

# Design the Butterworth filter
b, a = butter(order, [highpass_cutoff_norm, lowpass_cutoff_norm], btype='band')  # Modify the cutoff frequencies

# Apply the filter to the skin temperature data
filtered_skin_temp_data = filtfilt(b, a, skin_temp_data_filtered['skin_temp_data'])

# Create a new DataFrame with the filtered data
filtered_skin_temp_data_subset = pd.DataFrame({
    'datetime': skin_temp_data_filtered['datetime'],
    'filtered_skin_temp_data': filtered_skin_temp_data
})

filtered_skin_temp_data_subset

Unnamed: 0,datetime,filtered_skin_temp_data
0,2021-07-06 05:59:00.270,-0.005445
1,2021-07-06 05:59:00.520,-0.007690
2,2021-07-06 05:59:00.770,-0.009730
3,2021-07-06 05:59:01.020,-0.011413
4,2021-07-06 05:59:01.270,-0.012598
...,...,...
1196,2021-07-06 06:03:59.270,-0.002117
1197,2021-07-06 06:03:59.520,-0.002179
1198,2021-07-06 06:03:59.770,-0.002434
1199,2021-07-06 06:04:00.020,-0.002877


In [44]:
# Visualize the Skin Temperature data
fig = go.Figure()
fig.add_trace(go.Scatter(x=filtered_skin_temp_data_subset['datetime'], y=filtered_skin_temp_data_subset['filtered_skin_temp_data'], mode='lines'))
fig.update_layout(title='Skin Temperature Data Subset', xaxis_title='Time', yaxis_title='Skin Temperature')
fig.show()

## Third attempt

In [50]:
from scipy.signal import butter, filtfilt

# Define filter parameters
order = 2
lowpass_cutoff = 0.1
highpass_cutoff = 0.05
fs = 4  # Sampling frequency in Hz

# Compute the normalized cutoff frequencies
lowpass_cutoff_norm = lowpass_cutoff / (fs / 2)
highpass_cutoff_norm = highpass_cutoff / (fs / 2)

# Design the Butterworth filter
b, a = butter(order, [highpass_cutoff_norm, lowpass_cutoff_norm], btype='band')  # Modify the cutoff frequencies

# Apply the filter to the skin temperature data
filtered_skin_temp_data = filtfilt(b, a, skin_temp_data_filtered['skin_temp_data'])

# Create a new DataFrame with the filtered data
filtered_skin_temp_data_subset = pd.DataFrame({
    'datetime': skin_temp_data_filtered['datetime'],
    'filtered_skin_temp_data': filtered_skin_temp_data
})

filtered_skin_temp_data_subset

Unnamed: 0,datetime,filtered_skin_temp_data
0,2021-07-06 05:59:00.270,-0.007915
1,2021-07-06 05:59:00.520,-0.008211
2,2021-07-06 05:59:00.770,-0.008417
3,2021-07-06 05:59:01.020,-0.008522
4,2021-07-06 05:59:01.270,-0.008516
...,...,...
1196,2021-07-06 06:03:59.270,0.000584
1197,2021-07-06 06:03:59.520,0.000569
1198,2021-07-06 06:03:59.770,0.000536
1199,2021-07-06 06:04:00.020,0.000488


In [51]:
# Visualize the Skin Temperature data
fig = go.Figure()
fig.add_trace(go.Scatter(x=filtered_skin_temp_data_subset['datetime'], y=filtered_skin_temp_data_subset['filtered_skin_temp_data'], mode='lines'))
fig.update_layout(title='Skin Temperature Data Subset', xaxis_title='Time', yaxis_title='Skin Temperature')
fig.show()