In [1]:
# get train files
import glob

train_folders = ['CAR', 'UAH', 'UJITI', 'UJIUB']

train_files_names = []
n_files = 0
for building in train_folders:
    cur_file_names = glob.glob('train/' + building + '/log*')
    train_files_names.append(sorted(cur_file_names))
    n_files += len(cur_file_names)

print(train_files_names)
print('There are %d training files' % n_files)

[['train/CAR/logfile_CAR_R1_S3.txt', 'train/CAR/logfile_CAR_R1_S3mini.txt', 'train/CAR/logfile_CAR_R2_S3.txt', 'train/CAR/logfile_CAR_R2_S4.txt'], ['train/UAH/logfile_UAH_R1_S3.txt', 'train/UAH/logfile_UAH_R1_S4.txt', 'train/UAH/logfile_UAH_R2_S3.txt', 'train/UAH/logfile_UAH_R2_S4.txt', 'train/UAH/logfile_UAH_R4_S3.txt', 'train/UAH/logfile_UAH_R4_S4.txt'], ['train/UJITI/logfile_UJITI_R1_NEXUS5.txt', 'train/UJITI/logfile_UJITI_R2_NEXUS5.txt'], ['train/UJIUB/logfile_UJIUB_R1n_S3.txt', 'train/UJIUB/logfile_UJIUB_R1r_S3.txt', 'train/UJIUB/logfile_UJIUB_R2n_S3.txt', 'train/UJIUB/logfile_UJIUB_R2r_S3.txt', 'train/UJIUB/logfile_UJIUB_R3_S3.txt']]
There are 17 training files


In [2]:
# select file
cur_train_name = train_files_names[1][2]
cur_train_name

'train/UAH/logfile_UAH_R2_S3.txt'

In [3]:
# import files into dataframe
import numpy as np
import pandas as pd

track = pd.read_csv(cur_train_name, sep=';', engine='c', names=range(11))
print(np.unique(track[0]))

['ACCE' 'AHRS' 'GNSS' 'GYRO' 'LIGH' 'MAGN' 'POSI' 'PRES' 'SOUN' 'WIFI']


In [4]:
# get pressure data
pres = track.iloc[track[0].values == 'PRES']
pres = pres.dropna(axis=1)
pres = pres.drop(0, axis=1)
pres = pres.astype(float)
pres.columns = ['Apptime', 'Sensortime', 'Pressure', 'Acc']
pres = pres.drop('Acc', axis=1)
pres.head()

Unnamed: 0,Apptime,Sensortime,Pressure
10,0.019,1459416000.0,942.6626
29,0.078,1459416000.0,942.6711
48,0.098,1459416000.0,942.7488
67,0.14,1459416000.0,942.6968
86,0.179,1459416000.0,942.5725


In [5]:
# filter time series
def holt_filter(sensor_data, time_data, alpha, beta):
    sensor_data = np.array(sensor_data).astype(float)
    time_data = np.array(time_data).astype(float)
    holt_data = np.zeros((sensor_data.shape[0]))
    holt_data[0] = sensor_data[0]
    trend = 0
    for i in range(1, sensor_data.shape[0]):
        dt = time_data[i] - time_data[i-1]
        holt_data[i] = alpha * sensor_data[i] + (1 - alpha) * (holt_data[i-1] + trend * dt)
        trend = beta * (holt_data[i] - holt_data[i-1]) + (1 - beta) * trend
    return holt_data


In [6]:
# filter pressure
# %matplotlib inline
# import matplotlib.pyplot as plt

pres['holt_pres'] = holt_filter(pres['Pressure'], pres['Sensortime'], 0.02, 0.02)
# plt.plot(pres['Sensortime'], pres['Pressure'], 'b', pres['Sensortime'], pres['holt_pres'], 'r')
pres.head()

Unnamed: 0,Apptime,Sensortime,Pressure,holt_pres
10,0.019,1459416000.0,942.6626,942.6626
29,0.078,1459416000.0,942.6711,942.66277
48,0.098,1459416000.0,942.7488,942.664491
67,0.14,1459416000.0,942.6968,942.665138
86,0.179,1459416000.0,942.5725,942.663288


In [7]:
# Get position
posi = track.iloc[track[0].values == 'POSI']
posi = posi.dropna(axis=1)
posi = posi.drop(0, axis=1)
posi = posi.astype(float)
posi.columns = ['Apptime', 'Sensortime', 'Lat', 'Lon', 'FloorID', 'BuildingID']
posi.head()

Unnamed: 0,Apptime,Sensortime,Lat,Lon,FloorID,BuildingID
4142,8.618,1.0,40.513608,-3.34883,0.0,20.0
16724,34.656,2.0,40.513552,-3.34892,0.0,20.0
24993,51.802,3.0,40.513356,-3.34923,0.0,20.0
32452,67.263,4.0,40.513288,-3.34934,0.0,20.0
39755,82.372,5.0,40.513284,-3.34934,1.0,20.0


In [8]:
# get gnss data
gnss = track.iloc[track[0].values == 'GNSS']
gnss = gnss.dropna(axis=1)
gnss = gnss.drop(0, axis=1)
gnss = gnss.astype(float)
gnss.columns = ['Apptime', 'Sensortime', 'Lat', 'Lon', 'Alt', 'bearing', 'Acc', 'Speed', 'SatUsed', 'SatViewed']
gnss.head()

Unnamed: 0,Apptime,Sensortime,Lat,Lon,Alt,bearing,Acc,Speed,SatUsed,SatViewed
369,0.775,1459416000.0,40.512847,-3.348497,668.373,212.192,12.0,0.0,16.0,12.0
384,0.806,1459416000.0,40.512847,-3.348497,668.373,212.192,12.0,0.0,16.0,12.0
858,1.799,1459416000.0,40.512847,-3.348497,668.372,212.192,12.0,0.0,16.0,12.0
866,1.815,1459416000.0,40.512847,-3.348497,668.372,212.192,12.0,0.0,16.0,12.0
1097,2.302,1459416000.0,40.512668,-3.348566,0.0,0.0,21.0,0.0,16.0,12.0


In [9]:
# filter gnss

In [10]:
# Get change in floors
floor_change = posi['FloorID'].iloc[1:].values - posi['FloorID'].iloc[:-1].values
floor_change = np.hstack((0, floor_change))
posi['FloorChange'] = floor_change
posi.head()

Unnamed: 0,Apptime,Sensortime,Lat,Lon,FloorID,BuildingID,FloorChange
4142,8.618,1.0,40.513608,-3.34883,0.0,20.0,0.0
16724,34.656,2.0,40.513552,-3.34892,0.0,20.0,0.0
24993,51.802,3.0,40.513356,-3.34923,0.0,20.0,0.0
32452,67.263,4.0,40.513288,-3.34934,0.0,20.0,0.0
39755,82.372,5.0,40.513284,-3.34934,1.0,20.0,1.0


In [11]:
# Interpolate pressure for position
import scipy.interpolate as interpolate

f_int = interpolate.interp1d(pres['Apptime'], pres['holt_pres'])
posi['interp_pres'] = f_int(posi['Apptime'].values)
posi.head()

Unnamed: 0,Apptime,Sensortime,Lat,Lon,FloorID,BuildingID,FloorChange,interp_pres
4142,8.618,1.0,40.513608,-3.34883,0.0,20.0,0.0,942.720641
16724,34.656,2.0,40.513552,-3.34892,0.0,20.0,0.0,942.618454
24993,51.802,3.0,40.513356,-3.34923,0.0,20.0,0.0,942.471261
32452,67.263,4.0,40.513288,-3.34934,0.0,20.0,0.0,942.399435
39755,82.372,5.0,40.513284,-3.34934,1.0,20.0,1.0,942.373046


In [12]:
# create wifi data frames
wifi = track.iloc[track[0].values == 'WIFI']
wifi = wifi.drop([0, 3], axis=1)
wifi = wifi.dropna(axis=1)
wifi.columns = ['Apptime', 'Sensortime', 'MAC', 'dBm']
print(wifi.head())

      Apptime  Sensortime                MAC   dBm
1137    2.381   17707.501  04:bd:88:0d:77:a1 -79.0
1138    2.381   17707.501  04:bd:88:0d:77:a0 -79.0
1139    2.381   17707.501  04:bd:88:50:4b:80 -77.0
1140    2.381   17707.501  04:bd:88:0d:c3:01 -83.0
1141    2.381   17707.501  04:bd:88:0d:c3:00 -82.0


In [13]:
# Pad ground truth
posi_pad_start = posi.iloc[0]
posi_pad_start.at['Apptime'] = 0
posi_pad_end = posi.iloc[-1]
posi_pad_end.at['Apptime'] = track[1].values[-1]
posi_array = np.vstack((posi_pad_start.values.reshape((1, posi.shape[1])), 
                        posi.values, 
                        posi_pad_end.values.reshape((1, posi.shape[1]))))
pad_posi = pd.DataFrame(posi_array, columns=posi.columns.values)
print(pad_posi.head())

   Apptime  Sensortime        Lat      Lon  FloorID  BuildingID  FloorChange  \
0    0.000         1.0  40.513608 -3.34883      0.0        20.0          0.0   
1    8.618         1.0  40.513608 -3.34883      0.0        20.0          0.0   
2   34.656         2.0  40.513552 -3.34892      0.0        20.0          0.0   
3   51.802         3.0  40.513356 -3.34923      0.0        20.0          0.0   
4   67.263         4.0  40.513288 -3.34934      0.0        20.0          0.0   

   interp_pres  
0   942.720641  
1   942.720641  
2   942.618454  
3   942.471261  
4   942.399435  


In [17]:
interp_ground_truth_lat = interpolate.interp1d(pad_posi['Apptime'], pad_posi['Lat'])
wifi['interp_lat'] = interp_ground_truth_lat(wifi['Apptime'])
interp_ground_truth_lon = interpolate.interp1d(pad_posi['Apptime'], pad_posi['Lon'])
wifi['interp_lon'] = interp_ground_truth_lon(wifi['Apptime'])
wifi.head()

Unnamed: 0,Apptime,Sensortime,MAC,dBm,interp_lat,interp_lon
1137,2.381,17707.501,04:bd:88:0d:77:a1,-79.0,40.513608,-3.348830
1138,2.381,17707.501,04:bd:88:0d:77:a0,-79.0,40.513608,-3.348830
1139,2.381,17707.501,04:bd:88:50:4b:80,-77.0,40.513608,-3.348830
1140,2.381,17707.501,04:bd:88:0d:c3:01,-83.0,40.513608,-3.348830
1141,2.381,17707.501,04:bd:88:0d:c3:00,-82.0,40.513608,-3.348830
1142,2.381,17707.501,04:bd:88:50:4b:90,-88.0,40.513608,-3.348830
1143,2.381,17703.502,04:bd:88:50:4b:91,-88.0,40.513608,-3.348830
1144,2.381,17707.501,04:bd:88:50:4b:81,-77.0,40.513608,-3.348830
1145,2.381,17703.502,04:bd:88:50:40:71,-88.0,40.513608,-3.348830
1146,2.381,17703.502,04:bd:88:0e:0f:41,-90.0,40.513608,-3.348830


In [None]:
# Dummy wifi