In [1]:
import numpy as np
import pandas as pd

import pickle
import os
from pathlib import Path
import glob
import json
from tqdm.notebook import tqdm

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
from PIL import Image
#%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

sns.set_style('darkgrid')

from scipy.interpolate import interp1d

from xyz10.io_f_mod import read_data_file
from xyz10.visualize_f_mod import visualize_trajectory
from xyz10.compute_f_mod import compute_step_positions, compute_step_positions_mod, split_ts_seq
from xyz10.compute_f_mod import compute_steps, compute_headings, compute_stride_length, compute_step_heading, compute_rel_positions
from xyz10.compute_f_mod import correct_positions,  correct_positions_mod

Downsample Rotation, Magnetic, and (to be calculated) Relative Steps (x,y) into WiFi timestamps   

> Setting bssid list

In [2]:
# Load the per-site BSSID ranking tables for the test and train sets.
# Context managers close the files deterministically (a bare open() inside
# pickle.load() leaves the handle open until garbage collection).
# NOTE: pickle.load can execute arbitrary code; only load pickles produced by this pipeline.
with open("./data_out/test_bssid_ranks.pkl", "rb") as f:
    test_bssid = pickle.load(f)
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", "rb") as f:
    train_bssid = pickle.load(f)

# For every site present in the test ranking, build the union of test and
# train BSSIDs: test entries first, duplicates dropped, first-seen order kept.
bssid = {}
for site in test_bssid["count"]:
    _test_bssid = test_bssid["count"][site].bssid
    _train_bssid = train_bssid["count"][site].bssid
    bssid[site] = pd.concat([_test_bssid, _train_bssid]).unique().tolist()

> Interpolated STEPS (WiFi records are snapped to step timestamps; with steps every ~0.5 s and WiFi scans every 0.5–1 s, the expected error is about half a step ≈ 0.25 m). Convert, append, and save each trace per site to a DataFrame. Features = BSSID/RSSI + DELAYS + F + M + R + rx + ry + TRACEID + rel time  
> Targets = XY (a BSSID that is not present in a trace gets the site's min RSSI and max DELAY)

In [3]:
# Load the training WiFi records; the context manager closes the file handle
# that a bare pickle.load(open(...)) would leave open.
with open("./data_out/train_24IDs_standardF_wifi.pkl", "rb") as f:
    wifi_data_train = pickle.load(f)

In [4]:
# Load the training sensor pickles (accelerometer, rotation, magnetometer) and
# the step/waypoint records. Each file is opened in a context manager so the
# handle is closed as soon as the object is loaded.
with open("./data_out/train_24IDs_standardF_motion.pkl", "rb") as f:
    accel_data = pickle.load(f)

with open("./data_out/train_24IDs_standardF_rotate.pkl", "rb") as f:
    rotate_data = pickle.load(f)
with open("./data_out/train_24IDs_standardF_magnetic.pkl", "rb") as f:
    magnetic_data = pickle.load(f)

with open("./data_out/train_24IDs_standardF_steps.pkl", "rb") as f:
    steps_data = pickle.load(f)

In [7]:
# Build one wide feature table per site and pickle it.
# Rows: one per step timestamp of every trace.
# Columns: "<bssid>" (RSSI) and "<bssid>_D" (delay) for every site BSSID, plus
# time, x, y, f, m, r, rx, ry and trace.
num_s = 0
# Keyword arguments shared by every interpolation onto the step timestamps.
interp_kw = dict(kind="linear", fill_value="extrapolate", copy=False, assume_sorted=True)
for site_id in ["5da1389e4db8ce0c98bd0547"]:#tqdm(bssid.keys()): # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id} with {len(steps_data[site_id])} traces and {len(bssid[site_id])} bssids")

    # feature columns: each BSSID contributes an RSSI column followed by its delay column
    col_names = [name for feature_bssid in bssid[site_id] for name in (feature_bssid, feature_bssid+"_D")]

    site_trace_record_wifi = []
    for trace_id in tqdm(steps_data[site_id]):  # over traces

        wifi_record = wifi_data_train[site_id][trace_id][1].copy()
        steps_record = steps_data[site_id][trace_id].copy()

        accel_record = accel_data[site_id][trace_id][1]
        rotate_record = rotate_data[site_id][trace_id][1]
        magnetic_record = magnetic_data[site_id][trace_id][1]

        # Start from the site defaults: even columns (RSSI) get the site's min RSSI,
        # odd columns (delay) get the site's max delay.
        trace_record_wifi = np.zeros((steps_record[1].shape[0], len(col_names)))
        trace_record_wifi[:, 0::2] = int(train_bssid["min_rssi"][site_id])
        trace_record_wifi[:, 1::2] = int(train_bssid["max_delay"][site_id])
        trace_record_wifi = pd.DataFrame(trace_record_wifi, columns=col_names)

        # time, absolute x/y positions of steps, floor number
        trace_record_wifi[["time", "x", "y"]] = steps_record[1][["time", "x", "y"]].copy()
        trace_record_wifi["f"] = steps_record[0].FloorName[0]
        timestamps = trace_record_wifi.time.to_numpy()

        # magnetic field intensity (Euclidean norm of the three axes), interpolated onto the step times
        mag_x = magnetic_record.x_axis.to_numpy()
        mag_y = magnetic_record.y_axis.to_numpy()
        mag_z = magnetic_record.z_axis.to_numpy()
        magnetic_intensity = np.sqrt(np.square(mag_x) + np.square(mag_y) + np.square(mag_z))
        trace_record_wifi["m"] = interp1d(magnetic_record.time.to_numpy(), magnetic_intensity, **interp_kw)(timestamps)

        # z_axis rotation component (used as the in-plane direction feature)
        trace_record_wifi["r"] = interp1d(rotate_record.time.to_numpy(), rotate_record.z_axis.to_numpy(), **interp_kw)(timestamps)

        # relative x/y step positions derived from accelerometer steps and rotation headings
        step_timestamps, step_indexs, step_acce_max_mins = compute_steps(accel_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        stride_lengths = compute_stride_length(step_acce_max_mins)
        headings = compute_headings(rotate_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        step_headings = compute_step_heading(step_timestamps, headings)
        rel_step_positions = pd.DataFrame(compute_rel_positions(stride_lengths, step_headings),
                                          columns=["time", "x", "y"])

        # Re-sample the relative steps onto `timestamps`: cumulate, interpolate, then difference.
        # A (time=0, value=0) anchor is prepended before the cumulative sum.
        _rt = np.insert(rel_step_positions.time.to_numpy(), 0, 0.0)
        rx_cum = interp1d(_rt, np.insert(rel_step_positions.x.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        ry_cum = interp1d(_rt, np.insert(rel_step_positions.y.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        trace_record_wifi["rx"] = np.diff(rx_cum, prepend=0.0)
        trace_record_wifi["ry"] = np.diff(ry_cum, prepend=0.0)

        # trace id
        trace_record_wifi["trace"] = trace_id

        # WiFi RSSI and delays: for each row take the WiFi records whose time is nearest to the row time
        for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace/for each waypoint

            wifi_record["time0"] = (wifi_record.time - t0).apply(abs)

            wifi_min = wifi_record[wifi_record.time0 == wifi_record.time0.min()]
            wifi_min_bssid_list = wifi_min.bssid.tolist()
            wifi_min_delay_list = [seen_bssid+"_D" for seen_bssid in wifi_min_bssid_list]

            trace_record_wifi.loc[j, wifi_min_bssid_list] = wifi_min.rssi.to_numpy()
            trace_record_wifi.loc[j, wifi_min_delay_list] = wifi_min.delay.to_numpy().astype(int)

        site_trace_record_wifi.append(trace_record_wifi)
        #break  # only first trace

    site_trace_record_wifi = pd.concat(site_trace_record_wifi, ignore_index=True)

    # save the feature table of this site
    with open(f"./data_out/full24/{site_id}_10k_mix-counts.pkl", "wb") as f:
        pickle.dump(site_trace_record_wifi, f)
    #break  # only first site

Processing #1: Site-5da1389e4db8ce0c98bd0547 with 514 traces and 1041 bssids


  0%|          | 0/514 [00:00<?, ?it/s]

Convert Train data into sequences of 2-5-10-15-20 records (for LSTM)

In [2]:
site_ids = ["5d27075f03f801723c2e360f", "5dc8cea7659e181adb076a3f", "5dbc1d84c1eb61796cf7c010", "5da138b74db8ce0c98bd4774",
            "5d2709c303f801723c3299ee", "5da958dd46f8266d0737457b", "5d2709bb03f801723c32852c", "5d27096c03f801723c31e5e0",
            "5a0546857ecc773753327266", "5c3c44b80379370013e0fd2b", "5d27097f03f801723c320d97", "5da1382d4db8ce0c98bbe92e",
            "5d2709b303f801723c327472", "5d2709d403f801723c32bd39", "5da138764db8ce0c98bcaa46", "5da1383b4db8ce0c98bc11ab",
            "5d2709e003f801723c32d896", "5da138754db8ce0c98bca82f", "5da1389e4db8ce0c98bd0547", "5da138314db8ce0c98bbf3a0",
            "5d2709a003f801723c3251bf", "5d27099f03f801723c32511d", "5da138364db8ce0c98bc00f1", "5da138274db8ce0c98bbd3d2"]
#site_ids = ["5da1389e4db8ce0c98bd0547"]

len_seq = 20  # number of timestamps per sequence; at ~2 records/second, len 5/10/20 is about 2.5/5/10 s

# Cut every trace of every site into consecutive, non-overlapping sequences of
# `len_seq` rows. Trailing rows that do not fill a whole sequence are dropped.
# Within each sequence the first rx/ry is zeroed and cumulative rx/ry are added.
num_s = 0
for site_id in tqdm(site_ids): # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id}")
    # context manager closes the input file as soon as it is loaded
    with open(f"./data_out/full24/{site_id}_10k_mix-counts.pkl", "rb") as f:
        record = pickle.load(f)

    main_dfs = []
    # Group by a scalar key so group labels are plain trace ids; grouping by the
    # one-element list ["trace"] makes get_group(scalar) a deprecated call.
    gr = record.groupby("trace")
    for group in tqdm(gr.groups):  # loop over traces
        trace_dfs = []   # list of sequences/DataFrames

        _group = gr.get_group(group).copy()
        n_seq = len(_group)//len_seq

        # loop over every full sequence of this trace (the incomplete tail is skipped)
        for i in range(n_seq):
            post_rec = _group.iloc[len_seq*i:len_seq*(i+1)].copy().reset_index(drop=True)
            # the relative displacement restarts at the beginning of every sequence
            post_rec.loc[0, ["rx", "ry"]] = 0.0
            post_rec["rx_cum"] = post_rec["rx"].cumsum()
            post_rec["ry_cum"] = post_rec["ry"].cumsum()

            trace_dfs.append(post_rec)

        if len(trace_dfs)>0:  # some sequences are available
            main_dfs.append(pd.concat(trace_dfs).reset_index(drop=True)) # combine list of dataframes into a single dataframe

        #break

    main_dfs = pd.concat(main_dfs).reset_index(drop=True) if len(main_dfs)>0 else None
    # main_dfs is None when no trace is long enough for one sequence; report 0 rows
    # instead of calling len(None), and avoid dividing by zero for an empty record.
    n_converted = 0 if main_dfs is None else len(main_dfs)
    pct_converted = round(100*n_converted/len(record), 1) if len(record) > 0 else 0.0
    print(f"Data converted: {pct_converted}%")
    # save the sequence table of this site
    with open(f"./data_out/full24/seq{len_seq}/{site_id}_10k_mix-counts.pkl", "wb") as f:
        pickle.dump(main_dfs, f)
    

  0%|          | 0/24 [00:00<?, ?it/s]

Processing #1: Site-5d27075f03f801723c2e360f


  0%|          | 0/1141 [00:00<?, ?it/s]

Data converted: 85.2%
Processing #2: Site-5dc8cea7659e181adb076a3f


  0%|          | 0/745 [00:00<?, ?it/s]

Data converted: 87.9%
Processing #3: Site-5dbc1d84c1eb61796cf7c010


  0%|          | 0/793 [00:00<?, ?it/s]

Data converted: 88.1%
Processing #4: Site-5da138b74db8ce0c98bd4774


  0%|          | 0/748 [00:00<?, ?it/s]

Data converted: 87.5%
Processing #5: Site-5d2709c303f801723c3299ee


  0%|          | 0/664 [00:00<?, ?it/s]

Data converted: 80.9%
Processing #6: Site-5da958dd46f8266d0737457b


  0%|          | 0/552 [00:00<?, ?it/s]

Data converted: 89.3%
Processing #7: Site-5d2709bb03f801723c32852c


  0%|          | 0/264 [00:00<?, ?it/s]

Data converted: 94.0%
Processing #8: Site-5d27096c03f801723c31e5e0


  0%|          | 0/351 [00:00<?, ?it/s]

Data converted: 82.4%
Processing #9: Site-5a0546857ecc773753327266


  0%|          | 0/514 [00:00<?, ?it/s]

Data converted: 80.9%
Processing #10: Site-5c3c44b80379370013e0fd2b


  0%|          | 0/385 [00:00<?, ?it/s]

Data converted: 87.6%
Processing #11: Site-5d27097f03f801723c320d97


  0%|          | 0/404 [00:00<?, ?it/s]

Data converted: 89.2%
Processing #12: Site-5da1382d4db8ce0c98bbe92e


  0%|          | 0/338 [00:00<?, ?it/s]

Data converted: 89.1%
Processing #13: Site-5d2709b303f801723c327472


  0%|          | 0/639 [00:00<?, ?it/s]

Data converted: 80.9%
Processing #14: Site-5d2709d403f801723c32bd39


  0%|          | 0/361 [00:00<?, ?it/s]

Data converted: 85.2%
Processing #15: Site-5da138764db8ce0c98bcaa46


  0%|          | 0/513 [00:00<?, ?it/s]

Data converted: 83.0%
Processing #16: Site-5da1383b4db8ce0c98bc11ab


  0%|          | 0/239 [00:00<?, ?it/s]

Data converted: 93.2%
Processing #17: Site-5d2709e003f801723c32d896


  0%|          | 0/343 [00:00<?, ?it/s]

Data converted: 88.5%
Processing #18: Site-5da138754db8ce0c98bca82f


  0%|          | 0/205 [00:00<?, ?it/s]

Data converted: 85.5%
Processing #19: Site-5da1389e4db8ce0c98bd0547


  0%|          | 0/514 [00:00<?, ?it/s]

Data converted: 74.2%
Processing #20: Site-5da138314db8ce0c98bbf3a0


  0%|          | 0/428 [00:00<?, ?it/s]

Data converted: 70.3%
Processing #21: Site-5d2709a003f801723c3251bf


  0%|          | 0/310 [00:00<?, ?it/s]

Data converted: 65.5%
Processing #22: Site-5d27099f03f801723c32511d


  0%|          | 0/131 [00:00<?, ?it/s]

Data converted: 87.2%
Processing #23: Site-5da138364db8ce0c98bc00f1


  0%|          | 0/82 [00:00<?, ?it/s]

Data converted: 85.7%
Processing #24: Site-5da138274db8ce0c98bbd3d2


  0%|          | 0/213 [00:00<?, ?it/s]

Data converted: 67.2%


Create Train data with ranked BSSID/RSSI/Delay (for DNNv2 -> LSTMv2 + LSTMv3)

In [2]:
# Load the training WiFi records; the context manager closes the file handle
# that a bare pickle.load(open(...)) would leave open.
with open("./data_out/train_24IDs_standardF_wifi.pkl", "rb") as f:
    wifi_data_train = pickle.load(f)

In [3]:
# Load the rotation, magnetometer and step/waypoint records of the training set.
# Each file is opened in a context manager so its handle is closed right after loading.
with open("./data_out/train_24IDs_standardF_rotate.pkl", "rb") as f:
    rotate_data = pickle.load(f)
with open("./data_out/train_24IDs_standardF_magnetic.pkl", "rb") as f:
    magnetic_data = pickle.load(f)

with open("./data_out/train_24IDs_standardF_steps.pkl", "rb") as f:
    steps_data = pickle.load(f)

In [18]:
# Create the per-site list of BSSIDs that occur in BOTH the train and the test rankings.
# Files are opened in context managers so the handles are closed after loading.
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", "rb") as f:
    train_bssid = pickle.load(f)
with open("./data_out/test_bssid_ranks.pkl", "rb") as f:
    test_bssid = pickle.load(f)

bssid = {}
for site_id in test_bssid["count"]:
    _train_bssid = train_bssid["count"][site_id].bssid.tolist()
    _test_bssid = test_bssid["count"][site_id].bssid.tolist()
    # Set iteration order can change between runs; sorting makes the list (and
    # anything derived from its order) reproducible.
    bssid[site_id] = sorted(set(_train_bssid) & set(_test_bssid))

num_bssid = 100  # number of ranked access-point slots kept per WiFi timestamp
tlim = 3000  # delay limit in milliseconds; 0 (any value <= 1) disables the limit
# flat column list: bssid_0, rssi_0, d_0, bssid_1, rssi_1, d_1, ...
col_names = [name for i in range(num_bssid) for name in (f"bssid_{i}", f"rssi_{i}", f"d_{i}")]

# Build, per site, a table with one row per WiFi timestamp holding the top
# `num_bssid` access points (bssid_i / rssi_i / d_i, ordered by "rank") plus
# f, x, y, m, r and trace, then pickle it.
# NOTE(review): bssid[site_id] is only used here for the site keys and the log
# message; the WiFi records are not filtered to that list - confirm this is intended.
num_s = 0
# Keyword arguments shared by every interpolation onto the WiFi timestamps.
interp_kw = dict(kind="linear", fill_value="extrapolate", copy=False, assume_sorted=True)
for site_id in tqdm(bssid.keys()): # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id} with {len(steps_data[site_id])} traces and {len(bssid[site_id])} bssids")

    site_trace_record_wifi = []
    for trace_id in tqdm(wifi_data_train[site_id].keys()):  # over traces

        wifi_record = wifi_data_train[site_id][trace_id][1].copy()  # bssid vs rssi/delays
        steps_record = steps_data[site_id][trace_id].copy()  # x,y
        rotate_record = rotate_data[site_id][trace_id][1]  # r
        magnetic_record = magnetic_data[site_id][trace_id][1] # m

        # one output row per distinct WiFi timestamp
        timestamps = wifi_record.time.unique()
        if len(timestamps) == 0:
            continue  # no wifi data in file

        # initialize the dataframe for this trace
        trace_record_wifi = pd.DataFrame(columns=col_names).astype(int)
        trace_record_wifi["time"] = timestamps

        ####### Combine rssi/delays vs BSSIDs ###################################
        # rank = rssi when a delay limit is active, otherwise rssi * delay   (or np.log10(wifi_record.delay))
        rank_weight = 1 if tlim > 1 else wifi_record.delay
        wifi_record["rank"] = wifi_record.rssi*rank_weight
        for i_t, t in enumerate(timestamps):  # over times in trace/for each wifi record

            # records of this timestamp, optionally restricted to delays below the limit
            at_t = wifi_record.time == t
            if tlim > 1:
                at_t = at_t & (wifi_record.delay < tlim)
            wifi_record_t = wifi_record[at_t].sort_values(by="rank", ascending=False, ignore_index=True)   # bssid/rssi/delay
            num_bssid_t = len(wifi_record_t)

            n_missing = num_bssid - num_bssid_t
            if n_missing > 0: # pad with placeholder bssid names ("0", "1", ...) carrying extreme values
                dummy_bssid = np.arange(n_missing).astype(str)
                wifi_record_dummy = pd.DataFrame({"bssid": dummy_bssid,
                                                  "rssi": train_bssid["min_rssi"][site_id],
                                                  "delay": tlim if tlim > 1 else train_bssid["max_delay"][site_id],
                                                  "rank": train_bssid["min_rssi"][site_id]*(1 if tlim > 1 else train_bssid["max_delay"][site_id])})
                wifi_record_t = pd.concat([wifi_record_t, wifi_record_dummy], ignore_index=True)

            # copy the top-ranked entries into the slot columns, one cell at a time
            for i_rank in range(num_bssid):
                for prefix, source in (("bssid", "bssid"), ("rssi", "rssi"), ("d", "delay")):
                    trace_record_wifi.loc[i_t, f"{prefix}_{i_rank}"] = wifi_record_t[source][i_rank]
        ############################################################################################

        # floor number and absolute x/y positions interpolated from the step records
        trace_record_wifi["f"] = steps_record[0].FloorName[0]
        step_times = steps_record[1].time.to_numpy()
        trace_record_wifi["x"] = interp1d(step_times, steps_record[1].x.to_numpy(), **interp_kw)(timestamps)
        trace_record_wifi["y"] = interp1d(step_times, steps_record[1].y.to_numpy(), **interp_kw)(timestamps)

        # magnetic field intensity (Euclidean norm of the three axes)
        mag_x = magnetic_record.x_axis.to_numpy()
        mag_y = magnetic_record.y_axis.to_numpy()
        mag_z = magnetic_record.z_axis.to_numpy()
        magnetic_intensity = np.sqrt(np.square(mag_x) + np.square(mag_y) + np.square(mag_z))
        trace_record_wifi["m"] = interp1d(magnetic_record.time.to_numpy(), magnetic_intensity, **interp_kw)(timestamps)

        # z_axis rotation component (used as the in-plane direction feature)
        trace_record_wifi["r"] = interp1d(rotate_record.time.to_numpy(), rotate_record.z_axis.to_numpy(), **interp_kw)(timestamps)

        # trace_id
        trace_record_wifi["trace"] = trace_id

        site_trace_record_wifi.append(trace_record_wifi)
        #break  # only first trace

    site_trace_record_wifi = pd.concat(site_trace_record_wifi, ignore_index=True)

    # save the ranked table of this site
    with open(f"./data_out/wifi24/{site_id}_interBSSID_limit{int(tlim/1000)}s.pkl", "wb") as f:
        pickle.dump(site_trace_record_wifi, f)
    #break  # only first site

  0%|          | 0/24 [00:00<?, ?it/s]

Processing #1: Site-5da1389e4db8ce0c98bd0547 with 514 traces and 424 bssids


  0%|          | 0/514 [00:00<?, ?it/s]

Processing #2: Site-5da138b74db8ce0c98bd4774 with 748 traces and 2358 bssids


  0%|          | 0/748 [00:00<?, ?it/s]

Processing #3: Site-5da138764db8ce0c98bcaa46 with 513 traces and 1218 bssids


  0%|          | 0/513 [00:00<?, ?it/s]

Processing #4: Site-5dbc1d84c1eb61796cf7c010 with 793 traces and 3594 bssids


  0%|          | 0/793 [00:00<?, ?it/s]

Processing #5: Site-5da1383b4db8ce0c98bc11ab with 239 traces and 1087 bssids


  0%|          | 0/239 [00:00<?, ?it/s]

Processing #6: Site-5d2709a003f801723c3251bf with 310 traces and 868 bssids


  0%|          | 0/310 [00:00<?, ?it/s]

Processing #7: Site-5a0546857ecc773753327266 with 514 traces and 2399 bssids


  0%|          | 0/514 [00:00<?, ?it/s]

Processing #8: Site-5da138274db8ce0c98bbd3d2 with 213 traces and 290 bssids


  0%|          | 0/213 [00:00<?, ?it/s]

Processing #9: Site-5d2709b303f801723c327472 with 639 traces and 1146 bssids


  0%|          | 0/639 [00:00<?, ?it/s]

Processing #10: Site-5da958dd46f8266d0737457b with 552 traces and 2863 bssids


  0%|          | 0/552 [00:00<?, ?it/s]

Processing #11: Site-5d2709bb03f801723c32852c with 264 traces and 1516 bssids


  0%|          | 0/264 [00:00<?, ?it/s]

Processing #12: Site-5d2709d403f801723c32bd39 with 361 traces and 1603 bssids


  0%|          | 0/361 [00:00<?, ?it/s]

Processing #13: Site-5dc8cea7659e181adb076a3f with 745 traces and 2464 bssids


  0%|          | 0/745 [00:00<?, ?it/s]

Processing #14: Site-5d27096c03f801723c31e5e0 with 351 traces and 1033 bssids


  0%|          | 0/351 [00:00<?, ?it/s]

Processing #15: Site-5d2709c303f801723c3299ee with 664 traces and 4325 bssids


  0%|          | 0/664 [00:00<?, ?it/s]

Processing #16: Site-5da138314db8ce0c98bbf3a0 with 428 traces and 898 bssids


  0%|          | 0/428 [00:00<?, ?it/s]

Processing #17: Site-5da1382d4db8ce0c98bbe92e with 338 traces and 1781 bssids


  0%|          | 0/338 [00:00<?, ?it/s]

Processing #18: Site-5d27075f03f801723c2e360f with 1141 traces and 1234 bssids


  0%|          | 0/1141 [00:00<?, ?it/s]

Processing #19: Site-5da138754db8ce0c98bca82f with 205 traces and 1091 bssids


  0%|          | 0/205 [00:00<?, ?it/s]

Processing #20: Site-5d2709e003f801723c32d896 with 343 traces and 954 bssids


  0%|          | 0/343 [00:00<?, ?it/s]

Processing #21: Site-5d27097f03f801723c320d97 with 404 traces and 1114 bssids


  0%|          | 0/404 [00:00<?, ?it/s]

Processing #22: Site-5da138364db8ce0c98bc00f1 with 82 traces and 500 bssids


  0%|          | 0/82 [00:00<?, ?it/s]

Processing #23: Site-5c3c44b80379370013e0fd2b with 385 traces and 1070 bssids


  0%|          | 0/385 [00:00<?, ?it/s]

Processing #24: Site-5d27099f03f801723c32511d with 131 traces and 554 bssids


  0%|          | 0/131 [00:00<?, ?it/s]

Convert Train data with ordered BSSID/RSSI/Delay into BSSID-RANK-sequences (LSTMv2)

In [71]:
site_ids = ["5d27075f03f801723c2e360f", "5dc8cea7659e181adb076a3f", "5dbc1d84c1eb61796cf7c010", "5da138b74db8ce0c98bd4774",
            "5d2709c303f801723c3299ee", "5da958dd46f8266d0737457b", "5d2709bb03f801723c32852c", "5d27096c03f801723c31e5e0",
            "5a0546857ecc773753327266", "5c3c44b80379370013e0fd2b", "5d27097f03f801723c320d97", "5da1382d4db8ce0c98bbe92e",
            "5d2709b303f801723c327472", "5d2709d403f801723c32bd39", "5da138764db8ce0c98bcaa46", "5da1383b4db8ce0c98bc11ab",
            "5d2709e003f801723c32d896", "5da138754db8ce0c98bca82f", "5da1389e4db8ce0c98bd0547", "5da138314db8ce0c98bbf3a0",
            "5d2709a003f801723c3251bf", "5d27099f03f801723c32511d", "5da138364db8ce0c98bc00f1", "5da138274db8ce0c98bbd3d2"]
#site_ids = ["5da1389e4db8ce0c98bd0547"]

len_seq = 100  # max 100

# Turn every row of the ranked WiFi table into a sequence of up to `len_seq`
# (bssid, rssi) pairs; the row's f/x/y/m/r/time/trace values are repeated on
# every element of its sequence.
num_s = 0
for site_id in tqdm(site_ids): # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id}")
    # context manager closes the input file as soon as it is loaded
    with open(f"./data_out/wifi24/{site_id}_interBSSID_limit3s.pkl", "rb") as f:
        record = pickle.load(f)

    # Slot columns in rank order. They are defined here so the cell runs on a
    # fresh kernel (they were previously taken from leftover kernel state), and
    # are limited to the first `len_seq` slots that exist in the table.
    bssid_cols = [col for col in (f"bssid_{i}" for i in range(len_seq)) if col in record.columns]
    rssi_cols = [col for col in (f"rssi_{i}" for i in range(len_seq)) if col in record.columns]

    main_dfs = []
    # iterrows() has no length, so pass the total to get a real progress bar
    for irec, rec in tqdm(record.iterrows(), total=len(record)):
        seq = pd.DataFrame({"bssid": rec[bssid_cols].tolist(),
                            "rssi": rec[rssi_cols].tolist(),
                            "f": rec["f"],
                            "x": rec["x"],
                            "y": rec["y"],
                            "m": rec["m"],
                            "r": rec["r"],
                            "time": rec["time"],
                            "trace": rec["trace"]})

        main_dfs.append(seq)
        #break

    # None is stored when the table has no rows
    main_dfs = pd.concat(main_dfs).reset_index(drop=True) if len(main_dfs)>0 else None

    # save the sequence table of this site
    with open(f"./data_out/wifi24/seq/{site_id}_interBSSID_limit3s_s{len_seq}.pkl", "wb") as f:
        pickle.dump(main_dfs, f)
    #break
    

  0%|          | 0/24 [00:00<?, ?it/s]

Processing #1: Site-5d27075f03f801723c2e360f


0it [00:00, ?it/s]

Processing #2: Site-5dc8cea7659e181adb076a3f


0it [00:00, ?it/s]

Processing #3: Site-5dbc1d84c1eb61796cf7c010


0it [00:00, ?it/s]

Processing #4: Site-5da138b74db8ce0c98bd4774


0it [00:00, ?it/s]

Processing #5: Site-5d2709c303f801723c3299ee


0it [00:00, ?it/s]

Processing #6: Site-5da958dd46f8266d0737457b


0it [00:00, ?it/s]

Processing #7: Site-5d2709bb03f801723c32852c


0it [00:00, ?it/s]

Processing #8: Site-5d27096c03f801723c31e5e0


0it [00:00, ?it/s]

Processing #9: Site-5a0546857ecc773753327266


0it [00:00, ?it/s]

Processing #10: Site-5c3c44b80379370013e0fd2b


0it [00:00, ?it/s]

Processing #11: Site-5d27097f03f801723c320d97


0it [00:00, ?it/s]

Processing #12: Site-5da1382d4db8ce0c98bbe92e


0it [00:00, ?it/s]

Processing #13: Site-5d2709b303f801723c327472


0it [00:00, ?it/s]

Processing #14: Site-5d2709d403f801723c32bd39


0it [00:00, ?it/s]

Processing #15: Site-5da138764db8ce0c98bcaa46


0it [00:00, ?it/s]

Processing #16: Site-5da1383b4db8ce0c98bc11ab


0it [00:00, ?it/s]

Processing #17: Site-5d2709e003f801723c32d896


0it [00:00, ?it/s]

Processing #18: Site-5da138754db8ce0c98bca82f


0it [00:00, ?it/s]

Processing #19: Site-5da1389e4db8ce0c98bd0547


0it [00:00, ?it/s]

Processing #20: Site-5da138314db8ce0c98bbf3a0


0it [00:00, ?it/s]

Processing #21: Site-5d2709a003f801723c3251bf


0it [00:00, ?it/s]

Processing #22: Site-5d27099f03f801723c32511d


0it [00:00, ?it/s]

Processing #23: Site-5da138364db8ce0c98bc00f1


0it [00:00, ?it/s]

Processing #24: Site-5da138274db8ce0c98bbd3d2


0it [00:00, ?it/s]

***
Convert Test Data into RSSI/DELAY DataFrame for Inference Phase (DNN+LSTMv1)
***

In [3]:
# Load the train BSSID ranking (source of the per-site min RSSI / max delay
# defaults) and the parsed test traces. Context managers close the file handles
# that bare pickle.load(open(...)) calls would leave open.
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", "rb") as f:
    train_bssid = pickle.load(f)
with open("./data_out/test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

Wifi-based timesteps

In [12]:
# Parse the test traces into wide feature tables keyed by site and trace.
# Rows: one per distinct WiFi timestamp of the trace.
# Columns: "<bssid>" (RSSI) and "<bssid>_D" (delay) for every site BSSID, plus
# time, trace, m, r, rx, ry, rx_cum and ry_cum.
# NOTE(review): `bssid` is not defined in this cell; it must already exist in the kernel.
parsed_test_data = {}
num_s = 0
# Keyword arguments shared by every interpolation onto the WiFi timestamps.
interp_kw = dict(kind="linear", fill_value="extrapolate", copy=False, assume_sorted=True)

for site_id in tqdm(bssid.keys()):  # over sites
    num_s += 1
    #print(f"Processing #{num_s}: Site-{site_id} with {len(test_data[site_id])} traces and {len(bssid[site_id])} bssids")

    parsed_test_data[site_id] = {}

    # feature columns: each BSSID contributes an RSSI column followed by its delay column
    col_names = [name for feature_bssid in bssid[site_id] for name in (feature_bssid, feature_bssid+"_D")]

    for trace_id in tqdm(test_data[site_id]):  # over traces

        trace = test_data[site_id][trace_id]
        wifi_record = trace.wifi.copy()

        accel_record = trace.acce
        rotate_record = trace.rotate
        magnetic_record = trace.magn

        timestamps = wifi_record.time.unique().tolist()

        # Start from the site defaults: even columns (RSSI) get the site's min RSSI,
        # odd columns (delay) get the site's max delay.
        trace_record_wifi = np.zeros((len(timestamps), len(col_names)))
        trace_record_wifi[:, 0::2] = int(train_bssid["min_rssi"][site_id])
        trace_record_wifi[:, 1::2] = int(train_bssid["max_delay"][site_id])
        trace_record_wifi = pd.DataFrame(trace_record_wifi, columns=col_names)

        trace_record_wifi["time"] = timestamps
        trace_record_wifi["trace"] = trace_id
        timestamps = trace_record_wifi.time.to_numpy()

        # magnetic field intensity (Euclidean norm of the three axes)
        mag_x = magnetic_record.x_axis.to_numpy()
        mag_y = magnetic_record.y_axis.to_numpy()
        mag_z = magnetic_record.z_axis.to_numpy()
        magnetic_intensity = np.sqrt(np.square(mag_x) + np.square(mag_y) + np.square(mag_z))
        trace_record_wifi["m"] = interp1d(magnetic_record.time.to_numpy(), magnetic_intensity, **interp_kw)(timestamps)

        # z_axis rotation component (used as the in-plane direction feature)
        trace_record_wifi["r"] = interp1d(rotate_record.time.to_numpy(), rotate_record.z_axis.to_numpy(), **interp_kw)(timestamps)

        # relative x/y step positions derived from accelerometer steps and rotation headings
        step_timestamps, step_indexs, step_acce_max_mins = compute_steps(accel_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        stride_lengths = compute_stride_length(step_acce_max_mins)
        headings = compute_headings(rotate_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        step_headings = compute_step_heading(step_timestamps, headings)
        rel_step_positions = pd.DataFrame(compute_rel_positions(stride_lengths, step_headings),
                                          columns=["time", "x", "y"])

        # Re-sample the relative steps onto `timestamps`: cumulate, interpolate, then difference.
        # A (time=0, value=0) anchor is prepended before the cumulative sum.
        _rt = np.insert(rel_step_positions.time.to_numpy(), 0, 0.0)
        rx_cum = interp1d(_rt, np.insert(rel_step_positions.x.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        ry_cum = interp1d(_rt, np.insert(rel_step_positions.y.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        trace_record_wifi["rx"] = np.diff(rx_cum, prepend=0.0)
        trace_record_wifi["ry"] = np.diff(ry_cum, prepend=0.0)
        trace_record_wifi["rx_cum"] = rx_cum
        trace_record_wifi["ry_cum"] = ry_cum
        # the first row of a trace carries no relative displacement
        trace_record_wifi.loc[0, ["rx", "ry", "rx_cum", "ry_cum"]] = 0.

        # fill each row with the WiFi records that share exactly its timestamp
        for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace/for each waypoint

            wifi_min = wifi_record[wifi_record.time == t0]
            wifi_min_bssid_list = wifi_min.bssid.tolist()
            wifi_min_delay_list = [seen_bssid+"_D" for seen_bssid in wifi_min_bssid_list]

            trace_record_wifi.loc[j, wifi_min_bssid_list] = wifi_min.rssi.to_numpy()
            trace_record_wifi.loc[j, wifi_min_delay_list] = wifi_min.delay.to_numpy().astype(int)

        parsed_test_data[site_id][trace_id] = trace_record_wifi

        #break  # only first trace

    #break  # only first site

  0%|          | 0/24 [00:00<?, ?it/s]

Processing #1: Site-5da1389e4db8ce0c98bd0547 with 13 traces and 1041 bssids
Processing #2: Site-5da138b74db8ce0c98bd4774 with 29 traces and 3546 bssids
Processing #3: Site-5da138764db8ce0c98bcaa46 with 36 traces and 1949 bssids
Processing #4: Site-5dbc1d84c1eb61796cf7c010 with 50 traces and 4544 bssids
Processing #5: Site-5da1383b4db8ce0c98bc11ab with 26 traces and 1554 bssids
Processing #6: Site-5d2709a003f801723c3251bf with 20 traces and 1269 bssids
Processing #7: Site-5a0546857ecc773753327266 with 29 traces and 3433 bssids
Processing #8: Site-5da138274db8ce0c98bbd3d2 with 8 traces and 498 bssids
Processing #9: Site-5d2709b303f801723c327472 with 31 traces and 1934 bssids
Processing #10: Site-5da958dd46f8266d0737457b with 51 traces and 3531 bssids
Processing #11: Site-5d2709bb03f801723c32852c with 34 traces and 2492 bssids
Processing #12: Site-5d2709d403f801723c32bd39 with 51 traces and 2308 bssids
Processing #13: Site-5dc8cea7659e181adb076a3f with 35 traces and 4923 bssids
Processing

In [13]:
# Persist the parsed test tables (WiFi-timestamp rows) for the inference phase.
with open("./data_out/full24/test-10k_mix-counts.pkl", "wb") as out_file:
    pickle.dump(parsed_test_data, out_file)

0.55sec-based timesteps (DNN+LSTMv1)

In [4]:
# Parse the test traces into wide feature tables on a regular time grid.
# Rows: evenly spaced timestamps (about `dt` ms apart) between the first and
# last WiFi timestamp of the trace.
# Columns: "<bssid>" (RSSI) and "<bssid>_D" (delay) for every site BSSID, plus
# time, trace, m, r, rx, ry, rx_cum and ry_cum.
# NOTE(review): `bssid` is not defined in this cell; it must already exist in the kernel.
parsed_test_data = {}
num_s = 0
dt = 550 # in milliseconds
# Keyword arguments shared by every interpolation onto the time grid.
interp_kw = dict(kind="linear", fill_value="extrapolate", copy=False, assume_sorted=True)

for site_id in tqdm(bssid.keys()):  # over sites
    num_s += 1
    #print(f"Processing #{num_s}: Site-{site_id} with {len(test_data[site_id])} traces and {len(bssid[site_id])} bssids")

    parsed_test_data[site_id] = {}

    # feature columns: each BSSID contributes an RSSI column followed by its delay column
    col_names = [name for feature_bssid in bssid[site_id] for name in (feature_bssid, feature_bssid+"_D")]

    for trace_id in tqdm(test_data[site_id]):  # over traces

        trace = test_data[site_id][trace_id]
        wifi_record = trace.wifi.copy()

        accel_record = trace.acce
        rotate_record = trace.rotate
        magnetic_record = trace.magn

        # regular grid spanning the first to the last WiFi timestamp
        timestamps = wifi_record.time.unique().tolist()
        t_first, t_last = timestamps[0], timestamps[-1]
        n_grid = int(1+(t_last-t_first)/dt)
        timestamps = np.linspace(t_first, t_last, n_grid).tolist()

        # Start from the site defaults: even columns (RSSI) get the site's min RSSI,
        # odd columns (delay) get the site's max delay.
        trace_record_wifi = np.zeros((len(timestamps), len(col_names)))
        trace_record_wifi[:, 0::2] = int(train_bssid["min_rssi"][site_id])
        trace_record_wifi[:, 1::2] = int(train_bssid["max_delay"][site_id])
        trace_record_wifi = pd.DataFrame(trace_record_wifi, columns=col_names)

        trace_record_wifi["time"] = timestamps
        trace_record_wifi["trace"] = trace_id
        timestamps = trace_record_wifi.time.to_numpy()

        # magnetic field intensity (Euclidean norm of the three axes)
        mag_x = magnetic_record.x_axis.to_numpy()
        mag_y = magnetic_record.y_axis.to_numpy()
        mag_z = magnetic_record.z_axis.to_numpy()
        magnetic_intensity = np.sqrt(np.square(mag_x) + np.square(mag_y) + np.square(mag_z))
        trace_record_wifi["m"] = interp1d(magnetic_record.time.to_numpy(), magnetic_intensity, **interp_kw)(timestamps)

        # z_axis rotation component (used as the in-plane direction feature)
        trace_record_wifi["r"] = interp1d(rotate_record.time.to_numpy(), rotate_record.z_axis.to_numpy(), **interp_kw)(timestamps)

        # relative x/y step positions derived from accelerometer steps and rotation headings
        step_timestamps, step_indexs, step_acce_max_mins = compute_steps(accel_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        stride_lengths = compute_stride_length(step_acce_max_mins)
        headings = compute_headings(rotate_record[["time", "x_axis", "y_axis", "z_axis"]].to_numpy())
        step_headings = compute_step_heading(step_timestamps, headings)
        rel_step_positions = pd.DataFrame(compute_rel_positions(stride_lengths, step_headings),
                                          columns=["time", "x", "y"])

        # Re-sample the relative steps onto `timestamps`: cumulate, interpolate, then difference.
        # A (time=0, value=0) anchor is prepended before the cumulative sum.
        _rt = np.insert(rel_step_positions.time.to_numpy(), 0, 0.0)
        rx_cum = interp1d(_rt, np.insert(rel_step_positions.x.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        ry_cum = interp1d(_rt, np.insert(rel_step_positions.y.to_numpy(), 0, 0.0).cumsum(), **interp_kw)(timestamps)
        trace_record_wifi["rx"] = np.diff(rx_cum, prepend=0.0)
        trace_record_wifi["ry"] = np.diff(ry_cum, prepend=0.0)
        trace_record_wifi["rx_cum"] = rx_cum
        trace_record_wifi["ry_cum"] = ry_cum
        # the first row of a trace carries no relative displacement
        trace_record_wifi.loc[0, ["rx", "ry", "rx_cum", "ry_cum"]] = 0.

        # fill each grid row with the WiFi records whose time is nearest to it
        for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace/for each waypoint

            deltas = (wifi_record.time - t0).abs()
            wifi_min = wifi_record[deltas == deltas.min()]
            wifi_min_bssid_list = wifi_min.bssid.tolist()
            wifi_min_delay_list = [seen_bssid+"_D" for seen_bssid in wifi_min_bssid_list]

            trace_record_wifi.loc[j, wifi_min_bssid_list] = wifi_min.rssi.to_numpy()

            # TODO (original author's note): delays should be adjusted for the time difference
            # (the closest delay may become larger than the time_delay_limit used by the models)
            trace_record_wifi.loc[j, wifi_min_delay_list] = wifi_min.delay.to_numpy().astype(int)

        parsed_test_data[site_id][trace_id] = trace_record_wifi

        #break  # only first trace

    #break  # only first site

  0%|          | 0/24 [00:00<?, ?it/s]

  0%|          | 0/13 [00:00<?, ?it/s]

  0%|          | 0/29 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

  0%|          | 0/50 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/29 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/34 [00:00<?, ?it/s]

  0%|          | 0/51 [00:00<?, ?it/s]

  0%|          | 0/35 [00:00<?, ?it/s]

  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/11 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/23 [00:00<?, ?it/s]

  0%|          | 0/31 [00:00<?, ?it/s]

  0%|          | 0/17 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/3 [00:00<?, ?it/s]

  0%|          | 0/5 [00:00<?, ?it/s]

In [5]:
# Persist the parsed test tables built on the 550 ms grid for the inference phase.
with open("./data_out/full24/test-10k_mix-counts_t550.pkl", "wb") as out_file:
    pickle.dump(parsed_test_data, out_file)