In [1]:
import numpy as np
import pandas as pd

import pickle
import os
from pathlib import Path
import glob
import json
from tqdm.notebook import tqdm

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
from PIL import Image
#%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

sns.set_style('darkgrid')

from xyz10.io_f_mod import read_data_file
from xyz10.visualize_f_mod import visualize_trajectory

Create list of unique test siteIDs and parse Test Data into dict of dict

In [26]:
# Parse every test trace file and group the parsed traces by site:
#   test_data[site_id][trace_id] -> object returned by read_data_file
test_data = {}
# Normalise Windows separators so the "/" split below works on any OS.
test_data_paths = [path.replace("\\", "/") for path in glob.glob('./data_in/test/*')]

for file_name in tqdm(test_data_paths):
    _split = file_name.split("/")
    _trace = _split[-1].replace(".txt", "")  # trace id = file name without extension

    try:
        _data = read_data_file(file_name, False)
        if _data is None:
            print("Empty data", file_name)
            continue

        # The site id is taken from the parsed header, not from the path.
        _siteID = _data.header.SiteID.values[0]
        test_data.setdefault(_siteID, {})[_trace] = _data
    except Exception as exc:
        # Best effort: report the failing file together with the reason and keep going.
        # Catching `Exception` (not a bare `except`) keeps Ctrl-C / kernel interrupt working.
        print("exception", file_name, repr(exc))

100%|████████████████████████████████████████████████████████████████████████████████| 626/626 [02:30<00:00,  4.16it/s]


In [28]:
# Persist the unique test site ids as a one-column CSV (the column is labelled "0").
siteIDs = test_data.keys()
siteIDs_df = pd.DataFrame(data=siteIDs)
siteIDs_df.to_csv(path_or_buf="./data_out/unique_test_siteIDs.csv")

In [30]:
# Cache the parsed test traces so later cells do not have to re-parse the raw files.
with open("./data_out/test_data.pkl", mode="wb") as pkl_out:
    pickle.dump(test_data, pkl_out)

Create a dict mapping each site ID to the list of unique BSSIDs (Wi-Fi) seen in its test data

In [None]:
# Reload the cached test traces. The context manager closes the file handle
# (a bare pickle.load(open(...)) leaves it open until garbage collection).
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

In [68]:
# test_bssid[site] -> list of the unique BSSIDs seen in any trace of that site,
# in order of first appearance.
test_bssid = {}

for site, site_traces in test_data.items():  # loop over siteIds
    site_bssids = []
    for trace_data in site_traces.values():  # loop over traces on site
        site_bssids.extend(trace_data.wifi.loc[:, "bssid"].unique())
    # De-duplicate across traces; Series.unique keeps first-seen order.
    test_bssid[site] = list(pd.Series(site_bssids).unique())

with open("./data_out/unique_test_bssid.pkl", "wb") as f:
    pickle.dump(test_bssid, f)
# Round-trip through the file to confirm it is readable; `with` closes the handle.
with open("./data_out/unique_test_bssid.pkl", "rb") as f:
    test_bssid = pickle.load(f)
len(test_bssid)

24

Process Train Data

For 24/All test sites with Standard/All floor numbers

In [2]:
# Site ids that occur in the test set (column "0" of the CSV written earlier).
siteIDs_df = pd.read_csv("./data_out/unique_test_siteIDs.csv", index_col=0)
test_siteIDs = siteIDs_df["0"].tolist()

# Train files live three levels below ./data_in/train; force "/" separators so the
# path can be split the same way on every OS.
train_data_paths = [path.replace("\\", "/") for path in glob.glob('./data_in/train/*/*/*')]

# One {site: {trace: [header, frame]}} container per sensor type,
# pre-seeded with an empty dict for every test site.
# todo add option for all sites
motion_data = {site: {} for site in test_siteIDs}
rotate_data = {site: {} for site in test_siteIDs}
gyro_data = {site: {} for site in test_siteIDs}
magnetic_data = {site: {} for site in test_siteIDs}
wifi_data = {site: {} for site in test_siteIDs}
ibeacon_data = {site: {} for site in test_siteIDs}
waypoints_data = {site: {} for site in test_siteIDs}

# Floor folder name -> integer floor number, "standard" naming schemes only
# ('1F'..'10F', 'B'/'B1'..'B4', 'F1'..'F10').
floor_convert = {
    **{f"{n}F": n - 1 for n in range(1, 11)},
    'B': -1,
    **{f"B{n}": -n for n in range(1, 5)},
    **{f"F{n}": n - 1 for n in range(1, 11)},
}

# Same mapping extended with the less common floor names ('BF', 'BM', 'L1'..'L11',
# 'G', 'LG1', 'LG2', 'LM', 'M', 'P1', 'P2').
floor_convert_full = {
    **{f"{n}F": n - 1 for n in range(1, 11)},
    'B': -1,
    **{f"B{n}": -n for n in range(1, 5)},
    'BF': -1, 'BM': -1,
    **{f"F{n}": n - 1 for n in range(1, 11)},
    **{f"L{n}": n - 1 for n in range(1, 12)},
    'G': 0, 'LG1': 0, 'LG2': 1, 'LM': 0, 'M': 0,
    'P1': 0, 'P2': 1,
}

#floor_convert = floor_convert_full  # use in case all IDs should be used

In [3]:
# Parse the train traces of the selected sites/floors and file each sensor frame
# under <sensor>_data[site][trace] = [header, frame].
trace_num = 0
# (destination dict, attribute on the parsed trace) for every sensor that is stored
_sensor_stores = (
    (motion_data, "acce"),
    (rotate_data, "rotate"),
    (gyro_data, "gyro"),
    (magnetic_data, "magn"),
    (wifi_data, "wifi"),
    (ibeacon_data, "ibeacon"),
    (waypoints_data, "waypoint"),
)

for file_name in tqdm(train_data_paths):  # 21 min = 10877 files output
    _split = file_name.split("/")
    _siteID, _floor = _split[-3], _split[-2]
    _trace = _split[-1].replace(".txt", "")

    # Skip floors outside the active floor map and sites that were not pre-seeded.
    if _floor not in floor_convert.keys() or _siteID not in motion_data.keys():
        continue

    _data = read_data_file(file_name, True)
    if _data is None:
        print(file_name)
        continue

    trace_num += 1
    if len(_data.wifi) == 0:
        # Trace is kept, but flag that it carries no wifi rows.
        print(file_name)
    for _store, _attr in _sensor_stores:
        _store[_siteID][_trace] = [_data.header, getattr(_data, _attr)]

print("Number of records:", trace_num)

  2%|█▏                                                                            | 419/26925 [00:42<19:59, 22.09it/s]

./data_in/train/5a0546857ecc773753327266/F3/5d8f0954b6e29d0006fb8c0d.txt
./data_in/train/5a0546857ecc773753327266/F3/5d8f0955b6e29d0006fb8c0f.txt


 62%|███████████████████████████████████████████████▏                            | 16733/26925 [03:00<04:01, 42.19it/s]

./data_in/train/5d27075f03f801723c2e360f/F2/5de0a58bbbb32e0006603c93.txt
./data_in/train/5d27075f03f801723c2e360f/F2/5de0cd4fbbb32e0006603cdb.txt


 76%|██████████████████████████████████████████████████████████                  | 20554/26925 [09:11<06:29, 16.34it/s]

./data_in/train/5d2709d403f801723c32bd39/2F/5dc77d571cda370006030ef7.txt


 77%|██████████████████████████████████████████████████████████▏                 | 20601/26925 [09:15<11:42,  9.00it/s]

./data_in/train/5d2709d403f801723c32bd39/2F/5dcd15b723759900063d5524.txt


 88%|██████████████████████████████████████████████████████████████████▉         | 23694/26925 [14:26<03:03, 17.64it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/B2/5dc54bd81cda37000602fca4.txt
./data_in/train/5da1389e4db8ce0c98bd0547/B2/5dc54bd921dceb0006114b4b.txt


 88%|██████████████████████████████████████████████████████████████████▉         | 23705/26925 [14:26<02:42, 19.80it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/B2/5dc6530e1cda37000603043b.txt
./data_in/train/5da1389e4db8ce0c98bd0547/B2/5dc653101cda37000603043f.txt


 88%|███████████████████████████████████████████████████████████████████▏        | 23809/26925 [14:34<02:50, 18.24it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/F3/5dc63f2817ffdd0006f111d7.txt


 88%|███████████████████████████████████████████████████████████████████▏        | 23825/26925 [14:35<03:25, 15.12it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/F3/5dc6479d1cda370006030330.txt


 89%|███████████████████████████████████████████████████████████████████▎        | 23847/26925 [14:37<02:48, 18.22it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/F3/5dc647af17ffdd0006f112bb.txt


 89%|███████████████████████████████████████████████████████████████████▎        | 23866/26925 [14:37<02:10, 23.35it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/F3/5dc682f41cda3700060308e2.txt
./data_in/train/5da1389e4db8ce0c98bd0547/F3/5dc682f91cda3700060308e8.txt


 89%|███████████████████████████████████████████████████████████████████▉        | 24067/26925 [14:48<01:38, 28.93it/s]

./data_in/train/5da1389e4db8ce0c98bd0547/F4/5dc698691cda370006030a77.txt
./data_in/train/5da1389e4db8ce0c98bd0547/F4/5dc6986f1cda370006030a7b.txt
./data_in/train/5da1389e4db8ce0c98bd0547/F4/5dc6987017ffdd0006f11974.txt


 95%|████████████████████████████████████████████████████████████████████████    | 25544/26925 [18:05<02:08, 10.72it/s]

./data_in/train/5dbc1d84c1eb61796cf7c010/F2/5dd37f0444333f00067aa24d.txt


 95%|████████████████████████████████████████████████████████████████████████▏   | 25595/26925 [18:10<02:17,  9.64it/s]

./data_in/train/5dbc1d84c1eb61796cf7c010/F3/5dd3902827889b0006b76ae2.txt


 98%|██████████████████████████████████████████████████████████████████████████▎ | 26345/26925 [19:43<01:02,  9.25it/s]

./data_in/train/5dc8cea7659e181adb076a3f/F1/5dcf7852878f3300066c6d5e.txt


 98%|██████████████████████████████████████████████████████████████████████████▌ | 26420/26925 [19:50<00:43, 11.71it/s]

./data_in/train/5dc8cea7659e181adb076a3f/F2/5dcfafc494e4900006125908.txt


 98%|██████████████████████████████████████████████████████████████████████████▋ | 26463/26925 [19:54<00:45, 10.09it/s]

./data_in/train/5dc8cea7659e181adb076a3f/F2/5dcfb01594e490000612593c.txt


 98%|██████████████████████████████████████████████████████████████████████████▋ | 26466/26925 [19:55<01:12,  6.34it/s]

./data_in/train/5dc8cea7659e181adb076a3f/F2/5dcfb02594e4900006125943.txt


 99%|███████████████████████████████████████████████████████████████████████████ | 26583/26925 [20:09<00:48,  7.12it/s]

./data_in/train/5dc8cea7659e181adb076a3f/F3/5dccee18c04f060006e6e2e6.txt


100%|████████████████████████████████████████████████████████████████████████████| 26925/26925 [20:50<00:00, 21.53it/s]

Number of records: 10877





Saving each data type per file

In [40]:
save_all_sites = False

# The file-name prefix records which site/floor selection the dictionaries hold.
_prefix = "train_AllIDs_AllF" if save_all_sites else "train_24IDs_standardF"

# One pickle per sensor type: ./data_out/<prefix>_<name>.pkl
_named_stores = [
    ("motion", motion_data),
    ("rotate", rotate_data),
    ("gyro", gyro_data),
    ("magnetic", magnetic_data),
    ("wifi", wifi_data),
    ("ibeacon", ibeacon_data),
    ("waypoints", waypoints_data),
]
for _name, _store in _named_stores:
    with open(f"./data_out/{_prefix}_{_name}.pkl", "wb") as f:
        pickle.dump(_store, f)

INTERPOLATE WAYPOINTS into STEPS

In [30]:
# Reload the cached train dictionaries needed for step interpolation.
# `with` closes each file handle (pickle.load(open(...)) leaves them open).
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/train_24IDs_standardF_motion.pkl", "rb") as f:
    motion_data = pickle.load(f)
with open("./data_out/train_24IDs_standardF_rotate.pkl", "rb") as f:
    rotate_data = pickle.load(f)
with open("./data_out/train_24IDs_standardF_waypoints.pkl", "rb") as f:
    waypoints_data = pickle.load(f)

In [38]:
from xyz10.compute_f_mod import compute_step_positions

# steps_data[site][trace] = [header, DataFrame(time, x, y)], where the positions come
# from compute_step_positions(accelerometer, rotation, waypoints).
steps_data = {}
_sensor_cols = ["time", "x_axis", "y_axis", "z_axis"]

for site_id in motion_data.keys():  # over sites
    site_waypoints = waypoints_data[site_id]
    print(f"Processing Site-{site_id} with {len(site_waypoints)} traces")
    steps_data[site_id] = {}

    for trace_id in tqdm(site_waypoints):  # over traces
        header = site_waypoints[trace_id][0]

        # Element [1] of every stored pair is the sensor frame itself.
        acce_arr = motion_data[site_id][trace_id][1][_sensor_cols].to_numpy()
        rotate_arr = rotate_data[site_id][trace_id][1][_sensor_cols].to_numpy()
        waypoint_arr = site_waypoints[trace_id][1][["time", "x", "y"]].to_numpy()

        step_positions = pd.DataFrame(
            compute_step_positions(acce_arr, rotate_arr, waypoint_arr),
            columns=["time", "x", "y"],
        )
        steps_data[site_id][trace_id] = [header, step_positions]

  0%|                                                                                          | 0/514 [00:00<?, ?it/s]

Processing Site-5da1389e4db8ce0c98bd0547 with 514 traces


100%|████████████████████████████████████████████████████████████████████████████████| 514/514 [01:23<00:00,  6.18it/s]
  0%|                                                                                          | 0/748 [00:00<?, ?it/s]

Processing Site-5da138b74db8ce0c98bd4774 with 748 traces


100%|████████████████████████████████████████████████████████████████████████████████| 748/748 [03:47<00:00,  3.29it/s]
  0%|                                                                                          | 0/513 [00:00<?, ?it/s]

Processing Site-5da138764db8ce0c98bcaa46 with 513 traces


100%|████████████████████████████████████████████████████████████████████████████████| 513/513 [01:58<00:00,  4.34it/s]
  0%|                                                                                  | 1/793 [00:00<02:49,  4.66it/s]

Processing Site-5dbc1d84c1eb61796cf7c010 with 793 traces


100%|████████████████████████████████████████████████████████████████████████████████| 793/793 [03:56<00:00,  3.35it/s]
  0%|                                                                                          | 0/239 [00:00<?, ?it/s]

Processing Site-5da1383b4db8ce0c98bc11ab with 239 traces


100%|████████████████████████████████████████████████████████████████████████████████| 239/239 [02:43<00:00,  1.46it/s]
  0%|▎                                                                                 | 1/310 [00:00<00:34,  8.87it/s]

Processing Site-5d2709a003f801723c3251bf with 310 traces


100%|████████████████████████████████████████████████████████████████████████████████| 310/310 [00:50<00:00,  6.12it/s]
  0%|                                                                                          | 0/514 [00:00<?, ?it/s]

Processing Site-5a0546857ecc773753327266 with 514 traces


100%|████████████████████████████████████████████████████████████████████████████████| 514/514 [02:08<00:00,  3.99it/s]
  0%|                                                                                          | 0/213 [00:00<?, ?it/s]

Processing Site-5da138274db8ce0c98bbd3d2 with 213 traces


100%|████████████████████████████████████████████████████████████████████████████████| 213/213 [00:33<00:00,  6.42it/s]
  0%|                                                                                          | 0/639 [00:00<?, ?it/s]

Processing Site-5d2709b303f801723c327472 with 639 traces


100%|████████████████████████████████████████████████████████████████████████████████| 639/639 [03:17<00:00,  3.23it/s]
  0%|                                                                                          | 0/552 [00:00<?, ?it/s]

Processing Site-5da958dd46f8266d0737457b with 552 traces


100%|████████████████████████████████████████████████████████████████████████████████| 552/552 [03:23<00:00,  2.71it/s]
  0%|                                                                                          | 0/264 [00:00<?, ?it/s]

Processing Site-5d2709bb03f801723c32852c with 264 traces


100%|████████████████████████████████████████████████████████████████████████████████| 264/264 [03:37<00:00,  1.21it/s]
  0%|                                                                                          | 0/361 [00:00<?, ?it/s]

Processing Site-5d2709d403f801723c32bd39 with 361 traces


100%|████████████████████████████████████████████████████████████████████████████████| 361/361 [02:11<00:00,  2.75it/s]
  0%|                                                                                          | 0/745 [00:00<?, ?it/s]

Processing Site-5dc8cea7659e181adb076a3f with 745 traces


100%|████████████████████████████████████████████████████████████████████████████████| 745/745 [03:43<00:00,  3.33it/s]
  0%|                                                                                          | 0/351 [00:00<?, ?it/s]

Processing Site-5d27096c03f801723c31e5e0 with 351 traces


100%|████████████████████████████████████████████████████████████████████████████████| 351/351 [01:54<00:00,  3.05it/s]
  0%|                                                                                          | 0/664 [00:00<?, ?it/s]

Processing Site-5d2709c303f801723c3299ee with 664 traces


100%|████████████████████████████████████████████████████████████████████████████████| 664/664 [02:16<00:00,  4.87it/s]
  0%|                                                                                          | 0/428 [00:00<?, ?it/s]

Processing Site-5da138314db8ce0c98bbf3a0 with 428 traces


100%|████████████████████████████████████████████████████████████████████████████████| 428/428 [01:56<00:00,  3.68it/s]
  0%|                                                                                          | 0/338 [00:00<?, ?it/s]

Processing Site-5da1382d4db8ce0c98bbe92e with 338 traces


100%|████████████████████████████████████████████████████████████████████████████████| 338/338 [02:14<00:00,  2.51it/s]
  0%|                                                                                         | 0/1141 [00:00<?, ?it/s]

Processing Site-5d27075f03f801723c2e360f with 1141 traces


100%|██████████████████████████████████████████████████████████████████████████████| 1141/1141 [06:11<00:00,  3.07it/s]
  0%|                                                                                          | 0/205 [00:00<?, ?it/s]

Processing Site-5da138754db8ce0c98bca82f with 205 traces


100%|████████████████████████████████████████████████████████████████████████████████| 205/205 [01:27<00:00,  2.33it/s]
  0%|                                                                                          | 0/343 [00:00<?, ?it/s]

Processing Site-5d2709e003f801723c32d896 with 343 traces


100%|████████████████████████████████████████████████████████████████████████████████| 343/343 [02:22<00:00,  2.41it/s]
  0%|                                                                                          | 0/404 [00:00<?, ?it/s]

Processing Site-5d27097f03f801723c320d97 with 404 traces


100%|████████████████████████████████████████████████████████████████████████████████| 404/404 [02:19<00:00,  2.89it/s]
  0%|                                                                                           | 0/82 [00:00<?, ?it/s]

Processing Site-5da138364db8ce0c98bc00f1 with 82 traces


100%|██████████████████████████████████████████████████████████████████████████████████| 82/82 [00:34<00:00,  2.35it/s]
  0%|                                                                                          | 0/385 [00:00<?, ?it/s]

Processing Site-5c3c44b80379370013e0fd2b with 385 traces


100%|████████████████████████████████████████████████████████████████████████████████| 385/385 [02:12<00:00,  2.90it/s]
  0%|                                                                                          | 0/131 [00:00<?, ?it/s]

Processing Site-5d27099f03f801723c32511d with 131 traces


100%|████████████████████████████████████████████████████████████████████████████████| 131/131 [00:56<00:00,  2.30it/s]


In [39]:
# Cache the interpolated step positions for the later feature-building cells.
with open("./data_out/train_24IDs_standardF_steps.pkl", mode="wb") as pkl_out:
    pickle.dump(steps_data, pkl_out)

PRE-PROCESSING
- Create a list of the most representative (original/non-interpolated) BSSIDs per site, plus the per-site minimum RSSI (minus 1, so it is still lower) and maximum DELAY (plus 1, so it is still higher); these two values serve as dummy values for BSSIDs that are absent from a trace.

TEST DATA

   > 1) max counts/occurrence per site
   
   > 2) max RSSI per site
   
   > 3) max abs(mean(1/RSSI))*counts per site

In [497]:
# Reload the cached test traces and the per-site unique BSSID lists.
# `with` closes each file handle (pickle.load(open(...)) leaves them open).
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/test_data.pkl", "rb") as f:
    test_data = pickle.load(f)
with open("./data_out/unique_test_bssid.pkl", "rb") as f:
    test_bssid = pickle.load(f)

In [498]:
# Per-site dummy-fill values: (lowest RSSI - 1) and (largest delay + 1).
test_bssid_min_rssi = {}
test_bssid_max_delay = {}

# Per-site BSSID rankings, each a DataFrame sorted best-first.
test_bssid_max_rssi_rank = {}
test_bssid_max_count_rank = {}
test_bssid_max_weight_rank = {}

for site in tqdm(test_data.keys()):

    # Stack the wifi rows of every trace of this site. pd.concat returns a new
    # frame, so adding the helper column below does not touch the cached frames
    # inside test_data (the cell stays idempotent on re-run).
    site_wifi = pd.concat(
        [trace_data.wifi for trace_data in test_data[site].values()],
        ignore_index=True,
    )
    site_wifi["rssi_inv"] = 1 / site_wifi["rssi"]

    test_bssid_min_rssi[site] = site_wifi.rssi.min() - 1
    test_bssid_max_delay[site] = site_wifi.delay.max() + 1

    # various ranking section
    gr = site_wifi.groupby(["bssid"])

    # 1) strongest single RSSI reading per BSSID
    max_rssi = (
        gr["rssi"].max()
        .reset_index()
        .sort_values("rssi", ascending=False)
        .reset_index(drop=True)
    )
    # 2) number of readings per BSSID
    max_count = (
        gr["time"].count()
        .reset_index()
        .rename(columns={"time": "count"})
        .sort_values("count", ascending=False)
        .reset_index(drop=True)
    )
    # 3) weight = |mean(1/RSSI) * count|; mean and count come from one aggregation,
    #    so they are aligned on the BSSID index rather than by row position.
    weight_stats = gr["rssi_inv"].agg(["mean", "count"])
    max_weight = (
        (weight_stats["mean"] * weight_stats["count"]).abs()
        .rename("weight")
        .reset_index()
        .sort_values("weight", ascending=False)
        .reset_index(drop=True)
    )

    test_bssid_max_rssi_rank[site] = max_rssi
    test_bssid_max_count_rank[site] = max_count
    test_bssid_max_weight_rank[site] = max_weight

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:04<00:00,  5.91it/s]


In [499]:
# Bundle the three per-site rankings and the two per-site dummy-fill values in one dict.
test_bssid_ranks = {
    "count": test_bssid_max_count_rank,
    "rssi": test_bssid_max_rssi_rank,
    "weight": test_bssid_max_weight_rank,
    "min_rssi": test_bssid_min_rssi,
    "max_delay": test_bssid_max_delay,
}
with open("./data_out/test_bssid_ranks.pkl", mode="wb") as pkl_out:
    pickle.dump(test_bssid_ranks, pkl_out)

TRAIN DATA

   > 1) max counts/occurrence per site
   
   > 2) max RSSI per site
   
   > 3) max abs(mean(1/RSSI))*counts per site

In [500]:
# Reload the cached train wifi dictionary; `with` closes the file handle.
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/train_24IDs_standardF_wifi.pkl", "rb") as f:
    wifi_data_train = pickle.load(f)

In [502]:
# Per-site dummy-fill values: (lowest RSSI - 1) and (largest delay + 1).
train_bssid_min_rssi = {}
train_bssid_max_delay = {}

# Per-site BSSID rankings, each a DataFrame sorted best-first.
train_bssid_max_rssi_rank = {}
train_bssid_max_count_rank = {}
train_bssid_max_weight_rank = {}

for site in tqdm(wifi_data_train.keys()):

    # Each stored entry is [header, wifi_frame]; stack the wifi frames of the site.
    # pd.concat returns a new frame, so adding the helper column below does not
    # touch the cached frames inside wifi_data_train (the cell stays idempotent).
    site_wifi = pd.concat(
        [trace_entry[1] for trace_entry in wifi_data_train[site].values()],
        ignore_index=True,
    )
    site_wifi["rssi_inv"] = 1 / site_wifi["rssi"]

    train_bssid_min_rssi[site] = site_wifi.rssi.min() - 1
    train_bssid_max_delay[site] = site_wifi.delay.max() + 1

    # various ranking section
    gr = site_wifi.groupby(["bssid"])

    # 1) strongest single RSSI reading per BSSID
    max_rssi = (
        gr["rssi"].max()
        .reset_index()
        .sort_values("rssi", ascending=False)
        .reset_index(drop=True)
    )
    # 2) number of readings per BSSID
    max_count = (
        gr["time"].count()
        .reset_index()
        .rename(columns={"time": "count"})
        .sort_values("count", ascending=False)
        .reset_index(drop=True)
    )
    # 3) weight = |mean(1/RSSI) * count|; mean and count come from one aggregation,
    #    so they are aligned on the BSSID index rather than by row position.
    weight_stats = gr["rssi_inv"].agg(["mean", "count"])
    max_weight = (
        (weight_stats["mean"] * weight_stats["count"]).abs()
        .rename("weight")
        .reset_index()
        .sort_values("weight", ascending=False)
        .reset_index(drop=True)
    )

    train_bssid_max_rssi_rank[site] = max_rssi
    train_bssid_max_count_rank[site] = max_count
    train_bssid_max_weight_rank[site] = max_weight

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [01:40<00:00,  4.17s/it]


In [503]:
# Bundle the three per-site rankings and the two per-site dummy-fill values in one dict.
train_bssid_ranks = {
    "count": train_bssid_max_count_rank,
    "rssi": train_bssid_max_rssi_rank,
    "weight": train_bssid_max_weight_rank,
    "min_rssi": train_bssid_min_rssi,
    "max_delay": train_bssid_max_delay,
}
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", mode="wb") as pkl_out:
    pickle.dump(train_bssid_ranks, pkl_out)

PROCESSING

In [9]:
# Load the per-site ranking bundles (keys: "count", "rssi", "weight", "min_rssi", "max_delay").
# NOTE: from here on `test_bssid` holds the ranking bundle, not the unique-BSSID lists.
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/test_bssid_ranks.pkl", "rb") as f:
    test_bssid = pickle.load(f)
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", "rb") as f:
    train_bssid = pickle.load(f)

# fraction_bssid < 1  -> keep that fraction of each ranking
# fraction_bssid >= 1 -> keep that many top entries of each ranking
fraction_bssid = 10000
bssid_modes = ["count", "rssi", "weight"]
group_modes = ["train", "test", "mix"]
# TODO change here for different modes
bssid_mode = bssid_modes[0]
group_mode = group_modes[2]

# bssid[site] -> list of BSSIDs used as feature columns for that site
bssid = {}
for site in test_bssid[bssid_mode]:
    _test_rank = test_bssid[bssid_mode][site]
    _train_rank = train_bssid[bssid_mode][site]

    if fraction_bssid < 1:
        _n_test = int(fraction_bssid*len(_test_rank))
        _n_train = int(fraction_bssid*len(_train_rank))
    else:
        _n_test = _n_train = int(fraction_bssid)

    _test_bssid = _test_rank.bssid[:_n_test]
    _train_bssid = _train_rank.bssid[:_n_train]
    _mix_bssid = pd.concat([_test_bssid,  _train_bssid]).unique()

    # Honour group_mode: it is written into the output file names, so the
    # selected list must be the one the name advertises.
    if group_mode == "train":
        _list = _train_bssid.tolist()
    elif group_mode == "test":
        _list = _test_bssid.tolist()
    else:
        _list = _mix_bssid.tolist()

    # Guard the percentage against an empty selection (e.g. a very small fraction).
    _overlap = 100*len(_train_bssid)/len(_mix_bssid) if len(_mix_bssid) else float("nan")
    print(f"Site-{site}: test-{len(_test_bssid)}, train-{len(_train_bssid)}, mix-{len(_mix_bssid)}: => train overlap-{_overlap}")

    bssid[site] = _list

Site-5da1389e4db8ce0c98bd0547: test-444, train-1021, mix-1041: => train overlap-98.07877041306436
Site-5da138b74db8ce0c98bd4774: test-2369, train-3535, mix-3546: => train overlap-99.68979131415679
Site-5da138764db8ce0c98bcaa46: test-1279, train-1888, mix-1949: => train overlap-96.87018984094408
Site-5dbc1d84c1eb61796cf7c010: test-3619, train-4519, mix-4544: => train overlap-99.44982394366197
Site-5da1383b4db8ce0c98bc11ab: test-1116, train-1525, mix-1554: => train overlap-98.13384813384813
Site-5d2709a003f801723c3251bf: test-885, train-1252, mix-1269: => train overlap-98.66036249014972
Site-5a0546857ecc773753327266: test-2435, train-3397, mix-3433: => train overlap-98.95135450043693
Site-5da138274db8ce0c98bbd3d2: test-298, train-490, mix-498: => train overlap-98.39357429718875
Site-5d2709b303f801723c327472: test-1167, train-1913, mix-1934: => train overlap-98.91416752843847
Site-5da958dd46f8266d0737457b: test-2895, train-3499, mix-3531: => train overlap-99.09374114981591
Site-5d2709bb03

> Compact DATA: Original WAYPOINTS (snap to waypoints): Convert/Append/Save each trace per site to DataFrame: Features=BSSID/RSSI + DELAYS + TRACEID;  Targets=XYF (BSSID not in trace gets min-RSSI and max-DELAY per site) 

> Full DATA: Interpolated STEPS (snap to steps; where steps 0.5s/wifi 0.5-1s dt = expected error is 0.5step=0.25m): Convert/Append/Save each trace per site to DataFrame: Features=BSSID/RSSI + DELAYS + TRACEID;  Targets=XYF (BSSID not in trace gets min-RSSI and max-DELAY per site) 

In [5]:
# Reload the cached train wifi dictionary; `with` closes the file handle.
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open("./data_out/train_24IDs_standardF_wifi.pkl", "rb") as f:
    wifi_data_train = pickle.load(f)

In [67]:
# "compact": positions are the original waypoints; "full": positions are the interpolated steps.
modes = ["compact", "full"]
mode = modes[1]

if mode == "compact":
    _spatial_path = "./data_out/train_24IDs_standardF_waypoints.pkl"
else:
    _spatial_path = "./data_out/train_24IDs_standardF_steps.pkl"

# `with` closes the file handle (pickle.load(open(...)) leaves it open).
# NOTE: only unpickle files produced by this notebook; pickle can execute arbitrary code.
with open(_spatial_path, "rb") as f:
    spatial_data = pickle.load(f)

In [23]:
# Build one feature/target table per site and pickle it:
#   features: <bssid> (RSSI) and <bssid>_D (delay) for every selected BSSID
#   targets/meta: time, x, y, f (floor name from the header), trace
# For every position row the wifi scan closest in time is used; BSSIDs missing from
# that scan keep the per-site dummy values (min RSSI / max delay from the train ranks).
combined_data = {}
num_s = 0

for site_id in bssid.keys():  # over sites

    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id} with {len(spatial_data[site_id])} traces and {len(bssid[site_id])} bssids")

    # Feature columns come in (rssi, delay) pairs: even positions = RSSI, odd = delay.
    col_names = []
    for feature_bssid in bssid[site_id]:
        col_names.append(feature_bssid)
        col_names.append(feature_bssid+"_D")

    # One row of dummy values, built once per site instead of column by column per trace.
    default_row = np.empty(len(col_names))
    default_row[0::2] = int(train_bssid["min_rssi"][site_id])
    default_row[1::2] = int(train_bssid["max_delay"][site_id])

    site_trace_record_wifi = []

    for trace_id in tqdm(spatial_data[site_id]):  # over traces

        # Read-only here, so no defensive copy is needed.
        wifi_record = wifi_data_train[site_id][trace_id][1]
        # Each spatial entry is a plain list [header, positions]; unpack it
        # (list.copy() accepts no `deep` argument).
        header, positions = spatial_data[site_id][trace_id]

        trace_record_wifi = pd.DataFrame(
            np.tile(default_row, (positions.shape[0], 1)), columns=col_names
        )
        trace_record_wifi[["time", "x", "y"]] = positions[["time", "x", "y"]].copy(deep=True)
        trace_record_wifi["f"] = header.FloorName[0]
        trace_record_wifi["trace"] = trace_id

        # Traces without any wifi rows keep the dummy values for every position.
        if not wifi_record.empty:
            # Take wifi data the nearest to each waypoint
            for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace/for each waypoint

                time_gap = (wifi_record.time - t0).abs()
                # All rows sharing the smallest gap (every AP reported at that timestamp).
                wifi_min = wifi_record[time_gap == time_gap.min()]
                wifi_min_bssid_list = wifi_min.bssid.tolist()
                wifi_min_delay_list = [col+"_D" for col in wifi_min_bssid_list]

                trace_record_wifi.loc[j, wifi_min_bssid_list] = wifi_min.rssi.to_numpy()
                trace_record_wifi.loc[j, wifi_min_delay_list] = wifi_min.delay.to_numpy().astype(int)

        site_trace_record_wifi.append(trace_record_wifi)

    site_trace_record_wifi = pd.concat(site_trace_record_wifi, ignore_index=True)
    #combined_data[site_id] = site_trace_record_wifi

    # saving the table for a given site_id; make sure the target folder exists first
    fr = str(fraction_bssid).replace(".","")
    out_dir = f"./data_out/{mode}24"
    os.makedirs(out_dir, exist_ok=True)
    with open(f"{out_dir}/{site_id}_mode-{bssid_mode}_fraction-{fr}_{group_mode}-bssid.pkl", "wb") as f:
        pickle.dump(site_trace_record_wifi, f)

  0%|                                                                                          | 0/514 [00:00<?, ?it/s]

Processing #1: Site-5da1389e4db8ce0c98bd0547 with 514 traces and 1041 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 514/514 [05:05<00:00,  1.68it/s]
  0%|                                                                                          | 0/748 [00:00<?, ?it/s]

Processing #2: Site-5da138b74db8ce0c98bd4774 with 748 traces and 3546 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 748/748 [50:36<00:00,  4.06s/it]
  0%|                                                                                          | 0/513 [00:00<?, ?it/s]

Processing #3: Site-5da138764db8ce0c98bcaa46 with 513 traces and 1949 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 513/513 [10:42<00:00,  1.25s/it]
  0%|                                                                                          | 0/793 [00:00<?, ?it/s]

Processing #4: Site-5dbc1d84c1eb61796cf7c010 with 793 traces and 4544 bssids


100%|██████████████████████████████████████████████████████████████████████████████| 793/793 [1:09:48<00:00,  5.28s/it]
  0%|                                                                                          | 0/239 [00:00<?, ?it/s]

Processing #5: Site-5da1383b4db8ce0c98bc11ab with 239 traces and 1554 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 239/239 [20:29<00:00,  5.15s/it]
  0%|                                                                                          | 0/310 [00:00<?, ?it/s]

Processing #6: Site-5d2709a003f801723c3251bf with 310 traces and 1269 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 310/310 [04:16<00:00,  1.21it/s]
  0%|                                                                                          | 0/514 [00:00<?, ?it/s]

Processing #7: Site-5a0546857ecc773753327266 with 514 traces and 3433 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 514/514 [33:41<00:00,  3.93s/it]
  0%|                                                                                          | 0/213 [00:00<?, ?it/s]

Processing #8: Site-5da138274db8ce0c98bbd3d2 with 213 traces and 498 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 213/213 [01:20<00:00,  2.65it/s]
  0%|                                                                                          | 0/639 [00:00<?, ?it/s]

Processing #9: Site-5d2709b303f801723c327472 with 639 traces and 1934 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 639/639 [19:22<00:00,  1.82s/it]
  0%|                                                                                          | 0/552 [00:00<?, ?it/s]

Processing #10: Site-5da958dd46f8266d0737457b with 552 traces and 3531 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 552/552 [48:41<00:00,  5.29s/it]
  0%|                                                                                          | 0/264 [00:00<?, ?it/s]

Processing #11: Site-5d2709bb03f801723c32852c with 264 traces and 2492 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 264/264 [29:16<00:00,  6.65s/it]
  0%|                                                                                          | 0/361 [00:00<?, ?it/s]

Processing #12: Site-5d2709d403f801723c32bd39 with 361 traces and 2308 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 361/361 [09:28<00:00,  1.58s/it]
  0%|                                                                                          | 0/745 [00:00<?, ?it/s]

Processing #13: Site-5dc8cea7659e181adb076a3f with 745 traces and 4923 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 745/745 [31:34<00:00,  2.54s/it]
  0%|                                                                                          | 0/351 [00:00<?, ?it/s]

Processing #14: Site-5d27096c03f801723c31e5e0 with 351 traces and 5469 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 351/351 [09:35<00:00,  1.64s/it]
  0%|                                                                                          | 0/664 [00:00<?, ?it/s]

Processing #15: Site-5d2709c303f801723c3299ee with 664 traces and 5873 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 664/664 [34:14<00:00,  3.09s/it]
  0%|                                                                                          | 0/428 [00:00<?, ?it/s]

Processing #16: Site-5da138314db8ce0c98bbf3a0 with 428 traces and 1223 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 428/428 [09:52<00:00,  1.38s/it]
  0%|                                                                                          | 0/338 [00:00<?, ?it/s]

Processing #17: Site-5da1382d4db8ce0c98bbe92e with 338 traces and 2920 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 338/338 [42:40<00:00,  7.57s/it]
  0%|                                                                                         | 0/1141 [00:00<?, ?it/s]

Processing #18: Site-5d27075f03f801723c2e360f with 1141 traces and 7033 bssids


100%|████████████████████████████████████████████████████████████████████████████| 1141/1141 [1:20:39<00:00,  4.24s/it]
  0%|                                                                                          | 0/205 [00:00<?, ?it/s]

Processing #19: Site-5da138754db8ce0c98bca82f with 205 traces and 1792 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 205/205 [05:41<00:00,  1.67s/it]
  0%|▏                                                                                 | 1/343 [00:00<01:09,  4.91it/s]

Processing #20: Site-5d2709e003f801723c32d896 with 343 traces and 1355 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 343/343 [10:53<00:00,  1.90s/it]
  0%|                                                                                          | 0/404 [00:00<?, ?it/s]

Processing #21: Site-5d27097f03f801723c320d97 with 404 traces and 2514 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 404/404 [16:13<00:00,  2.41s/it]
  0%|                                                                                           | 0/82 [00:00<?, ?it/s]

Processing #22: Site-5da138364db8ce0c98bc00f1 with 82 traces and 844 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 82/82 [02:05<00:00,  1.53s/it]
  0%|                                                                                          | 0/385 [00:00<?, ?it/s]

Processing #23: Site-5c3c44b80379370013e0fd2b with 385 traces and 3064 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 385/385 [33:00<00:00,  5.14s/it]
  0%|                                                                                          | 0/131 [00:00<?, ?it/s]

Processing #24: Site-5d27099f03f801723c32511d with 131 traces and 928 bssids


100%|████████████████████████████████████████████████████████████████████████████████| 131/131 [05:37<00:00,  2.58s/it]


***
Convert Test Data into RSSI/DELAY DataFrame for Inference Phase
***

In [10]:
# Load the per-site training bssid statistics and the parsed test traces.
# Context managers make sure both file handles are closed after reading.
# NOTE: pickle can execute arbitrary code on load; only open files written by this notebook.
with open("./data_out/train_24IDs_standardF_bssid_ranks.pkl", "rb") as f:
    train_bssid = pickle.load(f)
with open("./data_out/test_data.pkl", "rb") as f:
    test_data = pickle.load(f)

In [15]:
# Build, per site and per test trace, a DataFrame with one row per wifi scan
# timestamp and, for every selected bssid, an RSSI column followed by a
# "<bssid>_D" delay column. Unobserved bssids keep the site defaults
# (train min_rssi / max_delay).
parsed_test_data = {}
num_s = 0

for site_id in bssid.keys():  # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id} with {len(test_data[site_id])} traces and {len(bssid[site_id])} bssids")

    parsed_test_data[site_id] = {}

    # Feature layout: even positions hold RSSI, odd positions hold the delay.
    col_names = []
    for feature_bssid in bssid[site_id]:  # create features-columns list
        col_names.append(feature_bssid)
        col_names.append(feature_bssid+"_D")

    # Set for O(1) membership tests when filtering the scanned bssids.
    known_bssids = set(bssid[site_id])

    # Site-wide defaults are loop-invariant, so look them up once per site.
    default_rssi = int(train_bssid["min_rssi"][site_id])
    default_delay = int(train_bssid["max_delay"][site_id])

    for trace_id in tqdm(test_data[site_id]):  # over traces

        wifi_record = test_data[site_id][trace_id].wifi.copy(deep=True)

        time_stamps = wifi_record.time.unique().tolist()

        # Pre-fill the whole feature matrix with the defaults in two slice
        # assignments (even columns = RSSI, odd columns = delay).
        trace_record_wifi = np.zeros((len(time_stamps), len(col_names)))
        trace_record_wifi[:, 0::2] = default_rssi
        trace_record_wifi[:, 1::2] = default_delay

        trace_record_wifi = pd.DataFrame(trace_record_wifi, columns=col_names)

        trace_record_wifi["time"] = time_stamps
        trace_record_wifi["trace"] = trace_id

        # Overwrite the defaults with the values observed at each scan timestamp.
        for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace

            wifi_min = wifi_record[wifi_record.time == t0]

            # Only bssids that are feature columns may be written; anything
            # else would either raise or add stray columns to the frame.
            wifi_known = wifi_min[wifi_min.bssid.isin(known_bssids)]
            if wifi_known.empty:
                continue

            # NOTE(review): if a bssid can occur twice at the same timestamp,
            # confirm which reading should win; this keeps the .loc assignment
            # semantics of the original cell.
            wifi_min_bssid_list = wifi_known.bssid.tolist()
            wifi_min_delay_list = [col+"_D" for col in wifi_min_bssid_list]

            trace_record_wifi.loc[j, wifi_min_bssid_list] = wifi_known.rssi.to_numpy()
            trace_record_wifi.loc[j, wifi_min_delay_list] = wifi_known.delay.to_numpy().astype(int)

        parsed_test_data[site_id][trace_id] = trace_record_wifi

Processing #1: Site-5da1382d4db8ce0c98bbe92e with 11 traces and 2920 bssids
    time                                      ssid  \
0   1911  383a6185d30aa442d718b5209ecd4eb49cb73c54   
1   1911  3f2c15f34befeab48e04a706071dd12f20bcbf75   
2   1911  8b11efe31eeb88209290065632b99739c4a57f11   
3   1911  372308ce2a04b1ca088d269fec6966c697b02619   
4   1911  5ec944cb0e043327906acce8d1b61f6e32d8db1c   
5   1911  7123b8a309997c3fc78a7897089a478e1efc33c7   
6   1911  da39a3ee5e6b4b0d3255bfef95601890afd80709   
7   1911  0fa36db77af713808449ff54868815dc26f88e45   
8   1911  da39a3ee5e6b4b0d3255bfef95601890afd80709   
9   1911  d17bb082e04f08a5f47e5f92f26b8d0ba2b800de   
10  1911  e5ac03a98be38ed977dd6073a8c34a59606c2309   
11  1911  0b72e3ecd8f9071d311fd78bc78b4696fbbd35e5   
12  1911  da39a3ee5e6b4b0d3255bfef95601890afd80709   
13  1911  8e34926eb2a2a0663b2e03ac3c4cccf31ecc1965   
14  1911  5ec944cb0e043327906acce8d1b61f6e32d8db1c   
15  1911  da39a3ee5e6b4b0d3255bfef95601890afd80709   
16  19

In [28]:
# Persist the parsed test features. The file name encodes the bssid selection
# settings; the fraction is written without its decimal point.
fr = "".join(str(fraction_bssid).split("."))
parsed_test_path = f"./data_out/test/mode-{bssid_mode}_fraction-{fr}_{group_mode}-bssid.pkl"
with open(parsed_test_path, "wb") as f:
    pickle.dump(parsed_test_data, f)

In [4]:
# Build, per site and per test trace, a DataFrame with the columns
# "time", "trace" and, for every selected bssid, "<bssid>" (RSSI) and
# "<bssid>_D" (delay). Unobserved bssids keep the site defaults
# (train min_rssi / max_delay).
parsed_data = {}
num_s = 0

for site_id in test_data.keys():  # over sites
    num_s += 1
    print(f"Processing #{num_s}: Site-{site_id} with {len(test_data[site_id])} traces and {len(bssid[site_id])} bssids")

    parsed_data[site_id] = {}

    # Set for O(1) membership tests instead of scanning the bssid list
    # once per observation.
    known_bssids = set(bssid[site_id])

    # Default value of every feature column, in column order; identical for
    # all traces of the site, so it is built once here.
    default_columns = {}
    for feature_bssid in bssid[site_id]:
        default_columns[feature_bssid] = train_bssid["min_rssi"][site_id]
        default_columns[feature_bssid+"_D"] = train_bssid["max_delay"][site_id]

    for trace_id in tqdm(test_data[site_id]):  # over traces

        wifi_record = test_data[site_id][trace_id].wifi.copy(deep=True)

        time_stamps = wifi_record.time.unique().tolist()
        trace_record_wifi = pd.DataFrame(index=pd.RangeIndex(len(time_stamps)))

        trace_record_wifi["time"] = time_stamps
        trace_record_wifi["trace"] = trace_id

        # Add all default-filled feature columns in one concat; inserting
        # them one by one fragments the frame.
        default_features = pd.DataFrame(default_columns, index=trace_record_wifi.index)
        trace_record_wifi = pd.concat([trace_record_wifi, default_features], axis=1)

        # Overwrite the defaults with the values observed at each scan timestamp.
        for j, t0 in enumerate(trace_record_wifi.time):  # over times in trace

            wifi_min = wifi_record[wifi_record.time == t0]

            # Keep only bssids that are feature columns; when a bssid occurs
            # more than once at this timestamp the first reading is used.
            observed = wifi_min[wifi_min.bssid.isin(known_bssids)].drop_duplicates(subset="bssid", keep="first")

            for feature_bssid, rssi, delay in zip(observed.bssid, observed.rssi, observed.delay):
                trace_record_wifi.loc[j, feature_bssid] = rssi
                trace_record_wifi.loc[j, feature_bssid+"_D"] = delay

        parsed_data[site_id][trace_id] = trace_record_wifi
    

  0%|                                                                                           | 0/13 [00:00<?, ?it/s]

Processing Site-5da1389e4db8ce0c98bd0547 with 13 traces and 106 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 13/13 [00:26<00:00,  2.02s/it]
  0%|                                                                                           | 0/29 [00:00<?, ?it/s]

Processing Site-5da138b74db8ce0c98bd4774 with 29 traces and 111 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [01:13<00:00,  2.53s/it]
  0%|                                                                                           | 0/36 [00:00<?, ?it/s]

Processing Site-5da138764db8ce0c98bcaa46 with 36 traces and 132 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 36/36 [01:01<00:00,  1.72s/it]
  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

Processing Site-5dbc1d84c1eb61796cf7c010 with 50 traces and 120 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [01:26<00:00,  1.73s/it]
  0%|                                                                                           | 0/26 [00:00<?, ?it/s]

Processing Site-5da1383b4db8ce0c98bc11ab with 26 traces and 146 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 26/26 [00:57<00:00,  2.20s/it]
  0%|                                                                                           | 0/20 [00:00<?, ?it/s]

Processing Site-5d2709a003f801723c3251bf with 20 traces and 139 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 20/20 [00:50<00:00,  2.52s/it]
  0%|                                                                                           | 0/29 [00:00<?, ?it/s]

Processing Site-5a0546857ecc773753327266 with 29 traces and 134 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [01:04<00:00,  2.21s/it]
  0%|                                                                                            | 0/8 [00:00<?, ?it/s]

Processing Site-5da138274db8ce0c98bbd3d2 with 8 traces and 139 bssids


100%|████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:14<00:00,  1.87s/it]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s]

Processing Site-5d2709b303f801723c327472 with 31 traces and 135 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [02:36<00:00,  5.05s/it]
  0%|                                                                                           | 0/51 [00:00<?, ?it/s]

Processing Site-5da958dd46f8266d0737457b with 51 traces and 142 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 51/51 [03:13<00:00,  3.79s/it]
  0%|                                                                                           | 0/34 [00:00<?, ?it/s]

Processing Site-5d2709bb03f801723c32852c with 34 traces and 109 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 34/34 [03:11<00:00,  5.65s/it]
  0%|                                                                                           | 0/51 [00:00<?, ?it/s]

Processing Site-5d2709d403f801723c32bd39 with 51 traces and 139 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 51/51 [02:15<00:00,  2.66s/it]
  0%|                                                                                           | 0/35 [00:00<?, ?it/s]

Processing Site-5dc8cea7659e181adb076a3f with 35 traces and 132 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 35/35 [01:08<00:00,  1.95s/it]
  0%|                                                                                           | 0/60 [00:00<?, ?it/s]

Processing Site-5d27096c03f801723c31e5e0 with 60 traces and 114 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [03:06<00:00,  3.11s/it]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s]

Processing Site-5d2709c303f801723c3299ee with 31 traces and 181 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [00:49<00:00,  1.61s/it]
  0%|                                                                                           | 0/17 [00:00<?, ?it/s]

Processing Site-5da138314db8ce0c98bbf3a0 with 17 traces and 116 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 17/17 [01:01<00:00,  3.61s/it]
  0%|                                                                                           | 0/11 [00:00<?, ?it/s]

Processing Site-5da1382d4db8ce0c98bbe92e with 11 traces and 120 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 11/11 [01:29<00:00,  8.16s/it]
  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

Processing Site-5d27075f03f801723c2e360f with 5 traces and 199 bssids


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:16<00:00,  3.27s/it]
  0%|                                                                                           | 0/23 [00:00<?, ?it/s]

Processing Site-5da138754db8ce0c98bca82f with 23 traces and 148 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 23/23 [01:38<00:00,  4.28s/it]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s]

Processing Site-5d2709e003f801723c32d896 with 31 traces and 133 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [01:41<00:00,  3.27s/it]
  0%|                                                                                           | 0/17 [00:00<?, ?it/s]

Processing Site-5d27097f03f801723c320d97 with 17 traces and 136 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 17/17 [00:50<00:00,  2.99s/it]
  0%|                                                                                           | 0/10 [00:00<?, ?it/s]

Processing Site-5da138364db8ce0c98bc00f1 with 10 traces and 135 bssids


100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [00:43<00:00,  4.33s/it]
  0%|                                                                                            | 0/3 [00:00<?, ?it/s]

Processing Site-5c3c44b80379370013e0fd2b with 3 traces and 163 bssids


100%|████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:16<00:00,  5.57s/it]
  0%|                                                                                            | 0/5 [00:00<?, ?it/s]

Processing Site-5d27099f03f801723c32511d with 5 traces and 125 bssids


100%|████████████████████████████████████████████████████████████████████████████████████| 5/5 [00:20<00:00,  4.13s/it]


Create a nested dict (site → floor) whose values are np.arrays of all step positions ([x, y]) on that floor

In [15]:
# Collect, for every site and floor, all training step positions into one
# array with the columns x and y, and store the nested dict as a pickle.
with open("./data_out/train_24IDs_standardF_steps.pkl", "rb") as f:
    spatial_data = pickle.load(f)

# Per site: numeric floor id -> floor name. Only the numeric keys are used below.
floor_convert = {'5a0546857ecc773753327266': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5c3c44b80379370013e0fd2b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5d27075f03f801723c2e360f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                 '5d27096c03f801723c31e5e0': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6'},
                 '5d27097f03f801723c320d97': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5d27099f03f801723c32511d': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5d2709a003f801723c3251bf': {0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                 '5d2709b303f801723c327472': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                 '5d2709bb03f801723c32852c': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5d2709c303f801723c3299ee': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F', 4: '5F', 5: '6F', 6: '7F', 7: '8F', 8: '9F'},
                 '5d2709d403f801723c32bd39': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F'},
                 '5d2709e003f801723c32d896': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da138274db8ce0c98bbd3d2': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da1382d4db8ce0c98bbe92e': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da138314db8ce0c98bbf3a0': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da138364db8ce0c98bc00f1': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da1383b4db8ce0c98bc11ab': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da138754db8ce0c98bca82f': {0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5da138764db8ce0c98bcaa46': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da1389e4db8ce0c98bd0547': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5da138b74db8ce0c98bd4774': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da958dd46f8266d0737457b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                 '5dbc1d84c1eb61796cf7c010': {-1: 'B1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7', 7: 'F8'},
                 '5dc8cea7659e181adb076a3f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'}}

steps_siteid_floorid = {}

num_s = 0  # 1-based count of processed sites

for num_s, site_id in enumerate(tqdm(floor_convert.keys()), start=1):

    site_traces = spatial_data[site_id]
    steps_siteid_floorid[site_id] = {}

    for floor_id in floor_convert[site_id].keys():

        # Each trace record is indexed positionally: [0] carries FloorName,
        # [1] carries the x/y positions.
        floor_steps = [
            site_traces[trace_id][1][["x", "y"]].to_numpy()
            for trace_id in site_traces.keys()
            if site_traces[trace_id][0].FloorName[0] == floor_id
        ]

        # np.concatenate raises ValueError when a floor has no matching trace.
        steps_siteid_floorid[site_id][floor_id] = np.concatenate(floor_steps, axis=0)

with open("./data_out/steps_siteid_floorid.pkl", "wb") as f:
    pickle.dump(steps_siteid_floorid, f)

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:06<00:00,  3.54it/s]


Create a nested dict (site → floor) whose values are np.arrays of all waypoint positions ([x, y]) on that floor

In [3]:
# Collect, for every site and floor, all training waypoint positions into one
# array with the columns x and y, and store the nested dict as a pickle.
with open("./data_out/train_24IDs_standardF_waypoints.pkl", "rb") as f:
    spatial_data = pickle.load(f)

# Per site: numeric floor id -> floor name. Only the numeric keys are used below.
floor_convert = {'5a0546857ecc773753327266': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5c3c44b80379370013e0fd2b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5d27075f03f801723c2e360f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                 '5d27096c03f801723c31e5e0': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6'},
                 '5d27097f03f801723c320d97': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5d27099f03f801723c32511d': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5d2709a003f801723c3251bf': {0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                 '5d2709b303f801723c327472': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                 '5d2709bb03f801723c32852c': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5d2709c303f801723c3299ee': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F', 4: '5F', 5: '6F', 6: '7F', 7: '8F', 8: '9F'},
                 '5d2709d403f801723c32bd39': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F'},
                 '5d2709e003f801723c32d896': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da138274db8ce0c98bbd3d2': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da1382d4db8ce0c98bbe92e': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da138314db8ce0c98bbf3a0': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da138364db8ce0c98bc00f1': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da1383b4db8ce0c98bc11ab': {0: 'F1', 1: 'F2', 2: 'F3'},
                 '5da138754db8ce0c98bca82f': {0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5da138764db8ce0c98bcaa46': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da1389e4db8ce0c98bd0547': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                 '5da138b74db8ce0c98bd4774': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                 '5da958dd46f8266d0737457b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                 '5dbc1d84c1eb61796cf7c010': {-1: 'B1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7', 7: 'F8'},
                 '5dc8cea7659e181adb076a3f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'}}

# NOTE: despite its name this dict holds WAYPOINTS here (it is written to
# waypoints_siteid_floorid.pkl below); the name is kept so that the kernel
# state stays the same as before.
steps_siteid_floorid = {}

num_s = 0  # 1-based count of processed sites

for num_s, site_id in enumerate(tqdm(floor_convert.keys()), start=1):

    site_traces = spatial_data[site_id]
    steps_siteid_floorid[site_id] = {}

    for floor_id in floor_convert[site_id].keys():

        # Each trace record is indexed positionally: [0] carries FloorName,
        # [1] carries the x/y positions.
        floor_waypoints = [
            site_traces[trace_id][1][["x", "y"]].to_numpy()
            for trace_id in site_traces.keys()
            if site_traces[trace_id][0].FloorName[0] == floor_id
        ]

        # np.concatenate raises ValueError when a floor has no matching trace.
        steps_siteid_floorid[site_id][floor_id] = np.concatenate(floor_waypoints, axis=0)

with open("./data_out/waypoints_siteid_floorid.pkl", "wb") as f:
    pickle.dump(steps_siteid_floorid, f)

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:05<00:00,  4.49it/s]


Create a nested dict (site → floor) whose values are np.arrays of the contour vertices ([x, y]) enclosing the steps on that floor

In [64]:
import alphashape as alsp

# Compute, for every site and floor, the outline of all step positions and
# store its vertices as an array; the nested dict is written to a pickle.
with open("./data_out/steps_siteid_floorid.pkl", "rb") as f:
    steps_siteid_floorid = pickle.load(f)

contours_siteid_floorid_a0 = {}  # alpha=0, i.e. convex hull

num_s = 0  # 1-based count of processed sites
for num_s, site_id in enumerate(tqdm(steps_siteid_floorid.keys()), start=1):

    contours_siteid_floorid_a0[site_id] = {}

    for floor_id in steps_siteid_floorid[site_id].keys():
        # TODO: check conversions, i.e. lists vs np.array
        # NOTE(review): .exterior presumes the returned shape is a polygon;
        # confirm this holds for floors with very few or collinear steps.
        floor_shape = alsp.alphashape(steps_siteid_floorid[site_id][floor_id], 0.)
        contours_siteid_floorid_a0[site_id][floor_id] = np.array(floor_shape.exterior.coords)

# TODO: a tighter outline with alpha > 0 (e.g. via alsp.optimizealpha) was
# sketched but is not enabled; it would go to contours_siteid_floorid_aa.pkl.
with open("./data_out/contours_siteid_floorid_a0.pkl", "wb") as f:
    pickle.dump(contours_siteid_floorid_a0, f)

100%|██████████████████████████████████████████████████████████████████████████████████| 24/24 [00:04<00:00,  5.25it/s]


CONVERT FLOOR99 model to dict(site_id) of dict(trace)

In [73]:
# Convert the floor submission into {site_id: {trace_id: mean predicted floor}}.
floor99_submit = pd.read_csv("./submit/submission_floor99.csv")

# "site_path_timestamp" is "<site>_<path>_<timestamp>"; expand=True keeps the
# original row index so the new columns line up with their rows.
floor99_submit[["site", "path", "stamp"]] = floor99_submit.site_path_timestamp.str.split("_", expand=True)

# Group by a scalar key (not a one-element list): with a list, newer pandas
# versions yield 1-tuples as group keys on iteration, which would change the
# keys of the resulting dict.
gr_site = floor99_submit.groupby("site")

gr_trace = {}
for site_id, site_rows in gr_site:
    # Mean of the "floor" column over all rows of a path, keyed by path id.
    gr_trace[site_id] = dict(site_rows.groupby("path").floor.mean())

with open("./data_out/floor99_siteid_traceid.pkl", "wb") as f:
    pickle.dump(gr_trace, f)

CONVERT Model4p7 model to dict(site_id) of dict(trace) of DataFrame(x,y,time)

In [72]:
# Convert the x/y submission into
# {site_id: {trace_id: DataFrame with the columns x, y, floor, time}}.
mode4p7_submit = pd.read_csv("./submit/submission_4p7.csv")

# "site_path_timestamp" is "<site>_<path>_<timestamp>"; expand=True keeps the
# original row index so the new columns line up with their rows.
mode4p7_submit[["site", "path", "stamp"]] = mode4p7_submit.site_path_timestamp.str.split("_", expand=True)
mode4p7_submit = mode4p7_submit.drop(columns=["site_path_timestamp"])
mode4p7_submit["stamp"] = mode4p7_submit["stamp"].astype(int)

# Floor per site and trace; it replaces the floor column of the submission.
with open("./data_out/floor100_siteid_traceid.pkl", "rb") as f:
    floor100 = pickle.load(f)

# Group by scalar keys (not one-element lists): with a list, newer pandas
# versions yield 1-tuples as group keys on iteration, which would break the
# floor100[site_id][trace_id] lookups below.
gr_site = mode4p7_submit.groupby("site")

gr_trace = {}
for site_id, site_rows in gr_site:
    gr_trace[site_id] = site_rows.groupby("path")

gr_site_trace = {}
for site_id in gr_trace.keys():

    gr_site_trace[site_id] = {}
    for trace_id, trace_rows in gr_trace[site_id]:
        # Fresh 0..n-1 index, timestamp column renamed to "time".
        trace_waypoints = trace_rows.reset_index(drop=True).rename(columns={"stamp": "time"})
        # Same floor value for every row of the trace.
        trace_waypoints["floor"] = floor100[site_id][trace_id]
        gr_site_trace[site_id][trace_id] = trace_waypoints[["x", "y", "floor", "time"]]

with open("./data_out/mode4p7_waypoints_siteid_traceid.pkl", "wb") as f:
    pickle.dump(gr_site_trace, f)