In [4]:
import numpy as np
import pandas as pd

import pickle
import random
import os
import time
from datetime import datetime
from pathlib import Path
import glob
import json
from tqdm.notebook import tqdm

import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objs as go
from PIL import Image
#%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)

sns.set_style('darkgrid')

from xyz10.io_f_mod import read_data_file
from xyz10.visualize_f_mod import visualize_trajectory, save_figure_to_image


from sklearn.pipeline import make_pipeline
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import resample, shuffle
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.linear_model import SGDRegressor


import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import metrics
from tensorflow.keras.callbacks import TensorBoard

from scipy.ndimage import median_filter
from scipy.signal import medfilt
from scipy.interpolate import interp1d

# Configure TensorFlow once, before any model is built or loaded.
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))
# Let TensorFlow allocate GPU memory on demand instead of reserving it all up front.
# Looping (rather than indexing physical_devices[0]) keeps the notebook runnable on a
# CPU-only machine and applies the same setting to every visible GPU.
for gpu_device in physical_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
# The rest of the notebook runs Keras with eager execution switched off (TF1-style graphs).
tf.compat.v1.disable_eager_execution()

Num GPUs Available:  1


Supporting Functions (PLOT PREDICTIONS / MAKE SUBMISSIONS)

In [2]:
def make_submission(model_name, data, sufix="coarse",
                    sample_path="./submit/sample_submission.csv", out_dir="./submit"):
    """Interpolate predicted traces at the requested timestamps and write a submission CSV.

    Every row of the sample submission carries a ``site_path_timestamp`` key. For each
    row, x and y are linearly interpolated (extrapolated outside the trace's time range)
    from the predicted trace, and ``floor`` is the median predicted floor of that trace,
    truncated to ``int``.

    Parameters
    ----------
    model_name : str
        Used to build the output file name ``{out_dir}/{model_name}_{sufix}.csv``.
    data : dict
        ``data[site_id][trace_id]`` is a DataFrame that is read positionally:
        column 0 = x, column 1 = y, column 2 = floor, column 3 = time.
    sufix : str
        Suffix of the output file name.
    sample_path : str
        Location of the sample submission that defines the rows to fill.
    out_dir : str
        Directory the submission CSV is written to.

    Raises
    ------
    KeyError
        If a site or trace named in the sample submission is missing from ``data``.
    """
    sample_submit = pd.read_csv(sample_path)
    sub_data = sample_submit.copy(deep=True)

    # Split "site_path_timestamp" into its three parts; kept in a side frame so the
    # helper columns never have to be removed from the submission again.
    keys = sample_submit.site_path_timestamp.str.split(pat="_", expand=True)
    keys = keys.rename(columns={0: "site", 1: "path", 2: "timestamp"})
    # str.split yields strings; convert once instead of relying on an implicit cast.
    keys["timestamp"] = keys["timestamp"].astype(float)

    # One pass per trace: the interpolators are built once and evaluated for all of
    # the trace's timestamps at once, instead of being rebuilt for every single row.
    for (site_id, trace_id), rows in tqdm(keys.groupby(["site", "path"], sort=False)):
        predicted_record = data[site_id][trace_id].to_numpy()
        trace_time = predicted_record[:, 3]

        func_x = interp1d(trace_time, predicted_record[:, 0], kind="linear", copy=False, fill_value="extrapolate")
        func_y = interp1d(trace_time, predicted_record[:, 1], kind="linear", copy=False, fill_value="extrapolate")

        timestamps = rows["timestamp"].to_numpy()
        sub_data.loc[rows.index, "x"] = func_x(timestamps)
        sub_data.loc[rows.index, "y"] = func_y(timestamps)
        sub_data.loc[rows.index, "floor"] = int(np.median(predicted_record[:, 2]))

    sub_data.to_csv(f"{out_dir}/{model_name}_{sufix}.csv", index=False)

def plot_predictions_multi(model_name, data, sufix="coarse"):
    """Plot all predicted traces of each floor on its floor plan and save one PNG per floor.

    Traces are regrouped by the floor stored in their first row. For every
    (site, floor) pair the floor metadata is read from
    ``./data_in/metadata/{site_id}/{floor}/floor_info.json`` (map width/height) and
    ``floor_image.png``, the traces are drawn with ``visualize_trajectory`` and the
    figure is written to
    ``./img_out/predictions/{model_name}/{site_id}/{floor}_{sufix}.png``.

    Plotting is best effort: an unknown site/floor or a failure while reading the
    metadata or rendering is reported with ``print`` and the loop continues.

    Parameters
    ----------
    model_name : str
        Name of the sub-folder of ``./img_out/predictions`` that receives the images.
    data : dict
        ``data[site_id][trace_id]`` is a DataFrame with a ``floor`` column whose first
        two columns (by position) are the x and y coordinates.
    sufix : str
        Suffix used in the figure title and in the image file name.
    """

    def swap_trace_floor(predicted_data):
        """Regroup ``{site: {trace: df}}`` into ``{site: {floor: {trace: df}}}``."""
        swap = {}
        for site_id, traces in predicted_data.items():
            swap[site_id] = {}
            for trace_id, trace_df in traces.items():
                # Floor of the first row; positional access so a non-default index
                # on the trace cannot break the lookup.
                floor_id = trace_df["floor"].iloc[0]
                swap[site_id].setdefault(floor_id, {})[trace_id] = trace_df
        return swap

    floors_by_site = swap_trace_floor(data)

    # Numeric floor index -> name of the floor's metadata folder, per site.
    floor_convert = {'5a0546857ecc773753327266': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                     '5c3c44b80379370013e0fd2b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5d27075f03f801723c2e360f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                     '5d27096c03f801723c31e5e0': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6'},
                     '5d27097f03f801723c320d97': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5d27099f03f801723c32511d': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                     '5d2709a003f801723c3251bf': {0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                     '5d2709b303f801723c327472': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F'},
                     '5d2709bb03f801723c32852c': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                     '5d2709c303f801723c3299ee': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F', 3: '4F', 4: '5F', 5: '6F', 6: '7F', 7: '8F', 8: '9F'},
                     '5d2709d403f801723c32bd39': {-1: 'B1', 0: '1F', 1: '2F', 2: '3F'},
                     '5d2709e003f801723c32d896': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5da138274db8ce0c98bbd3d2': {0: 'F1', 1: 'F2', 2: 'F3'},
                     '5da1382d4db8ce0c98bbe92e': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5da138314db8ce0c98bbf3a0': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3'},
                     '5da138364db8ce0c98bc00f1': {0: 'F1', 1: 'F2', 2: 'F3'},
                     '5da1383b4db8ce0c98bc11ab': {0: 'F1', 1: 'F2', 2: 'F3'},
                     '5da138754db8ce0c98bca82f': {0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                     '5da138764db8ce0c98bcaa46': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5da1389e4db8ce0c98bd0547': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4'},
                     '5da138b74db8ce0c98bd4774': {-2: 'B2', -1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5'},
                     '5da958dd46f8266d0737457b': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'},
                     '5dbc1d84c1eb61796cf7c010': {-1: 'B1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7', 7: 'F8'},
                     '5dc8cea7659e181adb076a3f': {-1: 'B1', 0: 'F1', 1: 'F2', 2: 'F3', 3: 'F4', 4: 'F5', 5: 'F6', 6: 'F7'}}

    out_root = f"./img_out/predictions/{model_name}"
    os.makedirs(out_root, exist_ok=True)

    for site_id in tqdm(floors_by_site.keys()):  # over sites
        os.makedirs(f"{out_root}/{site_id}", exist_ok=True)
        site_path = "./data_in/metadata/" + site_id + "/"

        for floor_id, floor_traces in floors_by_site[site_id].items():  # over floors
            positions = [trace.to_numpy()[:, :2] for trace in floor_traces.values()]
            legends = list(floor_traces.keys())

            # Resolve the floor name outside the try block: the old handler printed
            # `floor`, which was unbound (or left over from the previous floor)
            # whenever this lookup itself was what failed.
            floor = floor_convert.get(site_id, {}).get(floor_id)
            if floor is None:
                print(f"Exception: wrong floor-{floor_id} site-{site_id}")
                continue

            meta_path = site_path + floor
            map_path = meta_path + "/floor_image.png"
            info_path = meta_path + "/floor_info.json"

            try:
                with open(info_path) as info_file:
                    info_data = json.load(info_file)

                map_width = info_data["map_info"]["width"]
                map_height = info_data["map_info"]["height"]

                fig_steps = visualize_trajectory(trajectory=positions, is_multi = True,
                                                 floor_plan_filename=map_path, mode="lines + markers", title=f"{site_id}_{floor}_{sufix}", legends=legends,
                                                 width_meter=map_width,  height_meter=map_height)
                save_figure_to_image(fig_steps, f"./img_out/predictions/{model_name}/{site_id}/{floor}_{sufix}.png")
            except Exception as err:
                # Best effort: report the failing floor (with the cause) and keep going.
                print(f"Exception: wrong floor-{floor} site-{site_id} ({err!r})")

Make Predictions

In [3]:
# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
# Context managers close the files right away instead of leaking the handles.
# parsed_test_data[site_id][trace_id] -> DataFrame of features for one test trace.
with open("./data_out/full24/test-10k_mix-counts.pkl", "rb") as test_data_file:   # CHANGE
    parsed_test_data = pickle.load(test_data_file)
# floor100_siteid_traceid[site_id][trace_id] -> floor value used for that trace.
with open("./data_out/floor100_siteid_traceid.pkl", "rb") as floor_file:
    floor100_siteid_traceid = pickle.load(floor_file)

In [6]:
# Name of the saved model set; also reused for the output file names further down.
model_name = "models24_v6_FcatMR_fix_data95"
model_path = f"./saved_models/{model_name}"

# Single-site list; only referenced from a commented-out alternative of the site loop.
site_ids = ["5da1389e4db8ce0c98bd0547"]

# site_id -> trace_id -> DataFrame of predictions.
predicted_data = dict()

def xy_loss_metric(y_true, y_pred):
    """Mean Euclidean distance between columns 0 and 1 of ``y_true`` and ``y_pred``.

    The distance is computed per row from the first two columns (presumably the x and
    y coordinates) and then averaged over the rows.
    """
    diff_x = y_true[:, 0] - y_pred[:, 0]
    diff_y = y_true[:, 1] - y_pred[:, 1]
    distance = tf.sqrt(tf.square(diff_x) + tf.square(diff_y))
    return tf.reduce_mean(distance, axis=-1)

def xy_loss_metric_mse(y_true, y_pred):
    """Square root of the mean squared distance over columns 0 and 1.

    The squared distance is computed per row from the first two columns (presumably
    the x and y coordinates), averaged over the rows, and the root of that mean is
    returned.
    """
    diff_x = y_true[:, 0] - y_pred[:, 0]
    diff_y = y_true[:, 1] - y_pred[:, 1]
    squared_distance = tf.square(diff_x) + tf.square(diff_y)
    mean_squared = tf.reduce_mean(squared_distance, axis=-1)
    return tf.sqrt(mean_squared)

# Clipping bounds applied to every selected feature column before scaling.
# Defined once here instead of being re-assigned for every trace.
rssi_limit = -94
delay_limit = 1000


def _load_pickle(path):
    """Unpickle ``path`` and close the file (only use on files produced by this project)."""
    with open(path, "rb") as pickle_file:
        return pickle.load(pickle_file)


# Iterate over all parsed sites; swap in `site_ids` to restrict a run to selected sites.
for n_s, site_id in enumerate(tqdm(parsed_test_data.keys()), start=1):  # over sites
    print(f"Processing Predictions #{n_s}: Site-{site_id} with {len(parsed_test_data[site_id])} traces")

    # ---- per-site artefacts: feature list, scaler, floor encoder and Keras model ----
    site_dir = f"{model_path}/{site_id}"
    features_xy = _load_pickle(f"{site_dir}/features_list.pkl")
    scaler_xy = _load_pickle(f"{site_dir}/scaler.pkl")
    encoder_xy = _load_pickle(f"{site_dir}/f_binarizer.pkl")
    model_xy = tf.keras.models.load_model(
        site_dir,
        custom_objects={"xy_loss_metric_mse": xy_loss_metric_mse, "xy_loss_metric": xy_loss_metric},
    )

    predicted_data[site_id] = {}
    for trace_id in parsed_test_data[site_id]:  # over traces
        # Work on a copy so the pops below do not alter the parsed input.
        trace_record = parsed_test_data[site_id][trace_id].copy(deep=True)

        # ---- general feature manipulation ----
        _time = trace_record.pop("time").to_numpy()
        _magnetic = trace_record.pop("m").to_numpy().reshape((-1, 1))
        _rotate = trace_record.pop("r").to_numpy().reshape((-1, 1))
        # Keep the model's feature columns and clamp them to [rssi_limit, delay_limit].
        trace_record_xy = trace_record[features_xy].clip(lower=rssi_limit, upper=delay_limit).to_numpy()

        # ---- predict x/y, with the floor value as an extra encoded feature ----
        _pred_f = floor100_siteid_traceid[site_id][trace_id]
        # One floor value per row; np.full_like takes its dtype from _time.
        floor_encoded = encoder_xy.transform(np.full_like(_time, _pred_f))
        trace_record_xy_scaled = scaler_xy.transform(
            np.concatenate((trace_record_xy, _rotate, _magnetic, floor_encoded), axis=1)
        )
        predictions_xy = model_xy.predict(trace_record_xy_scaled)

        # ---- combine into the final DataFrame: x, y, floor, time ----
        # (median filtering of the predicted x/y was tried here and is disabled)
        predictions_xyf = pd.DataFrame(predictions_xy[:, :2], columns=["x", "y"])
        predictions_xyf["floor"] = _pred_f
        predictions_xyf["time"] = _time

        predicted_data[site_id][trace_id] = predictions_xyf

    # Reset the Keras backend state before the next site's model is loaded.
    keras.backend.clear_session()


  0%|          | 0/24 [00:00<?, ?it/s]

Processing Predictions #1: Site-5da1389e4db8ce0c98bd0547 with 13 traces
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Processing Predictions #2: Site-5da138b74db8ce0c98bd4774 with 29 traces
Processing Predictions #3: Site-

In [7]:
# Persist the predictions so plotting and submission can be re-run without predicting again.
predicted_dir = "./submit/fit_data"
# Create the folder if it is missing, so a finished prediction run is not lost to a
# FileNotFoundError at the very last step.
os.makedirs(predicted_dir, exist_ok=True)
with open(f"{predicted_dir}/{model_name}_predicted.pkl", "wb") as f:
    pickle.dump(predicted_data, f)

In [8]:
# Plot the in-memory predictions. To plot a previous run instead, first reload
# f"./submit/fit_data/{model_name}_predicted.pkl" into predicted_data.
plot_predictions_multi(model_name=model_name, data=predicted_data, sufix="coarse")

  0%|          | 0/24 [00:00<?, ?it/s]

In [9]:
# Reload the saved predictions (so this cell also works in a fresh kernel) and write the
# submission file. The context manager closes the pickle file instead of leaking the handle.
with open(f"./submit/fit_data/{model_name}_predicted.pkl", "rb") as predicted_file:
    predicted_data = pickle.load(predicted_file)
make_submission(model_name, predicted_data, sufix="coarse")

  0%|          | 0/10133 [00:00<?, ?it/s]