In [None]:
import time, sys
from typing import Type, List, Dict, Tuple, Set
import argparse
try:
    from sklearn.externals import joblib
    from sklearn.externals.joblib import parallel_backend, Parallel, delayed
except ImportError:
    import joblib
    from joblib import parallel_backend, Parallel, delayed
    
import pandas as pd
import json, ijson
import os, sys, uuid
from pykalman import KalmanFilter
from PIL import Image
import math
import ast
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from shapely.geometry import Polygon, Point
from shapely.geometry import Polygon
from matplotlib.backends.backend_pdf import PdfPages
from geopy.distance import geodesic, distance
from geopy import Point
from shapely.geometry import Point, Polygon as ShapelyPolygon
from matplotlib.patches import Polygon as MplPolygon

from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from datetime import datetime
import random

from matplotlib.patches import Polygon
import ast

from collections import defaultdict
import pyarrow.parquet as pq

import pickle
from ast import literal_eval

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.colors as mcolors

import matplotlib.image as mpimg

from tqdm.notebook import tqdm, trange
from scipy.optimize import minimize
from scipy.optimize import least_squares

from os import walk
from os import listdir
from os.path import isfile, join, isdir

import scipy.optimize as opt
from shapely.geometry import Polygon
from shapely.geometry import Polygon

from PIL import Image
from matplotlib.patches import Polygon

from sklearn import model_selection
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score, KFold
from sklearn.linear_model import LinearRegression, LogisticRegression, SGDClassifier
from sklearn.feature_selection import SelectKBest, mutual_info_classif
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, VotingClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, ConfusionMatrixDisplay

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Wall-clock start time of the notebook run.
start =time.time()
# Only readings whose 'channel' is in this list are kept by the scan parsers
# (presumably the three BLE advertising channels -- TODO confirm).
CHANNELS = [37,38,39]
# NOTE(review): not referenced in the visible cells (train_variable hard-codes its own tree counts).
N_ESTIMATORS = 100
# Sentinel written into beacon columns for "beacon not heard" (used by the fillna calls below).
MISSING_VALUE = -100
# NOTE(review): DEBUG_LOGGING and S3_CACHING_BUCKET are not referenced in the visible cells.
DEBUG_LOGGING = False
S3_CACHING_BUCKET = 'cognosos-ml-data'

In [None]:
def parse_scan_data_woc(scan: List[Dict]) -> Dict:
    """Reduce one scan to ``{macHex: strongest reading as int}``.

    Readings are pooled per MAC address across every channel listed in the
    module-level ``CHANNELS``; entries on other channels are ignored and MAC
    addresses that end up with no readings are left out of the result.
    """
    pooled: Dict = {}
    for entry in scan:
        if entry['channel'] not in CHANNELS:
            continue
        # All accepted channels share one bucket per MAC address.
        pooled.setdefault(entry['macHex'], []).extend(entry['readings'])

    strongest = {}
    for mac, values in pooled.items():
        if values:
            strongest[mac] = int(max(values))
    return strongest


def process_training(data_filepath: str) -> List[Dict]:
    """Stream a JSON array of scans from disk and flatten each into one row.

    Each element of the top-level JSON array must provide ``zoneId``,
    ``zoneName``, ``parentZoneId``, ``tagId``, ``rxAt`` and ``scandata``.
    ``scandata`` is reduced by ``parse_scan_data_woc`` to one column per MAC
    address, and the scan metadata is added under the keys ``Zone_id``,
    ``Room_name``, ``parent_zone_id``, ``tagId`` and ``timestamp``.

    Scans that yield no beacon reading at all are skipped. The original
    ``if row:`` check ran after the metadata had been merged in, so it was
    always true and could never skip anything.

    Args:
        data_filepath: path to the JSON file.

    Returns:
        One dict per retained scan, suitable for ``pd.DataFrame(rows)``.
    """
    X = []

    # Binary mode hands raw bytes to ijson, so decoding does not depend on
    # the platform's default text encoding.
    with open(data_filepath, 'rb') as f:
        # Lazy iterator over the array elements; nothing is parsed until the
        # loop below consumes it.
        objects = ijson.items(f, 'item')

        print('Done loading JSON incrementally')

        for scan in objects:
            metadata = {
                'Zone_id': str(scan['zoneId']),
                'Room_name': str(scan['zoneName']),
                'parent_zone_id': str(scan['parentZoneId']),
                'tagId': scan['tagId'],
                'timestamp': scan['rxAt'],
            }
            scan_readings: List[Dict] = scan['scandata']

            row = parse_scan_data_woc(scan_readings)
            if not row:
                # No beacon heard on the accepted channels: nothing to learn from.
                continue

            row.update(metadata)
            X.append(row)

    print('Done processing data')

    return X

In [None]:
def train_variable(X_train, y_train_floor, y_train, save_models=False,
                   missing_value=-120, model_path='Hier_Features.joblib'):
    """Train a two-stage (floor, then room) random-forest classifier.

    Stage 1 fits one forest on all features to predict the floor label.
    Stage 2 fits one forest per floor on that floor's samples only, after
    dropping every feature column that equals ``missing_value`` for all of
    those samples.

    Args:
        X_train: feature DataFrame (one column per beacon).
        y_train_floor: floor label per row, aligned with ``X_train``.
        y_train: room/zone label per row, indexable by ``X_train``'s index.
        save_models: when true, dump the trained objects with joblib.
        missing_value: sentinel marking "beacon not heard" when pruning
            all-missing columns. The default stays at the historical -120.
            NOTE(review): this notebook fills missing readings with
            ``MISSING_VALUE`` (-100), so with the default no column is ever
            pruned; pass ``missing_value=MISSING_VALUE`` to enable pruning.
        model_path: destination used when ``save_models`` is true.

    Returns:
        ``(selected_features, clf_rooms, clf_floor)``. Both dicts are keyed
        by ``str(floor label)``.
    """
    # A single-step Pipeline adds nothing over the estimator itself, so the
    # forest is fitted directly (same estimator, same parameters).
    clf_floor = RandomForestClassifier(random_state=42)
    clf_floor.fit(X_train, y_train_floor)

    clf_rooms = {}
    selected_features = {}

    for floor_num, samples in X_train.groupby(y_train_floor):
        floor_labels = y_train[samples.index]

        # Keep only the columns with at least one real reading on this floor.
        all_missing = (samples == missing_value).all(axis=0)
        selected_samples = samples.loc[:, ~all_missing]

        classifier = RandomForestClassifier(n_estimators=200, random_state=100)
        classifier.fit(selected_samples, floor_labels)

        floor_key = str(floor_num)
        clf_rooms[floor_key] = classifier
        selected_features[floor_key] = selected_samples.columns.tolist()

    if save_models:
        model = {
            'selected_features': selected_features,
            'clf_rooms': clf_rooms,
            'clf_floor': clf_floor,
        }
        joblib.dump(model, model_path)

    return selected_features, clf_rooms, clf_floor

def predict_variable(X_test, clf_floor, clf_rooms, selected_features):
    """Predict floor, then room, for every row of ``X_test``.

    Args:
        X_test: feature DataFrame with the columns used for training.
        clf_floor: fitted floor classifier (needs ``predict``).
        clf_rooms: per-floor room classifiers keyed by ``str(floor label)``.
        selected_features: per-floor list of feature names, keyed like
            ``clf_rooms`` (as produced by ``train_variable``).

    Returns:
        ``(predictions, predicted_floors)``: a list with one room prediction
        per row, in row order, and the floor classifier's raw output.

    Compared with the original row-by-row loop:
      * both dicts are looked up with the same ``str(floor)`` key, so
        non-string floor labels no longer raise ``KeyError`` on
        ``selected_features``;
      * rows are predicted in one batch per floor instead of one ``predict``
        call per row;
      * features are selected by name in the stored (training) order rather
        than by a boolean mask in ``X_test`` column order.
    """
    predicted_floors = clf_floor.predict(X_test)

    floor_keys = np.array([str(floor) for floor in predicted_floors], dtype=object)
    # Remember one raw label per key so dicts keyed by the raw label still work.
    raw_by_key = {}
    for floor in predicted_floors:
        raw_by_key.setdefault(str(floor), floor)

    predictions = [None] * len(floor_keys)
    for key in raw_by_key:
        row_positions = np.flatnonzero(floor_keys == key)

        if key in selected_features:
            feature_names = selected_features[key]
        else:
            feature_names = selected_features[raw_by_key[key]]

        floor_samples = X_test.iloc[row_positions][list(feature_names)]
        room_predictions = clf_rooms[key].predict(floor_samples)

        for position, room in zip(row_positions, room_predictions):
            predictions[position] = room

    return predictions, predicted_floors

In [None]:
def extract_values(scan_data):
    """Convert raw scan entries into the ``readings`` format the parsers use.

    Each usable entry becomes
    ``{'macHex': ..., 'channel': ..., 'readings': [first rssi value]}``.

    Entries are skipped, rather than raising, when they lack ``macHex``,
    ``channel`` or ``rssi``, or when ``rssi`` is ``None`` or empty. The
    original filtered only on ``macHex`` and ``rssi`` being present, so it
    crashed on a missing ``channel`` or an empty ``rssi`` list.

    Args:
        scan_data: iterable of dict entries, or ``None``.

    Returns:
        List of converted entries (empty for ``None`` or empty input).
    """
    if scan_data is None:
        return []

    converted = []
    for entry in scan_data:
        if 'macHex' not in entry or 'channel' not in entry or 'rssi' not in entry:
            continue
        rssi = entry['rssi']
        if rssi is None or len(rssi) == 0:
            continue
        converted.append({
            'macHex': entry['macHex'],
            'channel': entry['channel'],
            'readings': [rssi[0]],  # only the first RSSI sample is kept
        })
    return converted

def parse_scan_data(scan: List[Dict]) -> Dict:
    """Reduce one scan to ``{str(macHex): strongest reading}``.

    Only entries whose ``channel`` is in the module-level ``CHANNELS`` are
    used; readings from all accepted channels are pooled per MAC address and
    the maximum is returned unchanged (no integer cast). MAC addresses left
    with no readings are omitted.
    """
    pooled: Dict = {}
    for entry in scan:
        if entry['channel'] not in CHANNELS:
            continue
        key = f"{entry['macHex']}"
        pooled.setdefault(key, []).extend(entry['readings'])

    strongest = {}
    for key, values in pooled.items():
        if len(values) > 0:
            strongest[key] = max(values)
    return strongest

In [None]:
def create_digital_twin(Anchor_point_location_file, ground_truth_file_location, map_file_location):
    """Load beacon and ground-truth tables and draw the beacon map.

    Args:
        Anchor_point_location_file: CSV with at least ``x``, ``y``, ``Mac``.
        ground_truth_file_location: CSV with at least ``Zone_id``.
        map_file_location: floor-plan image drawn behind the beacons.

    Returns:
        ``(anchor_df, ground_truth_df, map_)`` where ``anchor_df`` has integer
        ``x``/``y`` and a 12-character zero-padded string ``Mac``,
        ``ground_truth_df`` has a string ``Zone_id`` and ``map_`` is a
        65 x 28 array of zeros.

    Side effects:
        Creates a matplotlib figure and writes ``beacon_map_cognosos.png`` to
        the current working directory.
    """
    anchor_df = pd.read_csv(Anchor_point_location_file)
    # Coordinates are cast to int, which truncates any fractional part.
    anchor_df["x"] = anchor_df["x"].astype(int)
    anchor_df["y"] = anchor_df["y"].astype(int)

    # MAC addresses must be strings zero-padded to length 12 so that they
    # match the beacon column names of the scan tables.
    anchor_df['Mac'] = anchor_df['Mac'].astype(str).str.zfill(12)

    ground_truth_df = pd.read_csv(ground_truth_file_location)
    ground_truth_df["Zone_id"] = ground_truth_df["Zone_id"].astype(str)

    # Empty map of zeros reserved for later calculations.
    map_ = np.zeros((65,28))

    plt.figure(figsize=(12, 6))

    plt.scatter(anchor_df.x,anchor_df.y, color='blue', s=50, edgecolors='black', label='Beacons', marker='o', alpha=0.6)

    # The context manager closes the image file once matplotlib has taken the
    # pixel data; the original left the handle open.
    with Image.open(map_file_location) as image:
        plt.imshow(image, extent=[0, 65, 0, 28], aspect='auto')

    plt.xlim(0, 65)
    plt.ylim(0, 28)

    plt.grid(True)
    plt.xticks([i for i in range(0, 65, 5)])
    plt.yticks([i for i in range(0, 28, 4)])
    plt.xlabel('x', fontsize=14)
    plt.ylabel('y', fontsize=14)
    plt.title("Beacon distribution in meters")
    plt.legend()
    plt.savefig('beacon_map_cognosos.png')

    return anchor_df, ground_truth_df, map_

In [None]:
# Input files, relative to the notebook's working directory.
beacon_file = 'ground_truth/Beacon_map_cognosos_flr3.csv'
ground_truth_file = "ground_truth/Ground_truth_Mar25.csv"
map_file = 'ground_truth/Cognosos_view.png'

# Loads both tables and, as a side effect, draws and saves the beacon map.
anchor_point_df, ground_truth_df, map_ = create_digital_twin(beacon_file, ground_truth_file, map_file)

In [None]:
# anchor_point_df

In [None]:
def filter_valid_features(row, df1, missing_value=-100):
    """Return ``{mac: reading}`` for the beacons of ``df1`` heard in ``row``.

    A beacon counts as heard when its MAC is a label of ``row``, the value is
    numeric and it differs from ``missing_value``.

    NumPy scalars are accepted alongside Python numbers. The original
    ``isinstance(value, (int, float))`` test rejects ``np.int64``, so an
    integer-typed row produced an empty result.

    Args:
        row: pandas Series indexed by column name (e.g. one DataFrame row).
        df1: DataFrame with a ``Mac`` column listing the beacons to consider.
        missing_value: sentinel meaning "not heard" (default -100).

    Returns:
        Dict of MAC address to reading, in ``df1['Mac']`` order.
    """
    numeric_types = (int, float, np.integer, np.floating)
    valid_features = {}

    for mac in df1['Mac']:
        if mac not in row.index:
            continue
        value = row[mac]
        if isinstance(value, numeric_types) and value != missing_value:
            valid_features[mac] = value

    return valid_features

def convert_coordinates(coord_str):
    """Parse a coordinate string such as ``"[1.5 2.5]"`` or ``"1.5, 2.5"``.

    Non-string input is returned unchanged.

    Parsing order:
      1. Whitespace-separated numbers, optionally wrapped in ``[]``, give a
         list of floats.
      2. Otherwise every run of commas and/or whitespace is collapsed to a
         single comma and the result is evaluated with ``ast.literal_eval``
         (typically giving a tuple).

    The original built the comma form with ``replace(" ", ",")`` followed by
    a single ``replace(",,", ",")``. Input such as ``"1.5,  2.5"`` was left
    with a doubled comma and therefore failed. Every string that parsed
    before still parses to the same value.

    Returns:
        The parsed coordinates, or ``None`` (after printing a message) when
        the string cannot be parsed.
    """
    if not isinstance(coord_str, str):
        return coord_str

    stripped = coord_str.strip("[]")

    try:
        return [float(token) for token in stripped.split()]
    except ValueError:
        pass

    # Treat commas and whitespace alike, then rejoin with single commas.
    normalized = ",".join(stripped.replace(",", " ").split())
    try:
        return ast.literal_eval(normalized)
    except (ValueError, SyntaxError):
        print(f"Error processing coordinate string: {coord_str}")
        return None

In [None]:
def _parse_xy_predictions(coords, label, room_name):
    """Split an iterable of ``[x, y]`` predictions into parallel float lists.

    Entries may be sequences or their string representation. An entry that
    cannot be parsed is reported and skipped as a whole, so the two lists
    always have the same length.
    """
    xs, ys = [], []
    for coord in coords:
        try:
            parsed = ast.literal_eval(coord) if isinstance(coord, str) else coord
            x_val, y_val = float(parsed[0]), float(parsed[1])
        except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
            print(f"Invalid {label} coord in '{room_name}': {coord}")
            continue
        xs.append(x_val)
        ys.append(y_val)
    return xs, ys


def _room_polygon(room_box, room_name, max_vertices=8):
    """Build the room outline from the ``x1..x8`` / ``y1..y8`` fields of a row.

    Returns ``(vertices, polygon)``. A vertex is used only when both of its
    coordinates are present. With fewer than three vertices an empty polygon
    is returned, so no point is counted as inside.
    """
    vertices = []
    for k in range(1, max_vertices + 1):
        x_val = room_box.get(f'x{k}', None)
        y_val = room_box.get(f'y{k}', None)
        if pd.notnull(x_val) and pd.notnull(y_val):
            vertices.append((x_val, y_val))

    if len(vertices) < 3:
        print(f"Room '{room_name}' has fewer than 3 boundary vertices; no point can be inside it.")
        return vertices, ShapelyPolygon()

    polygon = ShapelyPolygon(vertices)
    if not polygon.is_valid:
        print(f"Invalid polygon for '{room_name}', attempting to fix with buffer(0).")
        polygon = polygon.buffer(0)
    return vertices, polygon


def _count_inside(xs, ys, polygon):
    """Number of ``(x, y)`` points lying strictly within ``polygon``."""
    return sum(1 for x, y in zip(xs, ys) if Point(x, y).within(polygon))


def _percentage(part, whole):
    """``part / whole`` as a percentage, or 0 when ``whole`` is 0."""
    return (part / whole) * 100 if whole else 0


def plot_predicted_all(result, ground_truth_df, map_file_location, output_file="compare_plot_MLE_NLOS.png"):
    """Plot predictions per room and score how many fall inside the room.

    ``result`` is left-merged with ``ground_truth_df`` on ``Zone_id`` and
    ``Room_name``. For every room the predictions in ``Predicted_MLE``,
    ``Predicted_Optimisation`` and, when present, ``Predicted_NLOS`` are drawn
    over the floor plan together with the room outline (``x1..x8``,
    ``y1..y8``), and the share of points inside the outline is computed.

    The outline is built with ``ShapelyPolygon``. The bare name ``Polygon`` is
    last bound by the visible imports to ``matplotlib.patches.Polygon``, which
    has neither ``is_valid`` nor ``buffer``.

    Args:
        result: predictions with ``Zone_id``, ``Room_name`` and the
            prediction columns named above.
        ground_truth_df: room table with ``Room_Type`` and outline columns.
        map_file_location: floor-plan image path.
        output_file: PNG path for the combined figure.

    Returns:
        DataFrame with one row of accuracy figures per room (empty when
        ``result`` contains no rooms).

    Side effects:
        Prints overall accuracies, saves the figure and shows it.
    """
    merged_df = pd.merge(result, ground_truth_df, on=["Zone_id", "Room_name"], how="left")
    unique_rooms = merged_df['Room_name'].unique()

    if len(unique_rooms) == 0:
        # plt.subplots cannot build a zero-row grid; nothing to plot or score.
        print("No rooms to plot.")
        return pd.DataFrame()

    has_fused = 'Predicted_NLOS' in merged_df.columns
    # The floor plan is the same for every panel, so read it once.
    image = mpimg.imread(map_file_location)

    results = []
    total_points_mle = total_inside_mle = 0
    total_points_Optimisation = total_inside_Optimisation = 0
    total_points_fuse = total_inside_fuse = 0

    n_rows = math.ceil(len(unique_rooms) / 2)
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    for ax, room_name in zip(axes, unique_rooms):
        room_data = merged_df[merged_df['Room_name'] == room_name]

        room_box = room_data.iloc[0]
        zone = room_data["Zone_id"].iloc[0]
        room_type = room_data["Room_Type"].iloc[0]

        vertices, polygon = _room_polygon(room_box, room_name)

        x_pred_mle, y_pred_mle = _parse_xy_predictions(room_data["Predicted_MLE"], "MLE", room_name)
        x_pred_Optimisation, y_pred_Optimisation = _parse_xy_predictions(
            room_data["Predicted_Optimisation"], "Optimisation", room_name)
        if has_fused:
            x_pred_fuse, y_pred_fuse = _parse_xy_predictions(room_data["Predicted_NLOS"], "NLOS", room_name)
        else:
            x_pred_fuse, y_pred_fuse = [], []

        inside_count_mle = _count_inside(x_pred_mle, y_pred_mle, polygon)
        inside_count_Optimisation = _count_inside(x_pred_Optimisation, y_pred_Optimisation, polygon)
        inside_count_fuse = _count_inside(x_pred_fuse, y_pred_fuse, polygon) if has_fused else 0

        total_points_mle += len(x_pred_mle)
        total_inside_mle += inside_count_mle
        total_points_Optimisation += len(x_pred_Optimisation)
        total_inside_Optimisation += inside_count_Optimisation
        if has_fused:
            total_points_fuse += len(x_pred_fuse)
            total_inside_fuse += inside_count_fuse

        percentage_inside_mle = _percentage(inside_count_mle, len(x_pred_mle))
        percentage_inside_Optimisation = _percentage(inside_count_Optimisation, len(x_pred_Optimisation))
        percentage_inside_fuse = _percentage(inside_count_fuse, len(x_pred_fuse)) if has_fused else 0

        results.append({
            "Zone_id": zone,
            'Room_name': room_name,
            "Room_Type": room_type,
            'MLE_Accuracy': percentage_inside_mle,
            'Optimisation_Accuracy': percentage_inside_Optimisation,
            'NLOS_Accuracy': percentage_inside_fuse if has_fused else None,
            'MLE_Inside_Points': inside_count_mle,
            'Optimisation_Inside_Points': inside_count_Optimisation,
            'NLOS_Inside_Points': inside_count_fuse if has_fused else None,
            # Kept as in the original: the MLE point count is reported as the total.
            'Total_Points': len(x_pred_mle),
        })

        # Plot
        ax.imshow(image, extent=[0, 65, 0, 28], aspect='auto')
        if vertices:
            outline_x = [x for x, _ in vertices]
            outline_y = [y for _, y in vertices]
            # Repeat the first vertex to close the outline.
            ax.plot(outline_x + [outline_x[0]], outline_y + [outline_y[0]], 'r-', label='Room Boundary')
        ax.scatter(x_pred_mle, y_pred_mle, color='blue', s=8, label='MLE')
        ax.scatter(x_pred_Optimisation, y_pred_Optimisation, color='green', s=8, label='Optimisation')
        if has_fused:
            ax.scatter(x_pred_fuse, y_pred_fuse, color='red', s=8, label='NLOS')

        ax.set_xlim([0, 65])
        ax.set_ylim([0, 28])
        ax.set_xlabel("X Coordinate")
        ax.set_ylabel("Y Coordinate")
        title_str = f"{room_name} - MLE: {percentage_inside_mle:.1f}%, Optimisation: {percentage_inside_Optimisation:.1f}%"
        if has_fused:
            title_str += f", NLOS: {percentage_inside_fuse:.1f}%"
        ax.set_title(title_str)
        ax.legend(loc='lower left', bbox_to_anchor=(0, 0), ncol=2)

    # Remove the unused panel left over when the room count is odd.
    for spare_ax in axes[len(unique_rooms):]:
        fig.delaxes(spare_ax)

    overall_mle_accuracy = _percentage(total_inside_mle, total_points_mle)
    overall_Optimisation_accuracy = _percentage(total_inside_Optimisation, total_points_Optimisation)
    overall_fuse_accuracy = _percentage(total_inside_fuse, total_points_fuse)

    print(f"\nOverall MLE Accuracy: {overall_mle_accuracy:.2f}%")
    print(f"Overall Optimisation Accuracy: {overall_Optimisation_accuracy:.2f}%")
    if total_points_fuse > 0:
        print(f"Overall NLOS Accuracy: {overall_fuse_accuracy:.2f}%")

    accuracy_df = pd.DataFrame(results)
    plt.tight_layout()
    plt.savefig(output_file, format="png")
    plt.show()

    return accuracy_df

In [None]:
def read_MLE_data_survey_portal(df, ground_truth_df, anchor_point_df, export_unheard=False, export_path="unheard_anchor_points.csv"):
    """Join scans with ground truth and keep only the known beacon columns.

    Args:
        df: scan table with beacon columns plus ``Zone_id``, ``Room_name``,
            ``parent_zone_id``, ``tagId`` and ``timestamp``.
        ground_truth_df: room table with ``Zone_id``, ``Room_name``, ``x``
            and ``y``.
        anchor_point_df: beacon table with ``Mac``, ``x`` and ``y``. Its
            ``Mac`` column is normalised in place to 12-character zero-padded
            strings, as the original function did.
        export_unheard: when true, write the beacons that never appear as a
            column of ``df`` to ``export_path``. The original accepted this
            flag but ignored it.
        export_path: CSV destination used when ``export_unheard`` is true.

    Returns:
        ``(data_set_df, heard_anchor_points_coord, unheard_anchor_point_df)``:
        the dataset (heard beacon columns in ``anchor_point_df`` order, then
        ``Zone_id``, ``Room_name``, ``tagId``, ``timestamp``, optional
        ``channel``, ``x``, ``y``), the ``[x, y]`` array of the heard beacons
        and the table of unheard beacons.
    """
    # MAC addresses must be strings zero-padded to length 12 to match the
    # beacon column names.
    anchor_point_df['Mac'] = anchor_point_df['Mac'].astype(str).str.zfill(12)

    merged_df = pd.merge(df, ground_truth_df, on=["Zone_id", 'Room_name'], how='inner').drop(['parent_zone_id'], axis=1)

    # dict.fromkeys keeps first-seen order and drops repeated MAC addresses.
    heard_anchor_points = [
        mac_addr for mac_addr in dict.fromkeys(anchor_point_df['Mac'])
        if mac_addr in merged_df.columns
    ]

    is_heard = anchor_point_df['Mac'].isin(heard_anchor_points)
    heard_anchor_point_df = anchor_point_df[is_heard].reset_index(drop=True)
    unheard_anchor_point_df = anchor_point_df[~is_heard].reset_index(drop=True)

    heard_anchor_points_coord = heard_anchor_point_df[['x', 'y']].values

    metadata_columns = ["Zone_id", "Room_name", "tagId", "timestamp"]
    if "channel" in merged_df.columns:
        metadata_columns.append("channel")
    metadata_columns += ["x", "y"]

    # Select everything in one step instead of inserting column by column.
    data_set_df = merged_df[heard_anchor_points + metadata_columns].copy()

    if export_unheard:
        unheard_anchor_point_df.to_csv(export_path, index=False)

    return data_set_df, heard_anchor_points_coord, unheard_anchor_point_df

### Exclude zones outside the office, since all beacons there were removed

In [None]:
# Zone ids present in the ground-truth table; later cells keep only scans from these zones.
zones= ground_truth_df.Zone_id.unique().tolist()

In [None]:
# Build the scan table from the raw JSON export.
# NOTE(review): df1 is overwritten further down by a CSV load, so the result of
# this cell only matters if that CSV is regenerated from it -- confirm.
filepath = "data/Asset/Original data/1_data_Walk_Around_Mar25.json"
filename = os.path.basename(filepath)

X1 = process_training(filepath)
df1 = pd.DataFrame(X1)
# Beacons not heard in a scan become NaN in the frame; replace them with the sentinel.
df1 = df1.fillna(MISSING_VALUE)  

# Columns that held NaN are float; cast them to int8 (range -128..127).
float_cols = df1.select_dtypes(include=['float']).columns
df1[float_cols] = df1[float_cols].astype(np.int8)
# errors='coerce' turns unparseable timestamps into NaT instead of raising.
df1['timestamp'] = pd.to_datetime(df1['timestamp'], utc=True, errors='coerce')
df1 = df1.sort_values(by='timestamp')

ordered_columns = ['timestamp', 'tagId', 'Zone_id', 'Room_name', "parent_zone_id"]

# Keep exactly the beacons listed in anchor_point_df: reindex drops every other
# beacon column and creates NaN columns for listed beacons that were never heard.
columns = [col for col in anchor_point_df.Mac.unique().tolist() if col not in ordered_columns]
new_column_order = columns + ordered_columns
df1 = df1.reindex(columns=new_column_order)

df1 = df1.reset_index(drop=True)
# Keep only the text after the last '-' in the room name.
df1['Room_name'] = df1['Room_name'].str.split('-').str[-1].str.strip()

# Fix specific zone_id
df1.loc[df1['Zone_id'] == "30598", 'Zone_id'] = "30539"

# # Conditional exclusion based on filename
# if filename == "data_duress_access_Jun23.json":
#     rooms_to_exclude = ["Tech Office 2"]  # this room has issue with data as very low beacon signal, beacon count
#     df1 = df1[~df1['Room_name'].isin(rooms_to_exclude)]

# Beacon processing
# Beacon columns are recognised by a leading '0' in the column name.
# NOTE(review): a MAC that does not start with '0' would be missed here -- confirm all do.
beacon_cols = [col for col in df1.columns if str(col).startswith('0')]
# Second fill covers the NaN columns introduced by reindex above.
df1 = df1.fillna(MISSING_VALUE)
# Number of beacons heard per scan (-100 is the value of MISSING_VALUE).
df1['beacon_count'] = (df1[beacon_cols] != -100).sum(axis=1)
df1= df1[df1.Zone_id.isin(zones)]
print(df1.shape)

# Keep scans that heard at least 5 beacons.
df1 = df1[df1['beacon_count'] >= 5]
print(df1.shape)


In [None]:
# Number of df1 columns that are beacon MACs listed in anchor_point_df.
len(list(set(df1.columns.tolist()) & set(anchor_point_df.Mac)))

In [None]:
# Largest and smallest number of beacons heard in a single scan.
df1['beacon_count'].max(), df1['beacon_count'].min()

In [None]:
# df2=df1.copy()

In [None]:
# df = pd.concat([df1, df2], axis=0, ignore_index=True, sort=False)
# df.shape

In [None]:
# df1= df.copy()

In [None]:
# df1= df1[df1.Room_name != 'Tech Office 2']

In [None]:
# Number of distinct rooms in the table built from the JSON export.
df1.Room_name.nunique()

In [None]:
# df1.to_csv("data/Asset/2_data_asset_walk_around_combine_1_2.csv", index= False)

In [None]:
# Replace df1 with the previously saved combined dataset.
# NOTE(review): the cell that writes this CSV is commented out above, so a
# fresh run depends on the file already existing on disk.
df1= pd.read_csv("data/Asset/2_data_asset_walk_around_combine_1_2.csv")
# Identifiers are compared as strings elsewhere in the notebook.
df1.tagId= df1.tagId.astype(str)
df1.Zone_id= df1.Zone_id.astype(str)
df1= df1[df1.Zone_id.isin(zones)]

ordered_columns = ['timestamp', 'tagId', 'Zone_id', 'Room_name', "parent_zone_id"]

# Same column alignment as for the JSON load: listed beacons first, metadata last;
# reindex drops unlisted columns and adds NaN columns for beacons never heard.
columns = [col for col in anchor_point_df.Mac.unique().tolist() if col not in ordered_columns]
new_column_order = columns + ordered_columns
df1 = df1.reindex(columns=new_column_order)
df1 = df1.reset_index(drop=True)
# Keep only the text after the last '-' in the room name.
df1['Room_name'] = df1['Room_name'].str.split('-').str[-1].str.strip()
# Beacon columns are recognised by a leading '0' in the column name.
beacon_cols = [col for col in df1.columns if str(col).startswith('0')]
df1 = df1.fillna(MISSING_VALUE)
# Number of beacons heard per scan (-100 is the value of MISSING_VALUE).
df1['beacon_count'] = (df1[beacon_cols] != -100).sum(axis=1)
df1.shape

## REMOVE ALL ROWS < -90

### Read the data for walk around with a plastic box of tags

In [None]:
# df1= pd.read_csv("data/Asset/2_data_asset_walk_around_combine_1_2.csv")
# df1.Zone_id= df1.Zone_id.astype(str)
# df1.head(1)

In [None]:
# Row/column count before the empty-scan filter below.
df1.shape

In [None]:
# Drop scans in which every beacon column is below -99.
# NOTE(review): the variable name says "below_90" but the threshold used is -99;
# with missing readings stored as -100 this removes scans that heard nothing.
beacon_cols = [col for col in df1.columns if str(col).startswith('0')]
rows_all_below_90 = df1[beacon_cols].lt(-99).all(axis=1)
df1 = df1[~rows_all_below_90]
df1.shape

In [None]:
# Number of distinct rooms left after dropping the empty scans.
df1.Room_name.nunique()

### Check that the dataset has enough strongly heard beacons (the code below uses RSSI >= -95) and keep only rows with at least 5 strong features

In [None]:
# Attach the ground-truth x/y of each zone to every scan.
# NOTE(review): a left merge repeats scans if ground_truth_df lists a Zone_id
# more than once -- confirm Zone_id is unique there.
df2 = pd.merge(df1.drop(columns=["parent_zone_id", "beacon_count"]), \
                                ground_truth_df[['Zone_id','x', 'y']], on=["Zone_id"], how='left')
# Beacon columns are recognised by a leading '0' in the column name.
rssi_cols = [col for col in df2.columns if col.startswith('0')]

# Create a new column counting RSSIs >= -95
df2['num_strong_features'] = (df2[rssi_cols] >= -95).sum(axis=1)

df2.shape

In [None]:
# Keep scans with at least 5 strong beacon readings, then drop the helper column.
data_set_df = df2[df2['num_strong_features'] >= 5].copy()

data_set_df=data_set_df.drop(columns='num_strong_features').reset_index(drop=True)
data_set_df.shape

In [None]:
# df2.iloc[[2885]].describe().T.sort_values(by="max", ascending= False)

# Take 10% sample

In [None]:
# data_set_df = data_set_df_all.groupby('Zone_id', group_keys=False).\
#         apply(lambda x: x.sample(frac=1, random_state=42))
# data_set_df.shape

# *** MAKE SURE THAT ALL BEACONS ARE BEING HEARD

In [None]:
## Check if any ionstalled beacon not heard by the tags
# unheard_anchor_point_df

In [None]:
# Number of distinct rooms that survive the strong-feature filter.
data_set_df.Room_name.nunique()

In [None]:
# Number of beacons (rows) and attributes (columns) in the beacon table.
anchor_point_df.shape

In [None]:
# Number of data_set_df columns that are beacon MACs listed in anchor_point_df.
len(data_set_df.columns.intersection(anchor_point_df.Mac))

# Location AI

In [None]:
# filepath = "data/Asset/Original data/2_data_asset_acess_Jun23.json"

# X1 = process_training(filepath)
# df_asset = pd.DataFrame(X1)
# df_asset = df_asset.fillna(MISSING_VALUE)  

# float_cols = df_asset.select_dtypes(include=['float']).columns
# df_asset[float_cols] = df_asset[float_cols].astype(np.int8)
# df_asset['timestamp'] = pd.to_datetime(df_asset['timestamp'], utc=True, errors='coerce')
# df_asset = df_asset.sort_values(by='timestamp')

# ordered_columns = ['timestamp', 'tagId', 'Zone_id', 'Room_name', "parent_zone_id"]

# columns = [col for col in anchor_point_df.Mac.unique().tolist() if col not in ordered_columns]
# new_column_order = columns + ordered_columns
# df_asset = df_asset.reindex(columns=new_column_order)

# df_asset = df_asset.reset_index(drop=True)
# df_asset['Room_name'] = df_asset['Room_name'].str.split('-').str[-1].str.strip()

# # Fix specific zone_id
# df_asset.loc[df_asset['Zone_id'] == "30598", 'Zone_id'] = "30539"

# # # Conditional exclusion based on filename
# # if filename == "data_duress_access_Jun23.json":
# #     rooms_to_exclude = ["Tech Office 2"]  # this room has issue with data as very low beacon signal, beacon count
# #     df_asset = df_asset[~df_asset['Room_name'].isin(rooms_to_exclude)]

# # Beacon processing
# beacon_cols = [col for col in df_asset.columns if str(col).startswith('0')]
# df_asset = df_asset.fillna(MISSING_VALUE)
# df_asset['beacon_count'] = (df_asset[beacon_cols] != -100).sum(axis=1)
# df_asset= df_asset[df_asset.Zone_id.isin(zones)]
# print(df_asset.shape)

# df_asset = df_asset[df_asset['beacon_count'] >= 5]
# print(df_asset.shape)


In [None]:
# Number of distinct rooms in df1 before building the training set.
df1.Room_name.nunique()

#### The CEO data is not available in the training data, so I randomly sample rows from another dataset and append them to the training data

In [None]:
# columns_to_keep = df_asset.columns  

# filtered_df = df1[df1.Zone_id == "30519"]
# sampled_df = filtered_df.sample(n=150, random_state=42)

# # Keep only train columns
# sampled_df = sampled_df[columns_to_keep]

# # Append to training data
# df2 = pd.concat([df_asset, sampled_df], ignore_index=True)

In [None]:
# df2.columns

In [None]:
# df2.to_csv("data/Asset/Original data/2_data_asset_acess_Jun23_used_for_locAI_training.csv")

In [None]:
# Beacon table size, shown again before selecting the beacons to drop.
anchor_point_df.shape

## Use this data for training LocAI

In [None]:
# MACs of the beacons flagged with Remove == "remove" in the beacon table;
# their columns are excluded from the training features below.
columns_to_drop= anchor_point_df[anchor_point_df.Remove=="remove"].Mac.tolist()
len(columns_to_drop)

In [None]:
# df2= pd.read_csv("data/Asset/Original data/2_data_asset_acess_Jun23_used_for_locAI_training.csv")
# df2['Zone_id']=df2['Zone_id'].astype(str)
# df2= df2[df1.columns]
# df2.shape

In [None]:
# df2=df1.copy()

In [None]:
# NOTE: this rebinds df2, which earlier held the merged table with x/y columns.
df2= df1.drop(columns= columns_to_drop)
# Floor labels are used as strings (train_variable keys its per-floor dicts by str).
df2["parent_zone_id"]=df2["parent_zone_id"].astype(str)

# 80/20 split, stratified so every zone keeps the same share in both parts.
train_data, test_data = train_test_split(df2, test_size=0.2, random_state=42, \
                                         stratify=df2["Zone_id"])

# Features are the beacon columns, recognised by a leading '0' in the name.
X_train = train_data[[col for col in train_data.columns if col.startswith("0")]]
y_train_floor = train_data['parent_zone_id'] 
y_train = train_data['Zone_id']

# Test features use the same column list as training.
X_test = test_data[[col for col in train_data.columns if col.startswith("0")]] 
y_test_floor = test_data['parent_zone_id'] 
y_test = test_data['Zone_id'] 
df2.shape, df1.shape

In [None]:
# Train the floor classifier and one room classifier per floor; nothing is saved to disk.
selected_features, clf_rooms, clf_floor = train_variable(X_train, y_train_floor, y_train, save_models = False)

## Test against another dataset

In [None]:
# df1= pd.read_csv("data/Asset/1_data_asset_tag_stay_still_combine_all.csv")
# df1.tagId= df1.tagId.astype(str)
# df1.Zone_id= df1.Zone_id.astype(str)
# df1= df1[df1.Zone_id.isin(zones)]

# ordered_columns = ['timestamp', 'tagId', 'Zone_id', 'Room_name', "parent_zone_id"]

# columns = [col for col in anchor_point_df.Mac.unique().tolist() if col not in ordered_columns]
# new_column_order = columns + ordered_columns
# df1 = df1.reindex(columns=new_column_order)
# df1 = df1.reset_index(drop=True)
# df1['Room_name'] = df1['Room_name'].str.split('-').str[-1].str.strip()
# beacon_cols = [col for col in df1.columns if str(col).startswith('0')]
# df1 = df1.fillna(MISSING_VALUE)
# df1['beacon_count'] = (df1[beacon_cols] != -100).sum(axis=1)
# df1.shape

# df_test= df1[train_data.columns].copy()
# X_test = df_test[[col for col in df_test.columns if col.startswith("0")]] 
# y_test_floor = df_test['parent_zone_id'] 
# y_test = df_test['Zone_id'] 

In [None]:
# Predict the floor first, then the room with that floor's classifier.
predicted_rooms, predicted_floors = predict_variable(X_test, clf_floor, clf_rooms, selected_features)

In [None]:
# Share of test scans whose predicted zone equals the true Zone_id.
score = accuracy_score(y_test, predicted_rooms)
print('Room Accuracy: {:.2f}%'.format(score * 100))

In [None]:
# Per-scan result table: true room/zone, room type, prediction and a 0/100 hit flag.
# NOTE(review): predictions are assigned by position, which relies on the left
# merge keeping one row per test scan in the original order (i.e. Zone_id is
# unique in ground_truth_df) -- confirm.
result_d = test_data[["Room_name", 'tagId', 'Zone_id']]\
    .merge(ground_truth_df[["Zone_id", "Room_Type"]], on = "Zone_id", how="left")
result_d["Prediction"] = predicted_rooms
result_d["Accuracy"] = np.where(result_d.Zone_id == result_d.Prediction, 100, 0)
result_d.head(1)

In [None]:
# Mean accuracy per (zone, room), kept in result_rf; the displayed value is the
# mean accuracy per room type.
result_rf = pd.DataFrame(result_d.groupby(['Zone_id', "Room_name"]).Accuracy.mean()).reset_index()
result_d.groupby("Room_Type").Accuracy.mean()

In [None]:
# Mean accuracy per (zone, room, room type, tag); input of the box plot below.
res= pd.DataFrame(result_d.groupby(['Zone_id', "Room_name", "Room_Type", "tagId"]).Accuracy.mean()).reset_index()
# res.sort_values(by="Accuracy")

In [None]:
# Box plot of per-tag accuracy for every room, coloured by room type.
palette = {"Room": "skyblue", "Open": "orange"}

# Ensure consistent types
res["Room_Type"] = res["Room_Type"].astype(str)

# 1. Sort by Zone_id FIRST
# (superseded by the multi-key sort in step 3, which also sorts by Zone_id first)
res_sorted = res.sort_values(by="Zone_id")

# 2. Create a sort key for Room_Type so "Room" comes before "Open"
type_priority = {"Room": 0, "Open": 1}

# 3. Build final room order sorted by:
#    (Zone_id, type_priority, Room_name)
# The key function is applied to each sort column in turn; only Room_Type is
# mapped to its priority. Room types missing from type_priority map to NaN.
res_sorted = res_sorted.sort_values(
    by=["Zone_id", "Room_Type", "Room_name"],
    key=lambda col: col.map(type_priority) if col.name == "Room_Type" else col
)

# 4. Create the ordered x-axis list
room_order = list(res_sorted["Room_name"].unique())

plt.figure(figsize=(12,6))

# dodge=False draws one box per room instead of one slot per hue level.
sns.boxplot(
    data=res_sorted,
    x="Room_name",
    y="Accuracy",
    order=room_order,
    hue="Room_Type",
    dodge=False,
    palette=palette
)

plt.xticks(rotation=90)
plt.ylabel("Accuracy (%)")
plt.xlabel("")
plt.title("LocationAI_Tag Accuracy Distribution per Room")
plt.legend(title="")
plt.tight_layout()
plt.grid(False)

In [None]:
# Base name of the JSON file loaded earlier (set in the JSON-loading cell).
filename

In [None]:
def compute_summary_accuracy(df):
    """Summarise accuracy overall and per room type in a one-row DataFrame.

    Args:
        df: DataFrame with at least the columns ``Room_Type`` and
            ``Accuracy`` (the result table also carries Room_name, tagId,
            Zone_id and Prediction).

    Returns:
        DataFrame with a single row holding ``Overall_Accuracy`` (mean of
        ``Accuracy``) followed by one ``<Room_Type>_Accuracy`` column per
        room type, in the order produced by ``groupby``.
    """
    summary_df = pd.DataFrame({"Overall_Accuracy": [df["Accuracy"].mean()]})

    # Mean accuracy of each room type, added as its own column.
    mean_by_type = df.groupby("Room_Type")["Accuracy"].mean()
    for room_type, mean_accuracy in mean_by_type.items():
        summary_df[room_type + "_Accuracy"] = mean_accuracy

    return summary_df


In [None]:
# One-row summary: overall accuracy plus one column per room type.
result_d_room= compute_summary_accuracy(result_d)

# NLOS: fusion of Opt and MLE, using the MLE estimate as the initial guess for Opt

In [None]:
def euclidean_dist(point, points, height_diff=1.5):
    """Distance from ``point`` to each row of ``points``, with a fixed vertical offset.

    The planar squared distance is summed over axis 1 and ``height_diff``
    squared is added before taking the square root.
    """
    offsets = points - point
    planar_sq = np.sum(offsets ** 2, axis=1)
    return np.sqrt(planar_sq + height_diff ** 2)

def generate_grid(center, resolution=5, radius=4):
    """Square grid of candidate points around ``center``.

    Points are spaced ``1 / resolution`` apart and span ``radius`` on each
    side of the centre along both axes. The result has one ``[x, y]`` row
    per grid point, with x varying fastest.
    """
    spacing = 1 / resolution
    cx, cy = center[0], center[1]
    # NOTE(review): np.arange with a fractional step can gain or lose the last
    # point through rounding; kept as-is to preserve the existing grid.
    x_axis = np.arange(cx - radius, cx + radius + spacing, spacing)
    y_axis = np.arange(cy - radius, cy + radius + spacing, spacing)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return np.column_stack((grid_x.ravel(), grid_y.ravel()))

def rssi_to_distance(rssi, A=-40, n=3.5, scale=0.8):
    """Map an RSSI value to a distance estimate.

    Computes ``scale * exp((A - rssi) / (10 * n))``.

    NOTE(review): the other helpers in this notebook model RSSI as
    ``T - 10 * n * log10(d)``, whose inverse uses base 10 rather than ``e``;
    confirm that the natural exponential (with ``scale``) is intentional.
    """
    exponent = (A - rssi) / (10 * n)
    return scale * np.exp(exponent)

def filter_rssi(row, beacon_positions, rssi_threshold=-90):
    """Return ``{mac: rssi}`` for the known beacons heard above a threshold.

    A beacon is kept when its MAC is a label of ``row``, the value is numeric
    and it is strictly greater than ``rssi_threshold``.

    NumPy scalars are accepted alongside Python numbers. The original
    ``isinstance(value, (int, float))`` test rejects ``np.int64``, so an
    integer-typed row produced an empty result.

    Args:
        row: pandas Series indexed by column name.
        beacon_positions: mapping whose keys are the beacon MAC addresses.
        rssi_threshold: exclusive lower bound for a reading to be kept.
    """
    numeric_types = (int, float, np.integer, np.floating)
    strong = {}
    for mac in beacon_positions.keys():
        if mac not in row.index:
            continue
        value = row[mac]
        if isinstance(value, numeric_types) and value > rssi_threshold:
            strong[mac] = value
    return strong

def localization_error(tag_position, beacons, distances):
    """Weighted squared mismatch between geometric and measured distances.

    For every beacon the distance from ``tag_position`` is compared with the
    measured ``distances``. Each squared difference is weighted by
    ``exp(-d**2 / (2 * sigma**2))`` with ``sigma = std(distances) + 1e-3``,
    so shorter measured distances weigh more. Returns the weighted sum.
    """
    geometric = np.linalg.norm(beacons - tag_position, axis=1)
    spread = np.std(distances) + 1e-3
    trust = np.exp(- (distances ** 2) / (2 * spread ** 2))
    squared_gap = (geometric - distances) ** 2
    return np.sum(trust * squared_gap)

def generate_expansion_area(initial_guess, std_dev=0.3, radius=0.5, num_points=50):
    """Random candidate points clustered around ``initial_guess``.

    60% of the points (rounded down) are drawn from a Gaussian with standard
    deviation ``std_dev`` centred on the guess; the rest are drawn inside a
    disc of the given ``radius`` (square-root radial sampling). Uses NumPy's
    global random state. Gaussian points come first in the returned array.
    """
    gauss_count = int(num_points * 0.6)
    disc_count = num_points - gauss_count

    # Draw order (normal, radial uniform, angular uniform) is kept so that a
    # seeded run reproduces the same points.
    gaussian_cloud = np.random.normal(0, std_dev, size=(gauss_count, 2)) + initial_guess

    radial = radius * np.sqrt(np.random.uniform(0, 1, disc_count))
    angle = np.random.uniform(0, 2 * np.pi, disc_count)
    disc_x = initial_guess[0] + radial * np.cos(angle)
    disc_y = initial_guess[1] + radial * np.sin(angle)
    disc_cloud = np.column_stack((disc_x, disc_y))

    return np.vstack((gaussian_cloud, disc_cloud))

def compute_likelihood_weighted(grid_coords, anchor_coords, rssi_values, T, n, sigma_noise=4, anchor_weights=None):
    """Likelihood of the observed RSSI values at every candidate grid point.

    For each grid point and anchor the expected RSSI is
    ``T - 10 * n * log10(d + 1e-5)``, where ``d`` is the planar distance
    combined with a fixed 1.5 vertical offset. The squared, noise-normalised
    residual is scaled by the anchor weight and turned into a Gaussian-style
    factor; the factors of all anchors are multiplied together.

    Args:
        grid_coords: array of candidate ``[x, y]`` points, shape (G, 2).
        anchor_coords: array of anchor ``[x, y]`` positions, shape (A, 2).
        rssi_values: observed RSSI per anchor, length A.
        T, n: parameters of the RSSI model above.
        sigma_noise: divisor applied to the residuals.
        anchor_weights: optional per-anchor weights (all ones when omitted).

    Returns:
        Array of length G with one likelihood per grid point.
    """
    weights = np.ones_like(rssi_values) if anchor_weights is None else anchor_weights

    # Broadcast to (G, A, 2): offset of every grid point from every anchor.
    offsets = grid_coords[:, None, :] - anchor_coords[None, :, :]
    slant_range = np.sqrt(np.sum(offsets ** 2, axis=2) + 1.5**2)

    expected_rssi = T - 10 * n * np.log10(slant_range + 1e-5)
    scaled_sq = ((expected_rssi - rssi_values) / sigma_noise)**2 * weights
    per_anchor = np.exp(-0.5 * scaled_sq)
    return np.prod(per_anchor, axis=1)

def find_mle_params(P_j, d_ij, init_guess=(-40, 3)):
    """Fit ``T`` and ``n`` of ``rssi = T - 10 * n * log10(d + 1e-5)`` by least squares.

    Readings equal to -100 are excluded from the fit. The search is bounded
    to ``T`` in [-100, -30] and ``n`` in [2, 6] and uses L-BFGS-B.

    Args:
        P_j: array of observed RSSI values.
        d_ij: array of distances matching ``P_j`` element by element.
        init_guess: starting ``(T, n)``. The default is now an immutable
            tuple; the original default was a list that was handed back to
            the caller on failure, so mutating the result would have changed
            the default for every later call.

    Returns:
        ``numpy.ndarray`` ``[T, n]``: the optimum if the optimiser reports
        success, otherwise a copy of the initial guess. The original returned
        the list object itself in the failure case.
    """
    def squared_error(params, dists, rssi):
        T_i, n_p = params
        valid_mask = rssi != -100
        pred_rssi = T_i - 10 * n_p * np.log10(dists + 1e-5)
        return np.sum((pred_rssi[valid_mask] - rssi[valid_mask]) ** 2)

    start_point = np.asarray(init_guess, dtype=float)
    bounds = [(-100, -30), (2, 6)]
    result = minimize(squared_error, start_point, args=(d_ij, P_j),
                      method='L-BFGS-B', bounds=bounds)
    return result.x if result.success else start_point.copy()

# -------------------------------
# Fused Localization (MLE → Opt → Refine)
# -------------------------------
def fused_localization_mle_opt(data_df, anchor_point_df,
                               sigma_noise=4, coarse_res=2, fine_res=5, fine_radius=3,
                               rssi_threshold=-95, strong_rssi_threshold=-75,
                               top_k_anchors=5, roi_margin=8, top_coarse_points=200, topN_ratio=0.05,
                               map_x_bounds=(0, 65), map_y_bounds=(0, 28),
                               epsilon=1e-12,
                               expansion_radius=0.8, expansion_points=100,
                               enable_refinement=True):
    """
    Localise every row of ``data_df`` with a grid MLE, a range-based optimisation
    seeded around the MLE, and two fusions of both estimates.

    Per row:
      1. Keep anchors whose RSSI exceeds ``rssi_threshold`` (all anchors if none do);
         if at least two exceed ``strong_rssi_threshold``, keep only those.
      2. Build a coarse grid over the bounding box of the ``top_k_anchors`` strongest
         anchors, padded by ``roi_margin`` and clipped to the map bounds, fit the
         path-loss parameters with ``find_mle_params`` and score the grid with
         ``compute_likelihood_weighted``.
      3. Re-score a fine grid around the best ``top_coarse_points`` coarse cells and
         take the likelihood-weighted mean of the best fine cells as the MLE estimate.
      4. Sample an expansion area around the MLE, run L-BFGS-B on
         ``localization_error`` from the 10 best samples and keep one solution.
      5. Optionally refine that solution using only the strong anchors.
      6. Fuse MLE and optimisation with a fixed 50/50 weight and with a
         confidence-based weight.

    Parameters
    ----------
    data_df : pd.DataFrame
        One measurement per row. Columns ``Zone_id``, ``Room_name``, ``x``, ``y``,
        ``tagId`` and ``timestamp`` are treated as metadata; every other column is
        read as an RSSI value. The RSSI vector is masked directly against
        ``anchor_point_df`` rows, so the RSSI columns must be in the same order as
        those rows. ``x`` and ``y`` are required (ground truth).
    anchor_point_df : pd.DataFrame
        Must contain ``x``, ``y`` and ``Mac``.
    sigma_noise, coarse_res, fine_res, fine_radius, top_k_anchors, roi_margin,
    top_coarse_points, topN_ratio, map_x_bounds, map_y_bounds, epsilon :
        Grid-search settings (see the steps above). ``coarse_res`` is cells per unit
        length; ``epsilon`` is added to the fine likelihoods before normalising.
    expansion_radius, expansion_points :
        Forwarded to ``generate_expansion_area``.
    enable_refinement : bool
        Enables step 5.

    Returns
    -------
    pd.DataFrame
        One row per input row with ``original_index``, ``Zone_id``, ``Room_name``,
        ``Tag_id``, ``timestamp``, ``Predicted_MLE``, ``Predicted_Optimisation``,
        ``Predicted_NLOS`` (fixed fusion), ``Predicted_NLOS_Dynamic``,
        ``MLE_Confidence``, ``Opt_Confidence`` and ``Ground_Truth``.

    Notes
    -----
    Relies on ``euclidean_dist``, ``generate_grid``, ``filter_rssi`` and
    ``rssi_to_distance`` being defined elsewhere in the notebook.
    """
    results = []
    beacon_positions = anchor_point_df[["x","y","Mac"]].set_index("Mac")[["x","y"]].to_dict(orient="index")

    # Anchor geometry does not change between rows; build it once.
    anchor_coords = anchor_point_df[['x','y']].values

    for idx, row in tqdm(data_df.iterrows(), total=len(data_df)):
        # ---------------------------
        # Extract RSSI
        # ---------------------------
        rssis = row.drop(['Zone_id','Room_name','x','y','tagId','timestamp'], errors='ignore').values.astype(float)

        # ---------------------------
        # MLE Estimation
        # ---------------------------
        mask_mle = rssis > rssi_threshold
        signal_strengths = rssis[mask_mle]
        dp_coords = anchor_coords[mask_mle]

        # No anchor above the threshold: fall back to using all of them.
        if len(signal_strengths) < 1:
            signal_strengths = rssis
            dp_coords = anchor_coords

        strong_mask = signal_strengths > strong_rssi_threshold
        if np.sum(strong_mask) < 2:
            dp_coords_selected = dp_coords
            signal_strengths_selected = signal_strengths
        else:
            dp_coords_selected = dp_coords[strong_mask]
            signal_strengths_selected = signal_strengths[strong_mask]

        min_rssi, max_rssi = np.min(signal_strengths_selected), np.max(signal_strengths_selected)
        # NOTE(review): numerator is offset by +1 but the denominator by 1e-5, so the
        # weights explode (~1e5) when all selected RSSI values are equal. Left as is
        # because changing it rescales every likelihood - confirm the intended form.
        anchor_weights = (signal_strengths_selected - min_rssi + 1) / (max_rssi - min_rssi + 1e-5)
        sorted_idx = np.argsort(-signal_strengths_selected)
        top_k = min(top_k_anchors, len(sorted_idx))
        top_coords = dp_coords_selected[sorted_idx[:top_k]]

        x_min, y_min = np.min(top_coords, axis=0)
        x_max, y_max = np.max(top_coords, axis=0)
        x_min = max(x_min - roi_margin, map_x_bounds[0])
        x_max = min(x_max + roi_margin, map_x_bounds[1])
        y_min = max(y_min - roi_margin, map_y_bounds[0])
        y_max = min(y_max + roi_margin, map_y_bounds[1])

        coarse_grid = np.stack(np.meshgrid(np.arange(x_min, x_max, 1/coarse_res),
                                           np.arange(y_min, y_max, 1/coarse_res)), axis=-1).reshape(-1,2)

        strongest_coord = top_coords[0]
        dists_for_fit = euclidean_dist(strongest_coord, dp_coords_selected)
        T_global, n_global = find_mle_params(signal_strengths_selected, dists_for_fit)

        coarse_likelihoods = compute_likelihood_weighted(
            coarse_grid, dp_coords_selected, signal_strengths_selected,
            T_global, n_global, sigma_noise, anchor_weights
        )

        # argpartition raises when asked for more elements than exist, which happens
        # for small regions of interest; never request more cells than the grid has.
        k_coarse = min(top_coarse_points, len(coarse_likelihoods))
        top_indices = np.argpartition(coarse_likelihoods, -k_coarse)[-k_coarse:]
        top_candidates = coarse_grid[top_indices]

        fine_candidates, fine_likelihoods = [], []
        for center in top_candidates:
            fine_grid = generate_grid(center, resolution=fine_res, radius=fine_radius)
            likelihoods_fine = compute_likelihood_weighted(
                fine_grid, dp_coords_selected, signal_strengths_selected,
                T_global, n_global, sigma_noise, anchor_weights
            )
            fine_candidates.append(fine_grid)
            fine_likelihoods.append(likelihoods_fine)

        fine_candidates = np.vstack(fine_candidates)
        fine_likelihoods = np.hstack(fine_likelihoods)
        # epsilon keeps the normalisation finite when every likelihood underflows to 0.
        fine_likelihoods += epsilon
        fine_likelihoods /= np.sum(fine_likelihoods)

        N = max(1, min(100, int(topN_ratio * len(fine_candidates))))
        top_idx = np.argpartition(fine_likelihoods, -N)[-N:]
        top_points = fine_candidates[top_idx]
        top_weights = fine_likelihoods[top_idx]
        top_weights /= np.sum(top_weights)
        pred_mle = np.average(top_points, axis=0, weights=top_weights)
        conf_mle = np.max(top_weights)

        # ---------------------------
        # Optimization around MLE
        # ---------------------------
        rssi_values_opt = dict(sorted(filter_rssi(row, beacon_positions, rssi_threshold).items(), key=lambda x: x[1], reverse=True))
        if len(rssi_values_opt) < 3:
            # Too few usable beacons: use every anchor with a unit distance.
            beacon_coords_opt = anchor_coords
            distances_opt = np.ones(anchor_coords.shape[0])
        else:
            beacon_coords_opt = np.array([list(beacon_positions[b].values()) for b in rssi_values_opt.keys()])
            distances_opt = np.array([rssi_to_distance(rssi) for rssi in rssi_values_opt.values()])

        expansion_area = generate_expansion_area(pred_mle, radius=expansion_radius, num_points=expansion_points)
        quick_errors = np.array([localization_error(p, beacon_coords_opt, distances_opt) for p in expansion_area])
        filtered_expansion_area = expansion_area[np.argsort(quick_errors)[:10]]  # top few

        best_err, best_pos = float("inf"), None
        for point in filtered_expansion_area:
            res = minimize(localization_error, point, args=(beacon_coords_opt, distances_opt),
                           method='L-BFGS-B', options={'maxiter':100})
            if res.success:
                est_pos = res.x
                # NOTE(review): candidates are ranked by the summed distance to the
                # beacons, not by the optimiser's objective (res.fun) - confirm intended.
                total_err = np.sum(np.linalg.norm(beacon_coords_opt - est_pos, axis=1))
                if total_err < best_err:
                    best_err = total_err
                    best_pos = est_pos

        # If no start converged, fall back to the MLE estimate instead of crashing on
        # None. best_err is still inf there, so conf_opt becomes 0 and the dynamic
        # fusion reduces to the MLE.
        pred_opt = best_pos if best_pos is not None else pred_mle.copy()
        conf_opt = 1 / (1 + best_err)

        # ---------------------------
        # Optional refinement
        # ---------------------------
        if enable_refinement:
            # Uses the strong_rssi_threshold parameter (default -75) rather than a
            # hard-coded constant so both "strong anchor" tests stay in sync.
            strong_rssi_indices = [i for i, rssi in enumerate(rssi_values_opt.values())
                                   if rssi > strong_rssi_threshold]
            if len(strong_rssi_indices) >= 3:
                filtered_coords = beacon_coords_opt[strong_rssi_indices]
                filtered_distances = distances_opt[strong_rssi_indices]
                result = minimize(localization_error, pred_opt,
                                  args=(filtered_coords, filtered_distances),
                                  method='L-BFGS-B',
                                  options={'maxiter':100, 'gtol':1e-8, 'disp':False})
                if result.success:
                    pred_opt = result.x

        # ---------------------------
        # Fused Results
        # ---------------------------
        alpha_dynamic = conf_mle / (conf_mle + conf_opt)
        pred_fused_fixed = 0.5 * pred_mle + 0.5 * pred_opt
        pred_fused_dynamic = alpha_dynamic * pred_mle + (1-alpha_dynamic) * pred_opt

        results.append({
            'original_index': idx,
            'Zone_id': row.get('Zone_id', np.nan),
            'Room_name': row.get('Room_name', np.nan),
            'Tag_id': row.get('tagId', np.nan),
            'timestamp': row.get('timestamp', np.nan),
            'Predicted_MLE': pred_mle,
            'Predicted_Optimisation': pred_opt,
            'Predicted_NLOS': pred_fused_fixed,
            'Predicted_NLOS_Dynamic': pred_fused_dynamic,
            'MLE_Confidence': conf_mle,
            'Opt_Confidence': conf_opt,
            'Ground_Truth': np.array([row['x'], row['y']])
        })

    return pd.DataFrame(results)


In [None]:
# Recording to process. The name is reused below to derive the result CSV name.
# Switch datasets by swapping which of the two assignments is commented out.
# filename= "1_data_asset_tag_stay_still_combine_3_4.json"
filename = "2_data_asset_walk_around_combine_1_2.json"
filename

In [None]:
# Run the fused localisation over the whole dataset and report the mean
# processing time per row. The timer stops before the CSV is written so that
# disk I/O does not inflate the per-row figure.
start_time = time.perf_counter()

result = fused_localization_mle_opt(data_set_df, anchor_point_df)

end_time = time.perf_counter()

save_folder = "Result_Asset"
save_name = f"{filename.replace('.json', '_NLOS.csv')}"
save_path = os.path.join(save_folder, save_name)

# to_csv does not create missing directories; make sure the target exists.
os.makedirs(save_folder, exist_ok=True)
result.to_csv(save_path, index=False)

total_time = end_time - start_time
# Guard against an empty dataset so the cell does not end in ZeroDivisionError.
avg_time_per_row = total_time / max(len(data_set_df), 1)
print(avg_time_per_row)

In [None]:
# Reload previously saved predictions and turn the stringified coordinate
# columns back into coordinate objects via convert_coordinates.
# NOTE(review): this reads the "_Hybrid" file, not the "_NLOS" file written by the
# run cell above - confirm this is the intended input.
result = pd.read_csv("Result_Asset/2_data_asset_walk_around_combine_1_2_Hybrid.csv")

result = result.assign(
    **{
        column: result[column].apply(convert_coordinates)
        for column in (
            'Predicted_MLE',
            'Ground_Truth',
            'Predicted_Optimisation',
            'Predicted_NLOS',
            'Predicted_RF_Hybrid',
        )
    },
    # Identifiers are compared and merged as strings downstream.
    Zone_id=result["Zone_id"].astype(str),
    Tag_id=result["Tag_id"].astype(str),
)

# This room is excluded from the evaluation.
result = result[result["Room_name"] != 'Womens Restroom']
result.head(1)

In [None]:
def plot_predicted_fused_dynamic(result_df, ground_truth_df, map_file_location,
                                 fused_cols=['Predicted_NLOS_Dynamic', 'Predicted_NLOS'],
                                 output_file="compare_plot_MLE_Optim_Fused.png"):
    """
    Plot MLE, Optimisation and fused predictions per room and score how many fall
    inside the room polygon.

    Parameters
    ----------
    result_df : pd.DataFrame
        Predictions with ``Zone_id``, ``Room_name``, ``Predicted_MLE``,
        ``Predicted_Optimisation`` and, optionally, the columns in ``fused_cols``.
        Coordinates may be sequences or their string representation.
    ground_truth_df : pd.DataFrame
        Room definitions keyed by ``Zone_id``/``Room_name`` with ``Room_Type`` and
        polygon vertices in ``x1..x8`` / ``y1..y8`` (unused vertices null).
    map_file_location : str
        Image drawn as the background of every subplot.
    fused_cols : sequence of str
        Extra prediction columns to evaluate; columns absent from the data are skipped.
    output_file : str or None
        PNG path for the figure; nothing is saved when falsy.

    Returns
    -------
    pd.DataFrame
        One row per room with ``<method>_Accuracy`` (percent),
        ``<method>_Inside_Points`` and ``Total_Points``.

    Side effects
    ------------
    Prints overall and per-room-type accuracy, shows the figure and saves it.
    """
    # Bind shapely's classes under private names: the notebook imports several
    # different `Polygon`/`Point` classes (shapely, matplotlib, geopy), so the
    # global names depend on which import cell ran last.
    from shapely.geometry import Point as ShapelyPoint, Polygon as ShapelyPolygon

    fused_cols = list(fused_cols)
    results = []

    merged_df = pd.merge(result_df, ground_truth_df, on=["Zone_id", "Room_name"], how="left")
    unique_rooms = merged_df['Room_name'].unique()

    # At least one row so plt.subplots does not fail when there are no rooms.
    n_rows = max(1, math.ceil(len(unique_rooms) / 2))
    fig, axes = plt.subplots(n_rows, 2, figsize=(14, 3 * n_rows))
    axes = axes.flatten()

    # The background is identical for every room; read it once.
    image = mpimg.imread(map_file_location)

    total_inside = {"MLE": 0, "Optimisation": 0}
    total_inside.update({col: 0 for col in fused_cols})
    total_points = 0  # only one total points count

    def parse_coords(room_data, col_name):
        """Return parallel x/y lists for a prediction column, skipping unparseable entries."""
        x_list, y_list = [], []
        for coord in room_data[col_name]:
            try:
                coord = ast.literal_eval(coord) if isinstance(coord, str) else coord
                x_val, y_val = float(coord[0]), float(coord[1])
            except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
                # Malformed or missing coordinate: skip the pair as a whole so the
                # x and y lists can never get out of step.
                continue
            x_list.append(x_val)
            y_list.append(y_val)
        return x_list, y_list

    def count_inside(x_list, y_list, polygon):
        """Number of (x, y) points strictly within the polygon."""
        return sum(1 for x, y in zip(x_list, y_list) if ShapelyPoint(x, y).within(polygon))

    for i, room_name in enumerate(unique_rooms):
        room_data = merged_df[merged_df['Room_name'] == room_name]
        room_box = room_data.iloc[0]
        zone = room_box["Zone_id"]
        room_type = room_box["Room_Type"]

        # Room polygon
        x_coords = [room_box.get(f'x{k+1}', None) for k in range(8) if pd.notnull(room_box.get(f'x{k+1}', None))]
        y_coords = [room_box.get(f'y{k+1}', None) for k in range(8) if pd.notnull(room_box.get(f'y{k+1}', None))]
        coordinates = list(zip(x_coords, y_coords))
        if len(coordinates) >= 3:
            polygon = ShapelyPolygon(coordinates)
            if not polygon.is_valid:
                # buffer(0) is the usual shapely repair for self-intersecting rings.
                polygon = polygon.buffer(0)
        else:
            # Room without a usable outline (e.g. no ground-truth match): an empty
            # polygon contains no point, so every method scores 0 for this room.
            polygon = ShapelyPolygon()

        # Predictions
        x_mle, y_mle = parse_coords(room_data, "Predicted_MLE")
        x_opt, y_opt = parse_coords(room_data, "Predicted_Optimisation")
        fused_data = {col: parse_coords(room_data, col) for col in fused_cols if col in room_data.columns}

        inside_mle = count_inside(x_mle, y_mle, polygon)
        inside_opt = count_inside(x_opt, y_opt, polygon)
        inside_fused = {k: count_inside(v[0], v[1], polygon) for k, v in fused_data.items()}

        # Update totals
        total_inside["MLE"] += inside_mle
        total_inside["Optimisation"] += inside_opt
        for k in fused_data:
            total_inside[k] += inside_fused[k]
        total_points += len(x_mle)  # same for all methods

        # Save per-room results
        results.append({
            "Zone_id": zone,
            "Room_name": room_name,
            "Room_Type": room_type,
            "MLE_Accuracy": inside_mle / max(len(x_mle), 1) * 100,
            "Optimisation_Accuracy": inside_opt / max(len(x_opt), 1) * 100,
            **{f"{k}_Accuracy": inside_fused[k] / max(len(fused_data[k][0]), 1) * 100 for k in fused_data},
            "MLE_Inside_Points": inside_mle,
            "Optimisation_Inside_Points": inside_opt,
            **{f"{k}_Inside_Points": inside_fused[k] for k in fused_data},
            "Total_Points": len(x_mle)
        })

        # Plotting
        ax = axes[i]
        ax.imshow(image, extent=[0, 65, 0, 28], aspect='auto')
        if x_coords and y_coords:
            ax.plot(x_coords + [x_coords[0]], y_coords + [y_coords[0]], 'r-', label='Room Boundary')
        ax.scatter(x_mle, y_mle, color='blue', s=8, label='MLE')
        ax.scatter(x_opt, y_opt, color='green', s=8, label='Optimisation')
        colors = ['orange', 'purple', 'red', 'cyan']
        for j, (fcol, (x_f, y_f)) in enumerate(fused_data.items()):
            ax.scatter(x_f, y_f, color=colors[j % len(colors)], s=8, label=f"{fcol}")

        ax.set_xlim([0, 65])
        ax.set_ylim([0, 28])

        # Build title using percentage accuracy instead of counts
        title_str = (
            f"{room_name} - "
            f"MLE: {inside_mle / max(len(x_mle), 1) * 100:.1f}%, "
            f"Opt: {inside_opt / max(len(x_opt), 1) * 100:.1f}%"
        )

        for fcol, (x_f, y_f) in fused_data.items():
            acc = inside_fused[fcol] / max(len(x_f), 1) * 100
            clean_name = fcol.replace("Predicted_", "")  # <--- removes the prefix
            title_str += f", {clean_name}: {acc:.1f}%"

        ax.set_title(title_str)
        ax.legend(loc='lower left', bbox_to_anchor=(0, 0), ncol=2)

    # Remove the unused trailing axes (odd number of rooms, or no rooms at all).
    for spare_ax in axes[len(unique_rooms):]:
        fig.delaxes(spare_ax)

    accuracy_df = pd.DataFrame(results)

    # --- Overall accuracy ---
    print("\n=== Overall Accuracy ===")
    for method in total_inside.keys():
        overall = total_inside[method] / max(total_points, 1) * 100
        print(f"{method}: {overall:.2f}%")

    # --- Room-type aggregated accuracy ---
    # Only aggregate methods that produced columns; a fused column missing from
    # result_df would otherwise raise KeyError here.
    if not accuracy_df.empty:
        methods = [m for m in ['MLE', 'Optimisation'] + fused_cols
                   if f"{m}_Inside_Points" in accuracy_df.columns]
        room_type_stats = accuracy_df.groupby('Room_Type')[
            [f"{m}_Inside_Points" for m in methods] + ['Total_Points']
        ].sum()
        for method in methods:
            room_type_stats[f"{method}_Accuracy"] = (
                room_type_stats[f"{method}_Inside_Points"] / room_type_stats['Total_Points'] * 100
            )
        print("\n=== Room-Type Accuracy ===")
        print(room_type_stats[[f"{m}_Accuracy" for m in methods]].round(2))

    plt.tight_layout()
    if output_file:
        plt.savefig(output_file, format="png")
    plt.show()

    return accuracy_df


In [None]:
from matplotlib.patches import Patch
from shapely.geometry import Polygon

# Score and plot the reloaded predictions per room. `accuracy_df` (one row per
# room) feeds the summary and map cells below.
# The shapely `Polygon` import at the top of this cell matters: the function
# resolves the global `Polygon` at call time and needs shapely's class
# (it uses `.is_valid` / `.buffer`).
# With `output_file` commented out, the function's default PNG path is used.
accuracy_df = plot_predicted_fused_dynamic(
    result_df=result,
    ground_truth_df=ground_truth_df,
    map_file_location= map_file,
    fused_cols=['Predicted_RF_Hybrid', 'Predicted_NLOS'],
#     output_file="compare_fused_results.png"
)

In [None]:
# Group by Room_Type and compute weighted (point-based) accuracy
# Group by Room_Type and compute weighted (point-based) accuracy
weighted_grouped = (
    accuracy_df
    .groupby("Room_Type")
    .apply(lambda g: pd.Series({
        "MLE_Accuracy": (g["MLE_Inside_Points"].sum() / g["Total_Points"].sum()) * 100,
        "Optimisation_Accuracy": (g["Optimisation_Inside_Points"].sum() / g["Total_Points"].sum()) * 100,
        "NLOS_Accuracy": (g["Predicted_NLOS_Inside_Points"].sum() / g["Total_Points"].sum()) * 100
    }))
)

# Calculate overall accuracy (also weighted)
overall = pd.DataFrame([{
    "MLE_Accuracy": (accuracy_df["MLE_Inside_Points"].sum() / accuracy_df["Total_Points"].sum()) * 100,
    "Optimisation_Accuracy": (accuracy_df["Optimisation_Inside_Points"].sum() / accuracy_df["Total_Points"].sum()) * 100,
    "NLOS_Accuracy": (accuracy_df["Predicted_NLOS_Inside_Points"].sum() / accuracy_df["Total_Points"].sum()) * 100
}], index=["Overall"])

# Combine results
summary_df = pd.concat([overall, weighted_grouped]).rename(index={"Open": "Open Space"})

# The row labels ("Overall", room types) live in the index, so it must be written
# out; with index=False the CSV rows could not be told apart.
os.makedirs("Result_Asset", exist_ok=True)
summary_df.to_csv("Result_Asset/temp_result.csv", index_label="Room_Type")
summary_df

In [None]:
# Number of rows in accuracy_df (one per evaluated room) for each Room_Type.
accuracy_df.groupby("Room_Type")["Room_Type"].count()

In [None]:
from matplotlib.patches import Patch

def plot_accuracy_per_room(
    accuracy_df,
    ground_truth_df,
    map_file_location,
    colors=("green", "blue", "purple"),
    labels=("MLE", "Optimisation", "Fused"),
    title_text="Room-wise Accuracy",
    output_file=None
):
    """
    Draw every room outline on the floor map and annotate it with the room's
    accuracy for three methods.

    Inside each room the first line shows ``<labels[0]>/<labels[1]>`` and the
    second line ``<labels[2]>``, each number in its entry of ``colors``.

    Parameters
    ----------
    accuracy_df : pd.DataFrame
        Per-room statistics with ``Zone_id``, ``Room_name``, ``Total_Points`` and,
        per label, ``<label>_Accuracy`` / ``<label>_Inside_Points``. A missing or
        NaN accuracy is shown as 0; a missing inside-points column gives an overall
        accuracy of 0 for that label.
    ground_truth_df : pd.DataFrame
        Room polygons keyed by ``Zone_id``/``Room_name`` with vertices in
        ``x1..x8`` / ``y1..y8``.
    map_file_location : str
        Background image path.
    colors, labels : sequences of three items
        Colour and column prefix per method.
    title_text : str
        First line of the title; the weighted overall accuracies are appended.
    output_file : str or None
        When given, the figure is saved there at 150 dpi.
    """
    # Use shapely's Polygon explicitly: the notebook also imports matplotlib's
    # Polygon under the same name, and which one is bound depends on cell order.
    from shapely.geometry import Polygon as ShapelyPolygon

    def _whole_percent(value):
        """Truncate to an int for display; NaN/None (room without a score) becomes 0."""
        return 0 if pd.isna(value) else int(value)

    # Merge with ground truth polygons
    merged_df = pd.merge(accuracy_df, ground_truth_df, on=["Zone_id", "Room_name"], how="left")

    fig, ax = plt.subplots(figsize=(16, 8))
    image = mpimg.imread(map_file_location)
    ax.imshow(image, extent=[0, 65, 0, 28], aspect='auto', zorder=0)

    for _, row in merged_df.iterrows():
        # Polygon coordinates
        x_coords = [row.get(f"x{i+1}", None) for i in range(8) if pd.notnull(row.get(f"x{i+1}", None))]
        y_coords = [row.get(f"y{i+1}", None) for i in range(8) if pd.notnull(row.get(f"y{i+1}", None))]
        # A polygon needs at least three vertices; skip rooms without a usable outline.
        if len(x_coords) < 3 or len(y_coords) < 3:
            continue

        polygon = ShapelyPolygon(list(zip(x_coords, y_coords)))
        if not polygon.is_valid:
            polygon = polygon.buffer(0)

        # Draw polygon outline
        ax.plot(x_coords + [x_coords[0]], y_coords + [y_coords[0]], 'k-', lw=1, zorder=2)

        # Accuracy values
        acc1 = _whole_percent(row.get(f"{labels[0]}_Accuracy", 0))
        acc2 = _whole_percent(row.get(f"{labels[1]}_Accuracy", 0))
        acc3 = _whole_percent(row.get(f"{labels[2]}_Accuracy", 0))

        centroid = polygon.centroid

        # Line 1: MLE / Optimisation, each number in its own colour. Only the
        # coloured pieces are drawn; an additional black "acc1/acc2" label at the
        # same spot would be rendered underneath them and smear the text.
        ax.text(centroid.x - 0.55, centroid.y + 0.15, f"{acc1}", color=colors[0], fontsize=10,
                ha="center", va="center", zorder=4, fontweight="bold")
        ax.text(centroid.x, centroid.y + 0.15, "/", color="black", fontsize=10,
                ha="center", va="center", zorder=4)
        ax.text(centroid.x + 0.55, centroid.y + 0.15, f"{acc2}", color=colors[1], fontsize=10,
                ha="center", va="center", zorder=4, fontweight="bold")

        # Line 2: Fused
        ax.text(
            centroid.x, centroid.y - 0.45,
            f"{acc3}",
            color=colors[2], fontsize=10, ha="center", va="center", zorder=3, fontweight="bold"
        )

    # Automatically scale axes
    all_x = pd.concat([ground_truth_df[f"x{i+1}"] for i in range(8)], axis=0, ignore_index=True).dropna()
    all_y = pd.concat([ground_truth_df[f"y{i+1}"] for i in range(8)], axis=0, ignore_index=True).dropna()
    ax.set_xlim([all_x.min() - 1, all_x.max() + 1])
    ax.set_ylim([all_y.min() - 1, all_y.max() + 1])

    # Weighted overall accuracies (inside points / all points, not a mean of room means)
    total_points = accuracy_df["Total_Points"].sum()
    overall_acc = []
    for label in labels:
        inside_col = f"{label}_Inside_Points"
        if inside_col in accuracy_df and total_points > 0:
            overall = (accuracy_df[inside_col].sum() / total_points) * 100
            overall_acc.append(overall)
        else:
            overall_acc.append(0)

    overall_text = " | ".join([f"{label}: {val:.1f}%" for label, val in zip(labels, overall_acc)])
    ax.set_title(f"{title_text}\nOverall Accuracy: {overall_text}", fontsize=15, fontweight="bold")

    # Legend
    legend_handles = [Patch(color=color, label=label) for color, label in zip(colors, labels)]
    ax.legend(handles=legend_handles, loc="lower left")

    ax.set_xlabel("X Coordinate")
    ax.set_ylabel("Y Coordinate")
    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=150)


In [None]:
# Display which recording the following plots refer to.
filename

In [None]:
# Room-wise accuracy map for MLE / Optimisation / NLOS. The third label selects
# the `Predicted_NLOS_*` columns produced by plot_predicted_fused_dynamic.
# NOTE(review): the title says "Stationary Tag" while `filename` currently points
# at the walk-around recording - confirm the title matches the data.
plot_accuracy_per_room(
    accuracy_df=accuracy_df,
    ground_truth_df=ground_truth_df,
    map_file_location=map_file,
    colors=("green", "blue", "purple"),
    labels=("MLE", "Optimisation", "Predicted_NLOS"),
    title_text="Asset_Stationary Tag_Nov 19_Single Data Packet",
#     output_file="Result_Asset/Plot_data_asset_tag_stay_still_combine_3_4_NLOS_Single Data Packet.png"
)

In [None]:
import matplotlib.image as mpimg
from matplotlib.patches import Patch
from shapely.geometry import Polygon


def plot_accuracy_per_room_combined(
    accuracy_df,
    ground_truth_df,
    result_d,
    map_file_location,
    fused_color="purple",
    result_d_color="blue",
    title_text="Room-wise Accuracy",
    output_file=None
):
    """
    Plot room-wise accuracy of two methods on the floor map.

    Each room is annotated with ``<LocationAI>/ <NLOS>`` in the respective colour
    and the title shows both overall accuracies weighted by ``Total_Points``.

    Parameters
    ----------
    accuracy_df : pd.DataFrame
        Per-room statistics with ``Zone_id``, ``Room_name``, ``Total_Points`` and
        ``Predicted_NLOS_Accuracy``.
    ground_truth_df : pd.DataFrame
        Room polygons keyed by ``Zone_id``/``Room_name`` with vertices in
        ``x1..x8`` / ``y1..y8``.
    result_d : pd.DataFrame
        "LocationAI" results with ``Room_name`` and a numeric ``Accuracy``. Several
        rows for the same room are averaged before use.
    map_file_location : str
        Background image path.
    fused_color, result_d_color : str
        Text/legend colours for NLOS and LocationAI.
    title_text : str
        First line of the title.
    output_file : str or None
        When given, the figure is saved there at 150 dpi.
    """
    # Use shapely's Polygon explicitly; the notebook also binds matplotlib's
    # Polygon to the same global name depending on which import ran last.
    from shapely.geometry import Polygon as ShapelyPolygon

    def _label(value):
        """Whole-number text for an accuracy; 'n/a' when the room has no value."""
        return "n/a" if pd.isna(value) else f"{int(value)}"

    def _weighted_mean(values, weights):
        """Weighted mean over rows where both value and weight are present."""
        usable = values.notna() & weights.notna()
        denominator = weights[usable].sum()
        if not denominator:
            return float("nan")
        return float((values[usable] * weights[usable]).sum() / denominator)

    # One accuracy per room: duplicate Room_name rows would otherwise fan out the
    # merges below (overlapping labels and an inflated overall figure).
    room_accuracy = (
        result_d[["Room_name", "Accuracy"]]
        .groupby("Room_name", as_index=False)["Accuracy"]
        .mean()
    )

    # Merge accuracy with polygons
    merged_df = pd.merge(
        accuracy_df,
        ground_truth_df,
        on=["Zone_id", "Room_name"],
        how="left"
    )

    # Merge result_d (LocationAI)
    merged_df = pd.merge(
        merged_df,
        room_accuracy,
        on="Room_name",
        how="left",
        suffixes=("", "_resultD")
    )

    # Load map image
    fig, ax = plt.subplots(figsize=(16, 8))
    image = mpimg.imread(map_file_location)
    ax.imshow(image, extent=[0, 65, 0, 28], aspect='auto', zorder=0)

    # ----- Draw polygons and text -----
    for _, row in merged_df.iterrows():

        x_coords = [row.get(f"x{i+1}") for i in range(8) if pd.notnull(row.get(f"x{i+1}"))]
        y_coords = [row.get(f"y{i+1}") for i in range(8) if pd.notnull(row.get(f"y{i+1}"))]
        # A polygon needs at least three vertices; skip rooms without a usable outline.
        if len(x_coords) < 3 or len(y_coords) < 3:
            continue

        poly = ShapelyPolygon(list(zip(x_coords, y_coords)))
        if not poly.is_valid:
            poly = poly.buffer(0)

        # Draw polygon outline
        ax.plot(x_coords + [x_coords[0]], y_coords + [y_coords[0]], 'k-', lw=1, zorder=2)

        centroid = poly.centroid

        # Get accuracies. The left merge leaves NaN for rooms missing from
        # result_d, which int() cannot convert - render those as "n/a".
        loc_acc = _label(row.get("Accuracy", 0))                     # LocationAI (result_d)
        fused_acc = _label(row.get("Predicted_NLOS_Accuracy", 0))    # Fused / NLOS

        # Draw colored numbers side by side
        ax.text(
            centroid.x - 0.5, centroid.y, f"{loc_acc}/ ",
            color=result_d_color, fontsize=11,
            ha="center", va="center", fontweight="bold", zorder=4
        )
        ax.text(
            centroid.x + 0.6, centroid.y, f" {fused_acc}",
            color=fused_color, fontsize=11,
            ha="center", va="center", fontweight="bold", zorder=4
        )

    # ----- Scale axes automatically -----
    all_x = pd.concat([ground_truth_df[f"x{i+1}"] for i in range(8)], axis=0).dropna()
    all_y = pd.concat([ground_truth_df[f"y{i+1}"] for i in range(8)], axis=0).dropna()
    ax.set_xlim([all_x.min() - 1, all_x.max() + 1])
    ax.set_ylim([all_y.min() - 1, all_y.max() + 1])

    # ----- Overall weighted accuracies -----
    # Overall NLOS (fused)
    overall_fused = _weighted_mean(accuracy_df["Predicted_NLOS_Accuracy"], accuracy_df["Total_Points"])

    # Overall LocationAI (result_d), weighted by Total_Points from accuracy_df.
    # Rooms without a LocationAI value are left out of both numerator and
    # denominator instead of silently counting as 0%.
    merged_for_overall = pd.merge(
        accuracy_df[["Room_name", "Total_Points"]],
        room_accuracy,
        on="Room_name", how="left"
    )
    overall_locai = _weighted_mean(merged_for_overall["Accuracy"], merged_for_overall["Total_Points"])

    ax.set_title(
        f"{title_text}\nOverall LocationAI: {overall_locai:.1f}% | Overall NLOS: {overall_fused:.1f}%",
        fontsize=15, fontweight="bold"
    )

    # ----- Legend -----
    legend_handles = [
        Patch(color=result_d_color, label="LocationAI"),
        Patch(color=fused_color, label="NLOS")
    ]
    ax.legend(handles=legend_handles, loc="lower left")

    ax.set_xlabel("X Coordinate")
    ax.set_ylabel("Y Coordinate")
    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=150)

    plt.show()


In [None]:
# Side-by-side map of LocationAI (taken from `result_rf`, which must already exist
# in the kernel with `Room_name` and `Accuracy` columns) against the NLOS accuracy
# in `accuracy_df`.
# NOTE(review): the title says "Stationary Tag" while `filename` currently points
# at the walk-around recording - confirm the title matches the data.
plot_accuracy_per_room_combined(
    accuracy_df=accuracy_df,
    ground_truth_df=ground_truth_df,
    result_d= result_rf, 
    map_file_location=map_file,

    title_text="Asset_Stationary Tag_Nov 19_Single Data Packet",
#     output_file="Result_Asset/Plot_data_asset_tag_stay_still_combine_3_4_NLOS_Single Data Packet.png"
)

In [None]:
from shapely.geometry import Polygon, Point

def compute_accuracy_per_tag(result_df, ground_truth_df, fused_cols=['Predicted_NLOS_Dynamic', 'Predicted_NLOS']):
    """
    Score each prediction row against its room polygon.

    Parameters
    ----------
    result_df : pd.DataFrame
        Predictions with ``Zone_id``, ``Room_name``, ``Tag_id`` and the columns
        named in ``fused_cols``. A cell may hold a single ``(x, y)`` pair, a
        sequence of pairs, or the string representation of either.
    ground_truth_df : pd.DataFrame
        Room polygons keyed by ``Zone_id``/``Room_name`` with vertices in
        ``x1..x8`` / ``y1..y8`` and, optionally, ``Room_Type``.
    fused_cols : sequence of str
        Prediction columns to evaluate.

    Returns
    -------
    pd.DataFrame
        One row per input row that has a usable polygon, with ``Zone_id``,
        ``Room_name``, ``Room_Type``, ``tagId`` and one ``<col>_Accuracy`` per fused
        column: the percentage of that cell's points lying within the polygon.
        Missing or unparseable predictions score 0.
    """
    # Bind shapely's classes locally so the result does not depend on which of the
    # notebook's several `Polygon`/`Point` imports ran last.
    from shapely.geometry import Point as ShapelyPoint, Polygon as ShapelyPolygon

    def parse_coords(val):
        """Normalise one prediction cell to parallel x/y lists (empty when unusable)."""
        if val is None:
            return [], []
        if isinstance(val, float) and pd.isna(val):
            return [], []
        try:
            coords = ast.literal_eval(val) if isinstance(val, str) else val
            if len(coords) == 0:
                return [], []
            # A bare (x, y) pair becomes a one-point list. np.number also covers
            # numpy integer/float32 scalars, which are not int/float subclasses.
            if isinstance(coords[0], (int, float, np.number)):
                coords = [coords]
            x_list = [float(p[0]) for p in coords]
            y_list = [float(p[1]) for p in coords]
            return x_list, y_list
        except (ValueError, SyntaxError, TypeError, IndexError, KeyError):
            # Malformed cell: treat as "no prediction" rather than aborting the run.
            return [], []

    def inside_accuracy(x_list, y_list, polygon):
        """Percentage of points within the polygon; 0 when there are no points."""
        total = len(x_list)
        if total == 0:
            return 0
        count = sum(1 for x, y in zip(x_list, y_list) if ShapelyPoint(x, y).within(polygon))
        return count / total * 100

    results = []

    # Merge with ground truth polygons
    merged_df = pd.merge(result_df, ground_truth_df, on=["Zone_id", "Room_name"], how="left")

    for _, row in merged_df.iterrows():
        zone_id = row["Zone_id"]
        room_name = row["Room_name"]
        room_type = row.get("Room_Type")
        tag_id = row["Tag_id"]

        # Room polygon
        x_coords = [row.get(f'x{i+1}') for i in range(8) if pd.notnull(row.get(f'x{i+1}'))]
        y_coords = [row.get(f'y{i+1}') for i in range(8) if pd.notnull(row.get(f'y{i+1}'))]
        # A polygon needs at least three vertices; rows without one are skipped.
        if len(x_coords) < 3 or len(y_coords) < 3:
            continue

        polygon = ShapelyPolygon(list(zip(x_coords, y_coords)))
        if not polygon.is_valid:
            polygon = polygon.buffer(0)

        # Fused methods
        fused_acc_dict = {}
        for col in fused_cols:
            x_list, y_list = parse_coords(row.get(col))
            fused_acc_dict[col] = inside_accuracy(x_list, y_list, polygon)

        results.append({
            "Zone_id": zone_id,
            "Room_name": room_name,
            "Room_Type": room_type,
            "tagId": tag_id,

            **{f"{col}_Accuracy": acc for col, acc in fused_acc_dict.items()}
        })

    return pd.DataFrame(results)


In [None]:
# Mean NLOS accuracy per (zone, room, tag, room type), sorted ascending so the
# worst-performing tags come first.
res_nlos = (
    compute_accuracy_per_tag(result, ground_truth_df, fused_cols=['Predicted_NLOS'])
    .rename(columns={"Predicted_NLOS_Accuracy": "Accuracy"})
    .groupby(["Zone_id", "Room_name", "tagId", "Room_Type"])["Accuracy"]
    .mean()
    .sort_values()
    .reset_index()
)
res_nlos

In [None]:
# Box plot of per-tag NLOS accuracy for each room.
# Rooms on the x-axis are ordered by zone, then room type ("Room" before "Open"),
# then room name.
palette = {"Room": "skyblue", "Open": "orange"}
type_priority = {"Room": 0, "Open": 1}

# Room_Type is used both as hue and as a sort key; keep it a plain string.
res_nlos["Room_Type"] = res_nlos["Room_Type"].astype(str)

res_sorted = (
    res_nlos
    .sort_values(by="Zone_id")
    .sort_values(
        by=["Zone_id", "Room_Type", "Room_name"],
        # Only Room_Type is re-keyed (to its priority); other columns sort as is.
        key=lambda column: column if column.name != "Room_Type" else column.map(type_priority),
    )
)

# First-appearance order of the sorted frame defines the x-axis order.
room_order = res_sorted["Room_name"].unique().tolist()

plt.figure(figsize=(12,6))

sns.boxplot(
    x="Room_name",
    y="Accuracy",
    hue="Room_Type",
    data=res_sorted,
    order=room_order,
    palette=palette,
    dodge=False,
)

plt.xticks(rotation=90)
plt.ylabel("Accuracy (%)")
plt.xlabel("")
plt.title("NLOS_Tag Accuracy Distribution per Room")
plt.legend(title="")
plt.tight_layout()
plt.grid(False)

## C. Using 3 dp
Apply a sliding window of 3 dp to the predictions and use the centroid of each window as the location estimate.


In [None]:
from ast import literal_eval

def safe_eval(x):
    """Parse a string as a Python literal; return any non-string value unchanged."""
    return literal_eval(x) if isinstance(x, str) else x

def compute_centroid(points):
    """
    Mean position of a collection of 2-D points.

    ``points`` is an iterable of ``(x, y)`` pairs; the result is the two-element
    list ``[mean_x, mean_y]``.
    """
    x_values, y_values = tuple(zip(*points))
    centroid = [np.mean(axis_values) for axis_values in (x_values, y_values)]
    return centroid

In [None]:
def compute_centroids_by_window(result, window_sizes=range(1, 11)):
    """
    Smooth the predictions with a trailing window and return the window centroids.

    For every window size and every (Zone_id, Room_name, Tag_id) group, rows are
    ordered by ``timestamp`` and each row is replaced by the centroid of itself and
    up to ``window_size - 1`` preceding rows (fewer at the start of a group).

    Parameters
    ----------
    result : pd.DataFrame
        Needs ``Zone_id``, ``Room_name``, ``Tag_id``, ``timestamp``,
        ``Predicted_MLE``, ``Ground_Truth`` and either ``Predicted_Optimisation``
        or ``Predicted_Opt``; ``Predicted_NLOS`` is smoothed too when present.
        Coordinate cells may be sequences or their string form. Not modified.
    window_sizes : iterable of int
        Window lengths to evaluate.

    Returns
    -------
    pd.DataFrame
        Columns ``Zone_id``, ``Room_name``, ``tagId``, ``Window_Size``,
        ``Predicted_MLE``, ``Predicted_Optimisation``, ``Ground_Truth`` (and
        ``Predicted_NLOS``), with rows of room 'Womens Restroom' removed.

    Raises
    ------
    KeyError
        If neither optimisation column is present.
    """
    frame = result.copy()

    # Coordinates loaded from CSV arrive as strings; turn them back into literals.
    for literal_col in ('Predicted_MLE', 'Ground_Truth'):
        frame[literal_col] = frame[literal_col].apply(safe_eval)

    # The optimisation column goes by two names across result files.
    optimisation_col = next(
        (name for name in ('Predicted_Optimisation', 'Predicted_Opt') if name in frame.columns),
        None,
    )
    if optimisation_col is None:
        raise KeyError("Neither 'Predicted_Optimisation' nor 'Predicted_Opt' found in DataFrame")
    frame[optimisation_col] = frame[optimisation_col].apply(safe_eval)

    # Fused predictions are optional.
    has_fused = 'Predicted_NLOS' in frame.columns
    if has_fused:
        frame['Predicted_NLOS'] = frame['Predicted_NLOS'].apply(safe_eval)

    group_cols = ['Zone_id', 'Room_name', 'Tag_id']
    all_results = []

    for window_size in window_sizes:
        for group_keys, group in frame.groupby(group_cols):
            ordered = group.sort_values('timestamp').reset_index(drop=True)

            for end in range(1, len(ordered) + 1):
                # Trailing window ending at the current row: it grows from a single
                # row until it reaches window_size, then slides.
                window = ordered.iloc[max(0, end - window_size):end]

                result_row = {
                    'Zone_id': group_keys[0],
                    'Room_name': group_keys[1],
                    'tagId': group_keys[2],
                    'Window_Size': window_size,
                    'Predicted_MLE': compute_centroid(list(window['Predicted_MLE'])),
                    'Predicted_Optimisation': compute_centroid(list(window[optimisation_col])),
                    'Ground_Truth': compute_centroid(list(window['Ground_Truth'])),
                }
                if has_fused:
                    result_row['Predicted_NLOS'] = compute_centroid(list(window['Predicted_NLOS']))

                all_results.append(result_row)

    centroid_df = pd.DataFrame(all_results)
    keep_rows = centroid_df['Room_name'] != 'Womens Restroom'
    return centroid_df[keep_rows]



In [None]:
# range(3, 4) yields only window size 3, i.e. centroids over up to three
# consecutive predictions per tag.
centroid_result = compute_centroids_by_window(result, window_sizes=range(3, 4))
centroid_result.head(1)

In [None]:
# Per-room accuracy of the window-3 centroids. `plot_predicted_all` is defined
# elsewhere in the notebook; the summary cell below expects its result to carry
# `*_Inside_Points`, `Total_Points` and `Room_Type` columns.
accuracy_df_centroid_3 = plot_predicted_all(centroid_result[centroid_result.Window_Size==3], \
                                            ground_truth_df, map_file)

In [None]:
# Group by Room_Type and compute weighted (point-based) accuracy
# Group by Room_Type and compute weighted (point-based) accuracy
weighted_grouped = (
    accuracy_df_centroid_3
    .groupby("Room_Type")
    .apply(lambda g: pd.Series({
        "MLE_Accuracy": (g["MLE_Inside_Points"].sum() / g["Total_Points"].sum()) * 100,
        "Optimisation_Accuracy": (g["Optimisation_Inside_Points"].sum() / g["Total_Points"].sum()) * 100,
        "NLOS_Accuracy": (g["NLOS_Inside_Points"].sum() / g["Total_Points"].sum()) * 100
    }))
)

# Calculate overall accuracy (also weighted)
overall = pd.DataFrame([{
    "MLE_Accuracy": (accuracy_df_centroid_3["MLE_Inside_Points"].sum() / accuracy_df_centroid_3["Total_Points"].sum()) * 100,
    "Optimisation_Accuracy": (accuracy_df_centroid_3["Optimisation_Inside_Points"].sum() / accuracy_df_centroid_3["Total_Points"].sum()) * 100,
    "NLOS_Accuracy": (accuracy_df_centroid_3["NLOS_Inside_Points"].sum() / accuracy_df_centroid_3["Total_Points"].sum()) * 100
}], index=["Overall"])

# Combine results
summary_df = pd.concat([overall, weighted_grouped]).rename(index={"Open": "Open Space"})

# The row labels ("Overall", room types) live in the index, so it must be written
# out; with index=False the CSV rows could not be told apart.
# NOTE: this writes the same temp_result.csv as the single-packet summary cell
# earlier in the notebook and therefore replaces its contents.
os.makedirs("Result_Asset", exist_ok=True)
summary_df.to_csv("Result_Asset/temp_result.csv", index_label="Room_Type")
summary_df

In [None]:
# Display which recording the following window-size analysis refers to.
filename

In [None]:
def compute_accuracy_by_window(result, ground_truth_df, map_file_location,
                               window_sizes=range(1, 11)):
    """
    Summarise localisation accuracy for each centroid window size.

    For every window size, the windowed centroids are evaluated with
    plot_predicted_all and the per-room accuracies it returns are averaged
    (unweighted mean) overall and per Room_Type.

    Parameters:
        result : pd.DataFrame
            Per-packet predictions, passed to compute_centroids_by_window.
        ground_truth_df : pd.DataFrame
            Must include 'Zone_id', 'Room_name' and 'Room_Type'; also passed
            through to plot_predicted_all.
        map_file_location :
            Passed through to plot_predicted_all.
        window_sizes : iterable of int
            Window sizes to evaluate. Defaults to 1..10, the range this
            function previously hard-coded.

    Returns:
        pd.DataFrame with one row per window size and the columns
        'Window_Size', '<Method>_Overall' and '<Method>_<Room_Type>' for the
        methods MLE, Optimisation and NLOS.
    """
    methods = ("MLE", "Optimisation", "NLOS")
    accuracy_cols = [f"{method}_Accuracy" for method in methods]
    merge_keys = ['Zone_id', 'Room_name']

    # Loop-invariant lookup of the room type for each (zone, room) pair
    room_types = ground_truth_df[merge_keys + ['Room_Type']].drop_duplicates()

    centroid_df = compute_centroids_by_window(result, window_sizes=window_sizes)
    centroid_df = pd.merge(centroid_df, room_types, on=merge_keys, how='left')

    accuracy_summary = []

    for w in sorted(centroid_df['Window_Size'].unique()):
        df_w = centroid_df[centroid_df['Window_Size'] == w]

        # NOTE(review): centroid_df already carries Room_Type, so this second
        # merge makes pandas suffix the overlapping column (Room_Type_x /
        # Room_Type_y). Kept as-is because plot_predicted_all is not visible
        # here and may rely on the frame it receives having no plain
        # 'Room_Type' column -- confirm before simplifying.
        df_w = pd.merge(df_w, room_types, on=merge_keys, how='left')

        acc_df = plot_predicted_all(
            result=df_w,
            ground_truth_df=ground_truth_df,
            map_file_location=map_file_location,
        )

        row = {"Window_Size": w}

        # Unweighted mean over the rows returned by plot_predicted_all
        for method in methods:
            row[f"{method}_Overall"] = acc_df[f"{method}_Accuracy"].mean()

        # Same mean, split by room type
        grouped = acc_df.groupby("Room_Type")[accuracy_cols].mean()
        for room_type in grouped.index:
            for method in methods:
                row[f"{method}_{room_type}"] = grouped.loc[room_type, f"{method}_Accuracy"]

        accuracy_summary.append(row)

    return pd.DataFrame(accuracy_summary)

In [None]:
# Build the accuracy-vs-window-size summary (compute_accuracy_by_window evaluates
# window sizes 1-10); used by the plotting cells below.
accuracy_vs_window = compute_accuracy_by_window(result, ground_truth_df, map_file)

# accuracy_vs_window

In [None]:
# Show the summary row for window size 3.
accuracy_vs_window[accuracy_vs_window.Window_Size==3]

In [None]:
# NLOS accuracy vs. number of scans, with the LocationAI values from
# result_d_room overlaid as diamond markers at x = 1.
plt.figure(figsize=(10, 6))

# Accuracy curves per room type (NLOS_Overall is not drawn in this figure)
for col in ("NLOS_Room", "NLOS_Open"):
    plt.plot(accuracy_vs_window["Window_Size"], accuracy_vs_window[col],
             marker='o', label=col, markersize= 4)

# Dashed reference line at 3 scans
plt.axvline(x=3, linestyle='--')

# Mark and label each curve's (interpolated) value on the reference line
for col in ["NLOS_Open", "NLOS_Room"]:
    y_val = np.interp(3, accuracy_vs_window["Window_Size"], accuracy_vs_window[col])
    plt.scatter(3, y_val, s=30)
    plt.text(3.15, y_val, f"{y_val:.1f}%", va='center', fontsize=9)

# ----- LocationAI reference points -----
x_loc = 1
locai_points = (
    ("LocAI_Room", result_d_room["Room_Accuracy"].values[0]),
    ("LocAI_Open", result_d_room["Open_Accuracy"].values[0]),
)

# Markers first, then their value labels (same drawing order as before)
for locai_label, locai_value in locai_points:
    plt.scatter(x_loc, locai_value, marker='D', s=40, label=locai_label)

for _, locai_value in locai_points:
    plt.text(x_loc + 0.1, locai_value, f'{locai_value:.1f}%', va='center', fontsize=9)

# ----- Axes, legend and output -----
plt.title('Asset_Normal Case_ NLOS Accuracy vs. # Data Packets')
plt.xlabel('# Scans')
plt.ylabel('Accuracy (%)')
plt.xlim(0, 10)
plt.ylim(70, 100)
plt.grid(True)
plt.legend(ncol=2, loc='upper center', bbox_to_anchor=(0.82, 0.15))
plt.tight_layout()
plt.savefig("figure.png", dpi=300, bbox_inches="tight")


In [None]:
# Echo `filename` (set in an earlier cell) as the cell output.
filename

In [None]:
# Accuracy vs. number of data packets for every method (colour) and every
# metric (line style), annotated at 3 packets.
plt.figure(figsize=(10, 6))

# Colour identifies the method
methods = {'MLE': 'blue', 'Optimisation': 'green', 'NLOS': 'red'}

# Line style identifies the metric: solid / dotted / dash-dot
metrics_style = {'Room': '-', 'Open': ':', 'Overall': '-.'}

# accuracy_vs_window column for each (method, metric) pair, e.g. "MLE_Room"
column_map = {
    method: {metric: f"{method}_{metric}" for metric in ("Room", "Open", "Overall")}
    for method in ("MLE", "Optimisation", "NLOS")
}

# ---- One curve per (method, metric) ----
for method, color in methods.items():
    for metric, linestyle in metrics_style.items():
        col = column_map[method][metric]
        plt.plot(accuracy_vs_window["Window_Size"], accuracy_vs_window[col],
                 linestyle=linestyle, marker='o', color=color,
                 label=f"{method} {metric}")

# ---- Vertical reference line at x = 3 ----
plt.axvline(x=3, color='black', linestyle='--', label='Scan = 3')

# ---- Mark and label every curve where it meets the reference line ----
for method, color in methods.items():
    for metric in metrics_style.keys():
        col = column_map[method][metric]
        # value at x = 3, linearly interpolated between the sampled window sizes
        y_val = np.interp(3, accuracy_vs_window["Window_Size"], accuracy_vs_window[col])
        plt.scatter(3, y_val, color=color)
        plt.text(3.15, y_val, f"{y_val:.1f}%", va='center', color=color, fontsize=9)

plt.title('Stationary Tags — Asset Accuracy vs. # Data Packets')
plt.xlabel('# Data Packets')
plt.ylabel('Accuracy (%)')
plt.xlim(0, 10)
plt.ylim(80, 100)
plt.grid(True)

plt.tight_layout()
import matplotlib.lines as mlines

# Proxy handles explaining the line styles (Room / Open / Overall)
room_line, open_line, overall_line = (
    mlines.Line2D([], [], color='black', linestyle=style, label=text)
    for style, text in (
        ('-', 'Room (solid)'),
        (':', 'Open Space (dotted)'),
        ('-.', 'Overall (dash-dot)'),
    )
)

# Proxy handles explaining the colours (MLE / Optimisation / NLOS)
mle_line, opt_line, fused_line = (
    mlines.Line2D([], [], color=shade, linestyle='-', label=text)
    for shade, text in (
        ('blue', 'MLE'),
        ('green', 'Optimisation'),
        ('red', 'NLOS'),
    )
)

plt.legend(
    handles=[room_line, open_line, overall_line, mle_line, opt_line, fused_line],
    loc='lower right',
    fontsize=10,
    title=""
)

# Write the figure to disk, then display it
plt.savefig("Result_Asset/Plot_data_asset_tag_stay_still_combine_3_4_Fused_asset_accuracy_vs_packets.png", dpi=300)

plt.show()


In [None]:
from shapely.geometry import Point, Polygon

def compute_accuracy_per_room(centroid_df, ground_truth_df, fused_cols=['Predicted_NLOS']):
    """
    Score each predicted point by whether it lies inside its room polygon.

    centroid_df is left-joined to ground_truth_df on ('Zone_id', 'Room_name').
    The room polygon is built from the non-null x1..x8 / y1..y8 columns of
    the joined row. Each row counts as exactly one point, so every accuracy
    value is either 0 or 100.

    Parameters:
        centroid_df : pd.DataFrame
            Needs 'Zone_id', 'Room_name', 'Predicted_MLE',
            'Predicted_Optimisation' and, optionally, the columns in fused_cols.
        ground_truth_df : pd.DataFrame
            Needs 'Zone_id', 'Room_name' and the polygon vertex columns.
        fused_cols : list of str
            Extra prediction columns to score; names absent from the joined
            frame are skipped.

    Returns:
        pd.DataFrame with one row per input row holding the
        '<name>_Inside_Points' and '<name>_Accuracy' columns plus 'Total_Points'.
    """
    joined = pd.merge(centroid_df, ground_truth_df, on=['Zone_id', 'Room_name'], how='left')
    vertex_numbers = range(1, 9)

    records = []
    for _, entry in joined.iterrows():
        # Room outline from whichever vertex columns are populated
        xs = [entry.get(f"x{k}") for k in vertex_numbers if pd.notnull(entry.get(f"x{k}"))]
        ys = [entry.get(f"y{k}") for k in vertex_numbers if pd.notnull(entry.get(f"y{k}"))]
        room = Polygon(list(zip(xs, ys)))
        if not room.is_valid:
            # buffer(0) rebuilds an invalid (e.g. self-intersecting) outline
            room = room.buffer(0)

        # 1 when the point is strictly within the room, else 0
        mle_hit = int(Point(entry['Predicted_MLE']).within(room))
        opt_hit = int(Point(entry['Predicted_Optimisation']).within(room))
        fused_hits = {
            name: int(Point(entry[name]).within(room))
            for name in fused_cols
            if name in entry
        }

        n_points = 1  # one centroid per row

        record = {
            'Zone_id': entry['Zone_id'],
            'Room_name': entry['Room_name'],
            'MLE_Inside_Points': mle_hit,
            'Optimisation_Inside_Points': opt_hit,
            'Total_Points': n_points,
            'MLE_Accuracy': mle_hit / n_points * 100,
            'Optimisation_Accuracy': opt_hit / n_points * 100,
        }
        for name, hit in fused_hits.items():
            record[f"{name}_Inside_Points"] = hit
            record[f"{name}_Accuracy"] = hit / n_points * 100

        records.append(record)

    return pd.DataFrame(records)


In [None]:
# Per-room accuracy of the window-size-3 centroids
centroid_df = centroid_result.loc[centroid_result["Window_Size"] == 3]

# Score every centroid, then average the scores of all windows in each room
accuracy_df_centroid = (
    compute_accuracy_per_room(centroid_df, ground_truth_df)
    .groupby(['Zone_id', 'Room_name'])
    .mean()
    .reset_index()
)
accuracy_df_centroid.head()

In [None]:
import matplotlib.image as mpimg
from matplotlib.patches import Patch
from shapely.geometry import Polygon

def plot_accuracy_per_room_three_datasets(
    accuracy_df,        # original NLOS / fused
    accuracy_df1,       # combined 3-row dataset
    result_d,           # LocationAI
    ground_truth_df,
    map_file_location,
    colors=("purple", "red"),  # accuracy_df, accuracy_df1 colors
    result_d_color="blue",
    title_text="Room-wise Accuracy",
    output_file=None
):
    """
    Draw every room outline on the floor map, labelled with three accuracies.

    Inside each room:
    - Line 1: "<result_d Accuracy>/ <accuracy_df Predicted_NLOS_Accuracy>"
      (drawn in result_d_color and colors[0])
    - Line 2: "<accuracy_df1 Predicted_NLOS_Accuracy>" (drawn in colors[1])
    A room with no value in one of the datasets is labelled 0 for it.

    The title reports one overall figure per dataset. The first two are
    weighted by accuracy_df['Total_Points']; the accuracy_df1 figure is a plain
    mean of its 'Predicted_NLOS_Accuracy' column.

    Parameters:
        accuracy_df : pd.DataFrame
            Needs 'Zone_id', 'Room_name', 'Total_Points',
            'Predicted_NLOS_Accuracy'.
        accuracy_df1 : pd.DataFrame
            Needs 'Zone_id', 'Room_name', 'Predicted_NLOS_Accuracy'.
        result_d : pd.DataFrame
            Needs 'Room_name', 'Accuracy'.
        ground_truth_df : pd.DataFrame
            Needs 'Zone_id', 'Room_name' and vertex columns x1..x8 / y1..y8.
        map_file_location : path of the background image read by mpimg.imread.
        colors : (accuracy_df colour, accuracy_df1 colour).
        result_d_color : colour used for the result_d values.
        title_text : prefix of the figure title.
        output_file : if truthy, the figure is also saved to this path.

    Returns:
        None. The figure is shown with plt.show().
    """

    def _as_percent_label(value):
        """Integer for display; a missing value (NaN/None) becomes 0."""
        return 0 if pd.isnull(value) else int(value)

    # Attach the room polygons to accuracy_df
    merged_df = pd.merge(
        accuracy_df,
        ground_truth_df,
        on=["Zone_id", "Room_name"],
        how="left"
    )

    # Attach the result_d (LocationAI) accuracy; matched on room name only
    merged_df = pd.merge(
        merged_df,
        result_d[["Room_name", "Accuracy"]],
        on="Room_name",
        how="left"
    )

    # Attach accuracy_df1; its clashing column is renamed with the "_third" suffix
    merged_df = pd.merge(
        merged_df,
        accuracy_df1[["Zone_id", "Room_name", "Predicted_NLOS_Accuracy"]],
        on=["Zone_id", "Room_name"],
        how="left",
        suffixes=("", "_third")
    )

    # Background map, stretched over x in [0, 65] and y in [0, 28]
    # (presumably the floor extent in the polygon coordinate units -- TODO confirm)
    fig, ax = plt.subplots(figsize=(16, 8))
    image = mpimg.imread(map_file_location)
    ax.imshow(image, extent=[0, 65, 0, 28], aspect='auto', zorder=0)

    # ----- Draw polygons and text -----
    for _, row in merged_df.iterrows():
        x_coords = [row.get(f"x{i+1}") for i in range(8) if pd.notnull(row.get(f"x{i+1}"))]
        y_coords = [row.get(f"y{i+1}") for i in range(8) if pd.notnull(row.get(f"y{i+1}"))]
        if not x_coords or not y_coords:
            continue  # no polygon for this room

        poly = Polygon(list(zip(x_coords, y_coords)))
        if not poly.is_valid:
            poly = poly.buffer(0)

        # Closed outline: repeat the first vertex at the end
        ax.plot(x_coords + [x_coords[0]], y_coords + [y_coords[0]], 'k-', lw=1, zorder=2)
        centroid = poly.centroid

        # The left merges above leave NaN where a dataset has no row for this
        # room; int(NaN) raises, so missing values are shown as 0 instead.
        loc_acc = _as_percent_label(row.get("Accuracy", 0))                        # result_d
        fused_acc = _as_percent_label(row.get("Predicted_NLOS_Accuracy", 0))       # accuracy_df
        third_acc = _as_percent_label(row.get("Predicted_NLOS_Accuracy_third", 0)) # accuracy_df1

        # Line 1: result_d / accuracy_df
        ax.text(
            centroid.x - 0.5, centroid.y, f"{loc_acc}/",
            color=result_d_color, fontsize=11,
            ha="center", va="center", fontweight="bold", zorder=4
        )
        ax.text(
            centroid.x + 0.8, centroid.y, f" {fused_acc}",
            color=colors[0], fontsize=11,
            ha="center", va="center", fontweight="bold", zorder=4
        )

        # Line 2: accuracy_df1
        ax.text(
            centroid.x, centroid.y - 0.75, f"{third_acc}",
            color=colors[1], fontsize=11,
            ha="center", va="center", fontweight="bold", zorder=4
        )

    # ----- Scale axes to the polygon extent plus a 1-unit margin -----
    all_x = pd.concat([ground_truth_df[f"x{i+1}"] for i in range(8)], axis=0).dropna()
    all_y = pd.concat([ground_truth_df[f"y{i+1}"] for i in range(8)], axis=0).dropna()
    ax.set_xlim([all_x.min() - 1, all_x.max() + 1])
    ax.set_ylim([all_y.min() - 1, all_y.max() + 1])

    # ----- Overall accuracies for the title -----
    total_points = accuracy_df["Total_Points"].sum()

    # result_d accuracy weighted by accuracy_df's point counts; rooms missing
    # from result_d drop out of the numerator (inner merge) but not the denominator
    overall_locai = (
        pd.merge(accuracy_df[["Room_name", "Total_Points"]],
                 result_d[["Room_name", "Accuracy"]],
                 on="Room_name")
        .eval("Accuracy * Total_Points").sum()
    ) / total_points

    overall_fused = (accuracy_df["Predicted_NLOS_Accuracy"] * accuracy_df["Total_Points"]).sum() / total_points
    # Unweighted mean over accuracy_df1 rows
    overall_third = (accuracy_df1.Predicted_NLOS_Accuracy.mean())

    ax.set_title(
        f"{title_text}_LocationAI: {overall_locai:.1f}% | "
        f"NLOS_Single_Data_Packet: {overall_fused:.1f}% | NLOS_3_Data_Packets: {overall_third:.1f}%",
        fontsize=15, fontweight="bold"
    )

    # ----- Legend -----
    legend_handles = [
        Patch(color=result_d_color, label="LocationAI"),
        Patch(color=colors[0], label="NLOS_single_data_packet"),
        Patch(color=colors[1], label="NLOS_3_data_packets")
    ]
    ax.legend(handles=legend_handles, loc="lower left")

    ax.set_xlabel("X Coordinate")
    ax.set_ylabel("Y Coordinate")
    plt.tight_layout()

    if output_file:
        plt.savefig(output_file, dpi=150)

    plt.show()


In [None]:
# Preview the first row of the per-room centroid accuracy table.
accuracy_df_centroid.head(1)

In [None]:
# Room-wise comparison map: result_rf, accuracy_df and the window-size-3 centroid
# accuracy (accuracy_df_centroid). The figure is only displayed; saving is disabled
# while output_file stays commented out.
plot_accuracy_per_room_three_datasets(
    accuracy_df=accuracy_df,
    accuracy_df1=accuracy_df_centroid,
    ground_truth_df=ground_truth_df,
    result_d= result_rf, 
    map_file_location=map_file,

    title_text="Asset_Moving Tags",
#     output_file="Result_Asset/Plot_data_asset_tag_stay_still_combine_3_4_NLOS_Single Data Packet.png"
)

In [None]:
def compute_accuracy_by_window_with_tags(result, ground_truth_df, map_file_location,
                                         window_sizes=range(1, 11)):
    """
    Summarise localisation accuracy per window size and per tag.

    For every (window size, tagId) pair the windowed centroids are evaluated
    with plot_predicted_all, and the accuracies it returns are averaged
    (unweighted mean) overall and per Room_Type.

    Parameters:
        result : pd.DataFrame
            Per-packet predictions, passed to compute_centroids_by_window.
            The centroids it returns must carry a 'tagId' column.
        ground_truth_df : pd.DataFrame
            Must include 'Zone_id', 'Room_name' and 'Room_Type'; also passed
            through to plot_predicted_all.
        map_file_location :
            Passed through to plot_predicted_all.
        window_sizes : iterable of int
            Window sizes to evaluate. Defaults to 1..10, the range this
            function previously hard-coded.

    Returns:
        pd.DataFrame with one row per (window size, tag) and the columns
        'Window_Size', 'tagId', '<Method>_Overall' and '<Method>_<Room_Type>'
        for the methods MLE, Optimisation and NLOS.
    """
    methods = ("MLE", "Optimisation", "NLOS")
    accuracy_cols = [f"{method}_Accuracy" for method in methods]
    merge_keys = ['Zone_id', 'Room_name']

    # Loop-invariant lookup of the room type for each (zone, room) pair
    room_types = ground_truth_df[merge_keys + ['Room_Type']].drop_duplicates()

    centroid_df = compute_centroids_by_window(result, window_sizes=window_sizes)
    centroid_df = pd.merge(centroid_df, room_types, on=merge_keys, how='left')

    accuracy_summary = []

    for w in sorted(centroid_df['Window_Size'].unique()):
        df_w = centroid_df[centroid_df['Window_Size'] == w]

        # NOTE(review): centroid_df already carries Room_Type, so this second
        # merge makes pandas suffix the overlapping column (Room_Type_x /
        # Room_Type_y). Kept as-is because plot_predicted_all is not visible
        # here and may rely on the frame it receives having no plain
        # 'Room_Type' column -- confirm before simplifying.
        df_w = pd.merge(df_w, room_types, on=merge_keys, how='left')

        # One evaluation per tag within this window size
        for tag_id, df_tag in df_w.groupby("tagId"):

            acc_df = plot_predicted_all(
                result=df_tag,
                ground_truth_df=ground_truth_df,
                map_file_location=map_file_location,
            )

            row = {"Window_Size": w, "tagId": tag_id}

            # Unweighted mean over the rows returned by plot_predicted_all
            for method in methods:
                row[f"{method}_Overall"] = acc_df[f"{method}_Accuracy"].mean()

            # Same mean, split by room type
            grouped = acc_df.groupby("Room_Type")[accuracy_cols].mean()
            for room_type in grouped.index:
                for method in methods:
                    row[f"{method}_{room_type}"] = grouped.loc[room_type, f"{method}_Accuracy"]

            accuracy_summary.append(row)

    return pd.DataFrame(accuracy_summary)


In [None]:
# Per-tag accuracy for every window size (one row per window size and tagId),
# displayed in full as the cell output.
accuracy_vs_window_tag = compute_accuracy_by_window_with_tags(result, ground_truth_df, map_file)

accuracy_vs_window_tag

In [None]:
def plot_accuracy_by_tag(accuracy_vs_window_tag, col= ['NLOS_Room'], highlight_window=3):
    """
    Plot accuracy vs. Window_Size for each tag separately.

    Parameters:
        accuracy_vs_window_tag : pd.DataFrame
            Must include columns 'tagId', 'Window_Size' and every column
            named in `col` (e.g. 'NLOS_Overall', 'NLOS_Open', 'NLOS_Room').
            The frame is not modified.
        col : list of str (a single column name is also accepted)
            Accuracy columns to draw; one curve per tag and column.
        highlight_window : int
            Window size marked with a vertical line. Each curve that has a
            non-missing value at this window size is annotated there.

    Returns:
        None. The figure is shown with plt.show().
    """

    # tagId as string for the legend labels; assign() returns a new frame so
    # the caller's DataFrame keeps its original dtype
    plot_df = accuracy_vs_window_tag.assign(
        tagId=accuracy_vs_window_tag['tagId'].astype(str)
    )

    numeric_cols = [col] if isinstance(col, str) else list(col)

    plt.figure(figsize=(14, 8))
    markers = ['o', 's', '^']
    linestyles = ['-', '--', ':']

    for tag in plot_df['tagId'].unique():
        tag_df = plot_df[plot_df['tagId'] == tag].sort_values('Window_Size')
        at_highlight = tag_df[tag_df['Window_Size'] == highlight_window]

        for position, col_name in enumerate(numeric_cols):
            # Styles repeat when more columns than styles are requested
            marker = markers[position % len(markers)]
            ls = linestyles[position % len(linestyles)]

            plt.plot(tag_df['Window_Size'], tag_df[col_name], marker=marker, linestyle=ls,
                     label=f'Tag {tag} {col_name.replace("_", " ")}')

            # Annotate only when this tag actually has a value at the
            # highlighted window size (the row may be absent or NaN)
            if at_highlight.empty:
                continue
            y_val = at_highlight[col_name].values[0]
            if pd.isnull(y_val):
                continue
            plt.scatter(highlight_window, y_val, color='black', marker=marker)
            plt.text(highlight_window + 0.2, y_val, f"{y_val:.1f}%", fontsize=8)

    plt.axvline(x=highlight_window, color='red', linestyle='--')
    plt.xlabel('# Data Packets')
    plt.ylabel('Accuracy (%)')
    plt.title('Accuracy vs # Data Packets per Tag')
    plt.grid(True, linestyle='--', alpha=0.6)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize=9)
    plt.tight_layout()
    plt.show()


In [None]:
# Preview the first row of the per-tag accuracy table.
accuracy_vs_window_tag.head(1)

In [None]:
# Plot the NLOS_Room accuracy curve of every tag.
plot_accuracy_by_tag(accuracy_vs_window_tag, col= ['NLOS_Room'])

In [None]:
# List the distinct tag ids present in data_set_df.
data_set_df.tagId.unique()

In [None]:
def plot_rssi_box_each_zone_grid(df, top_n=5):
    """
    Draw one box-plot panel per room showing, for every tag, the RSSI values
    of that room's strongest beacons.

    For each 'Room_name' the `top_n` RSSI columns with the highest mean are
    selected (values equal to -100 are ignored when computing that mean).
    Each tag then gets one box built from all of its values in those columns.

    Parameters:
        df : pd.DataFrame
            Needs 'tagId', 'Room_name' and the RSSI columns, which are
            detected as the columns whose name starts with a digit.
            The frame is not modified.
        top_n : int
            Number of strongest RSSI columns used per room.

    Returns:
        None. The figure is shown with plt.show(); nothing is drawn when
        `df` contains no rooms.
    """
    # tagId as string for labels/colours; assign() returns a new frame so the
    # caller's DataFrame keeps its original dtype
    df = df.assign(tagId=df['tagId'].astype(str))

    # RSSI feature columns: name starts with a digit (str() guards non-string names)
    rssi_cols = [c for c in df.columns if str(c)[:1].isdigit()]

    unique_tags = df['tagId'].unique()

    zones = sorted(df["Room_name"].unique())
    n_zones = len(zones)
    if n_zones == 0:
        return  # nothing to plot; plt.subplots cannot build a 0-row grid

    # One fixed colour per tag, shared by all panels.
    # plt.get_cmap replaces plt.cm.get_cmap, which recent matplotlib removed.
    cmap = plt.get_cmap('tab20', len(unique_tags))
    tag_colors = {tag: cmap(i) for i, tag in enumerate(unique_tags)}

    # Panel grid: 5 panels per row
    n_cols = 5
    n_rows = math.ceil(n_zones / n_cols)

    # squeeze=False always yields a 2-D array, so flatten() is safe
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(14, 4 * n_rows), squeeze=False)
    axes = axes.flatten()

    for idx, zone_id in enumerate(zones):
        ax = axes[idx]
        zone_df = df[df["Room_name"] == zone_id]

        # Strongest features of this room, ranked by mean RSSI with the -100
        # placeholder masked out
        mean_rssi = zone_df[rssi_cols].where(zone_df[rssi_cols] != -100).mean()
        top_features = mean_rssi.sort_values(ascending=False).head(top_n).index.tolist()

        plot_data = []
        tag_labels = []

        # One box per tag: all of its values in the selected columns.
        # NOTE(review): -100 is excluded from the ranking above but not from
        # the plotted values -- confirm this is intended.
        for tag in unique_tags:
            tag_df = zone_df[zone_df["tagId"] == tag]

            if tag_df.empty:
                values = [np.nan] * top_n  # keeps the tag's slot on the axis
            else:
                values = tag_df[top_features].values.flatten()

            plot_data.append(values)
            tag_labels.append(tag)

        # Tick labels are set through set_xticklabels below rather than the
        # boxplot `labels` keyword, which newer matplotlib deprecates
        box = ax.boxplot(plot_data, patch_artist=True)

        # Colour each box by tag
        for patch, tag in zip(box['boxes'], tag_labels):
            patch.set_facecolor(tag_colors[tag])

        ax.set_title(f"{zone_id}")
        ax.set_ylabel("RSSI")
        ax.set_xticklabels(tag_labels, rotation=45, ha='right')
        ax.grid(True, axis='y', linestyle='--', alpha=0.6)

    # Drop the unused panels at the end of the grid
    for j in range(n_zones, len(axes)):
        fig.delaxes(axes[j])

    # Shared legend below all panels
    legend_handles = [
        plt.Line2D([0], [0], color=tag_colors[tag], lw=8, label=f"Tag {tag}")
        for tag in unique_tags
    ]

    fig.legend(
        handles=legend_handles,
        title="Tag ID",
        loc="lower center",
        bbox_to_anchor=(0.5, 0.00),
        ncol=5
    )

    # Leave the bottom 7% of the figure free for the legend
    plt.tight_layout(rect=[0, 0.07, 1, 1])
    plt.show()


In [None]:
# Per-room RSSI box plots for data_set_df, using each room's 5 strongest beacons.
plot_rssi_box_each_zone_grid(data_set_df, top_n=5)

### Sliding-window aggregation (window size 3)

In [None]:
def sliding_window_aggregate_3rows(df, window_size=3):
    """
    Build synthetic rows by aggregating beacon readings over a sliding window.

    - Rows are sorted by (Zone_id, Room_name, tagId, timestamp) and grouped
      by (Zone_id, Room_name, tagId); groups with fewer than `window_size`
      rows contribute nothing.
    - Every run of `window_size` consecutive rows yields three output rows,
      holding the per-beacon mean, median and max (in that order, tagged in
      'agg_type').
    - Readings equal to -100 (and NaN) are left out of the aggregation; a
      beacon with no remaining reading in the window is reported as -100.
    - Zone_id, Room_name, tagId, timestamp, x and y are copied from the last
      row of the window.
    - Beacon columns are those whose name starts with '0'.

    Returns a DataFrame containing only the synthetic rows.
    """
    group_keys = ['Zone_id', 'Room_name', 'tagId']
    carried_cols = group_keys + ['timestamp', 'x', 'y']
    beacon_cols = [name for name in df.columns if str(name).startswith('0')]
    reducers = (('mean', np.mean), ('median', np.median), ('max', np.max))

    ordered = df.sort_values(by=group_keys + ['timestamp'])

    records = []
    for _, tag_rows in ordered.groupby(group_keys):
        tag_rows = tag_rows.reset_index(drop=True)

        # `stop` is the exclusive end of each window; the range is empty when
        # the group is shorter than the window
        for stop in range(window_size, len(tag_rows) + 1):
            chunk = tag_rows.iloc[stop - window_size:stop]
            newest = chunk.iloc[-1]
            header = {name: newest[name] for name in carried_cols}

            # Usable readings per beacon (-100 placeholder and NaN removed)
            usable = {
                beacon: chunk[beacon].replace(-100, np.nan).dropna().values
                for beacon in beacon_cols
            }

            for label, reducer in reducers:
                record = dict(header)
                for beacon in beacon_cols:
                    readings = usable[beacon]
                    record[beacon] = reducer(readings) if len(readings) else -100
                record['agg_type'] = label
                records.append(record)

    return pd.DataFrame(records)

In [None]:
# Build the sliding-window (size 3) mean/median/max rows from data_set_df.
data_set_df_agg = sliding_window_aggregate_3rows(data_set_df, window_size=3)

In [None]:
# Compare the size of the aggregated frame with the original one.
data_set_df_agg.shape, data_set_df.shape

In [None]:
# Run the fused localisation on the sliding-window aggregates, save the
# predictions, and report the average wall-clock time per processed row.
# NOTE: this rebinds `result`, which earlier cells used for the per-packet run.
localisation_input = data_set_df_agg.drop(columns='agg_type')

start_time = time.perf_counter()

result = fused_localization_mle_opt(localisation_input, anchor_point_df)

save_folder = "Result_Asset"
save_name = filename.replace('.json', '_aggregation.csv')
save_path = os.path.join(save_folder, save_name)

os.makedirs(save_folder, exist_ok=True)  # to_csv does not create missing folders
result.to_csv(save_path, index=False)

end_time = time.perf_counter()

# The timed span covers the localisation and the CSV write
total_time = end_time - start_time

# Average over the rows that were actually processed (the aggregated frame),
# not over the rows of the original data_set_df
avg_time_per_row = total_time / len(localisation_input)
print(avg_time_per_row)

In [None]:
import pandas as pd
import numpy as np

def fixed_window_centroid_predictions(df, window_size=3):
    """
    Average predicted and ground-truth positions over fixed, non-overlapping
    blocks of `window_size` rows.

    - Rows are grouped by (Zone_id, Room_name, Tag_id) and keep the order
      they have in `df`.
    - Each group is cut into consecutive full blocks; leftover rows at the
      end of a group are dropped.
    - For every block, each position column is averaged component-wise.
      Zone_id, Room_name, Tag_id and timestamp are taken from the block's
      last row.

    Returns a DataFrame with one row per block.
    """
    group_keys = ['Zone_id', 'Room_name', 'Tag_id']
    carried_cols = group_keys + ['timestamp']
    position_cols = [
        'Predicted_MLE',
        'Predicted_Optimisation',
        'Predicted_NLOS',
        'Predicted_NLOS_Dynamic',
        'Ground_Truth',
    ]

    records = []
    for _, tag_rows in df.groupby(group_keys):
        tag_rows = tag_rows.reset_index(drop=True)

        for block_no in range(len(tag_rows) // window_size):
            first = block_no * window_size
            chunk = tag_rows.iloc[first:first + window_size]
            newest = chunk.iloc[-1]

            record = {name: newest[name] for name in carried_cols}
            for name in position_cols:
                # Stack the list-like cells into a 2-D array, then average the rows
                stacked = np.array(chunk[name].tolist())
                record[name] = stacked.mean(axis=0).tolist()

            records.append(record)

    return pd.DataFrame(records)


In [None]:
# Average the predictions in `result` over fixed blocks of 3 rows and preview
# the first averaged row.
result_agg= fixed_window_centroid_predictions(result, window_size=3)
result_agg.head(1)

In [None]:
from matplotlib.patches import Patch
from shapely.geometry import Polygon

# Evaluate the block-averaged predictions with plot_predicted_fused_dynamic
# (defined elsewhere in this notebook) for both fused columns; the figure is not
# saved while output_file stays commented out.
accuracy_df_agg = plot_predicted_fused_dynamic(
    result_df=result_agg,
    ground_truth_df=ground_truth_df,
    map_file_location= map_file,
    fused_cols=['Predicted_NLOS_Dynamic', 'Predicted_NLOS'],
#     output_file="compare_fused_results.png"
)

In [None]:
# Point-weighted accuracy summary for the block-averaged (aggregated) run.
# Every accuracy is sum(inside points) / sum(total points) * 100, so groups with
# more points weigh proportionally more than in a plain mean of percentages.
inside_to_accuracy = {
    "MLE_Inside_Points": "MLE_Accuracy",
    "Optimisation_Inside_Points": "Optimisation_Accuracy",
    "Predicted_NLOS_Inside_Points": "NLOS_Accuracy",
}
inside_cols = list(inside_to_accuracy)

# Per Room_Type: sum the counts once, then divide by the summed totals.
room_type_sums = (
    accuracy_df_agg
    .groupby("Room_Type")[inside_cols + ["Total_Points"]]
    .sum()
)
weighted_grouped = (
    room_type_sums[inside_cols]
    .div(room_type_sums["Total_Points"], axis=0)
    .mul(100)
    .rename(columns=inside_to_accuracy)
)

# Overall accuracy from the SAME frame as the per-room-type rows
# (accuracy_df_agg); it was previously taken from accuracy_df, a different run,
# so the "Overall" row did not match the rows below it.
total_points = accuracy_df_agg["Total_Points"].sum()
overall = pd.DataFrame([{
    accuracy_name: accuracy_df_agg[inside_name].sum() / total_points * 100
    for inside_name, accuracy_name in inside_to_accuracy.items()
}], index=["Overall"])

# Combine results
summary_df = pd.concat([overall, weighted_grouped]).rename(index={"Open": "Open Space"})

# The index carries the only row labels (Overall / room types); writing it keeps
# the CSV rows identifiable. NOTE: another summary cell in this notebook writes
# to the same path, so this call overwrites that file.
summary_df.to_csv("Result_Asset/temp_result.csv", index_label="Group")
summary_df

In [None]:
# Echo `filename` (set in an earlier cell) as the cell output.
filename