# Step 1: data engineering

In [2]:
# query data from the API and store it in a dictionary
import requests
import json

# query data from asset API
devices = ['Asset-0', 'Asset-1', 'Asset-2', 'Asset-3', 'Asset-A', 'Asset-B', 'Asset-C']
base_url = 'http://localhost:5000/api/asset/'
raw_data = {}
for device in devices:
    url = base_url + device + '?mins=100000&rssi=-100'
    resp = requests.get(url)
    data = json.loads(resp.text)
    raw_data[device] = data['data']
print(raw_data)
# Remove devices with no data
not_found_devices = []
for device in raw_data.keys():
    if len(raw_data[device]) == 0:
        not_found_devices.append(device)
for not_found_device in not_found_devices:
    print('Removing ' + not_found_device)
    del raw_data[not_found_device]
print(raw_data)

{'Asset-0': [{'rssi': -55, 'station': 'ESP32', 'timestamp': '2025-03-17T08:48:32.791000'}, {'rssi': -57, 'station': 'ESP32', 'timestamp': '2025-03-17T08:48:14.729000'}, {'rssi': -65, 'station': 'ESP32', 'timestamp': '2025-03-17T08:47:53.523000'}, {'rssi': -77, 'station': 'esp32', 'timestamp': '2025-03-17T08:43:19.361000'}, {'rssi': -76, 'station': 'esp32', 'timestamp': '2025-03-17T08:43:16.258000'}, {'rssi': -72, 'station': 'esp32', 'timestamp': '2025-03-17T08:43:13.322000'}, {'rssi': -77, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:55.093000'}, {'rssi': -67, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:43.079000'}, {'rssi': -69, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:40.043000'}, {'rssi': -67, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:27.964000'}, {'rssi': -67, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:18.885000'}, {'rssi': -67, 'station': 'esp32', 'timestamp': '2025-03-17T08:42:12.905000'}, {'rssi': -75, 'station': 'esp32', 'timestamp': '202

In [6]:
# re-organize the data into table format
rssi_data = []
idx = 0
for device in raw_data.keys():
    for data in raw_data[device]:
        record = []
        record.append(data['timestamp'])
        record.append(device)
        record.append(data['station'])
        record.append(data['rssi'])
        record.append(0)
        rssi_data.append(record)
print(len(rssi_data), rssi_data)

1076 [['2025-03-17T08:48:32.791000', 'Asset-0', 'ESP32', -55, 0], ['2025-03-17T08:48:14.729000', 'Asset-0', 'ESP32', -57, 0], ['2025-03-17T08:47:53.523000', 'Asset-0', 'ESP32', -65, 0], ['2025-03-17T08:43:19.361000', 'Asset-0', 'esp32', -77, 0], ['2025-03-17T08:43:16.258000', 'Asset-0', 'esp32', -76, 0], ['2025-03-17T08:43:13.322000', 'Asset-0', 'esp32', -72, 0], ['2025-03-17T08:42:55.093000', 'Asset-0', 'esp32', -77, 0], ['2025-03-17T08:42:43.079000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:40.043000', 'Asset-0', 'esp32', -69, 0], ['2025-03-17T08:42:27.964000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:18.885000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:12.905000', 'Asset-0', 'esp32', -67, 0], ['2025-03-17T08:42:09.844000', 'Asset-0', 'esp32', -75, 0], ['2025-03-17T08:42:03.808000', 'Asset-0', 'esp32', -75, 0], ['2025-03-17T08:41:54.774000', 'Asset-0', 'esp32', -68, 0], ['2025-03-17T08:41:36.605000', 'Asset-0', 'esp32', -69, 0], ['2025-03-17T08:41:27.545000', 'As

In [34]:
# import the data into a pandas dataframe
import pandas as pd
from datetime import datetime
df = pd.DataFrame(rssi_data, columns=['timestamp', 'device', 'station', 'rssi', 'label'])
df['timestamp'] = pd.to_datetime(df['timestamp'], format='mixed')
df

Unnamed: 0,timestamp,device,station,rssi,label
0,2025-03-17 08:48:32.791,Asset-0,ESP32,-55,0
1,2025-03-17 08:48:14.729,Asset-0,ESP32,-57,0
2,2025-03-17 08:47:53.523,Asset-0,ESP32,-65,0
3,2025-03-17 08:43:19.361,Asset-0,esp32,-77,0
4,2025-03-17 08:43:16.258,Asset-0,esp32,-76,0
...,...,...,...,...,...
1071,2025-03-17 08:20:17.542,Asset-2,esp32,-70,0
1072,2025-03-17 08:20:03.244,Asset-2,esp32,-65,0
1073,2025-03-17 08:21:18.704,Asset-3,esp32,-74,0
1074,2025-03-17 08:21:09.635,Asset-3,esp32,-78,0


In [43]:
station_dfs = {}
for station in df['station'].unique():
    station_dfs[station] = df[df['station'] == station]
for station_id in station_dfs.keys():
    station_dfs[station_id] = station_dfs[station_id][['timestamp', 'rssi']].resample('1min', on='timestamp').mean()
station_df = pd.concat(station_dfs.values(), axis=1)
station_df.columns = station_dfs.keys()
station_df.dropna(inplace=True)
station_df['label'] = 0
station_df.to_csv('station_df.csv')

In [8]:
# group data by timestamp
station_list = df['station'].unique()
device_list = df['device'].unique()
df_dicts = {}
for station in station_list:
    for device in device_list:
        sub_df = df[(df['station'] == station) & (df['device'] == device)]
        if sub_df.empty:
            continue
        rssi_values = sub_df[['timestamp','rssi']].resample('1min', on='timestamp').mean()
        rssi_values = rssi_values.dropna().reset_index()
        idx = station + '+' + device
        df_dicts[idx] = rssi_values
print(df_dicts)

{'ESP32+Asset-0':              timestamp       rssi
0  2025-03-17 08:30:00 -75.500000
1  2025-03-17 08:31:00 -71.500000
2  2025-03-17 08:33:00 -68.666667
3  2025-03-17 08:34:00 -69.000000
4  2025-03-17 08:35:00 -62.000000
5  2025-03-17 08:36:00 -53.500000
6  2025-03-17 08:37:00 -55.000000
7  2025-03-17 08:39:00 -63.500000
8  2025-03-17 08:40:00 -59.000000
9  2025-03-17 08:41:00 -60.000000
10 2025-03-17 08:47:00 -65.000000
11 2025-03-17 08:48:00 -56.000000, 'ESP32+Asset-1':             timestamp       rssi
0 2025-03-17 08:51:00 -62.666667
1 2025-03-17 08:52:00 -63.333333
2 2025-03-17 08:53:00 -66.000000
3 2025-03-17 08:54:00 -68.000000, 'ESP32+Asset-2':              timestamp       rssi
0  2025-03-17 08:30:00 -72.666667
1  2025-03-17 08:31:00 -73.800000
2  2025-03-17 08:32:00 -75.250000
3  2025-03-17 08:33:00 -75.181818
4  2025-03-17 08:34:00 -76.428571
5  2025-03-17 08:35:00 -72.714286
6  2025-03-17 08:36:00 -72.727273
7  2025-03-17 08:37:00 -69.800000
8  2025-03-17 08:38:00 -70.375000

In [9]:
# export the data to a CSV file with columns station, device, timestamp, rssi
import csv
with open('rssi_data.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['station', 'device', 'timestamp', 'rssi', 'label'])
    for key in df_dicts.keys():
        station, device = key.split('+')
        for idx, row in df_dicts[key].iterrows():
            writer.writerow([station, device, row['timestamp'], row['rssi'], 0])

# Step 2: ML engineering

In [10]:
# import label data from CSV file
label_data = []
with open('rssi_data.csv', mode='r') as file:
    reader = csv.reader(file)
    for idx, row in enumerate(reader):
        if idx == 0:
            continue
        label_data.append(row)
print(label_data)

[['ESP32', 'Asset-2', '3/17/2025 8:34', '-76.42857143', '0'], ['esp32', 'Asset-0', '3/17/2025 8:33', '-76.125', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:49', '-75.66666667', '0'], ['ESP32', 'Asset-0', '3/17/2025 8:30', '-75.5', '0'], ['esp32', 'Asset-2', '3/17/2025 8:22', '-75.375', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:32', '-75.25', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:33', '-75.18181818', '0'], ['esp32', 'Asset-0', '3/17/2025 8:27', '-75', '0'], ['esp32', 'Asset-0', '3/17/2025 8:43', '-75', '0'], ['esp32', 'Asset-0', '3/17/2025 8:34', '-74.83333333', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:31', '-73.8', '0'], ['esp32', 'Asset-3', '3/17/2025 8:21', '-73.66666667', '0'], ['esp32', 'Asset-0', '3/17/2025 8:30', '-73.6', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:43', '-73', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:36', '-72.72727273', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:35', '-72.71428571', '0'], ['ESP32', 'Asset-2', '3/17/2025 8:30', '-72.66666667', '0'], ['ESP32', 'Asset-2', '3/17

In [26]:
# prepare data into ML training format
feat_df = pd.DataFrame(label_data, columns=['station', 'device', 'timestamp', 'rssi', 'label'])
feat_df.label = feat_df.label.astype(int)
feat_df.rssi = feat_df.rssi.astype(float)
feat_esp32_df = feat_df[feat_df['station'] =='esp32']
feat_ESP32_df = feat_df[feat_df['station'] =='ESP32']
data_len = min(feat_esp32_df.shape[0], feat_ESP32_df.shape[0])
rows = []
for idx in range(data_len):
    row = [float(feat_esp32_df.iloc[idx, 3]), float(feat_ESP32_df.iloc[idx, 3]), int(feat_esp32_df.iloc[idx, 4])]
    rows.append(row)
train_df = pd.DataFrame(rows, columns=['esp32_rssi', 'ESP32_rssi', 'label'])
train_df

Unnamed: 0,esp32_rssi,ESP32_rssi,label
0,-76.125,-76.428571,0
1,-75.375,-75.666667,0
2,-75.0,-75.5,0
3,-75.0,-75.25,0
4,-74.833333,-75.181818,0
5,-73.666667,-73.8,0
6,-73.6,-73.0,0
7,-72.166667,-72.727273,0
8,-72.0,-72.714286,0
9,-72.0,-72.666667,0


In [52]:
label_data = []
with open('station_df.csv', mode='r') as file:
    reader = csv.reader(file)
    for idx, row in enumerate(reader):
        if idx == 0:
            continue
        label_data.append(row)
train_df = pd.DataFrame(label_data, columns=['timestamp', 'ESP32_rssi', 'esp32_rssi', 'label'])
train_df.ESP32_rssi = train_df.ESP32_rssi.astype(float)
train_df.esp32_rssi = train_df.esp32_rssi.astype(float)
train_df.label = train_df.label.astype(int)

In [56]:
from sklearn import tree

X = train_df[['ESP32_rssi', 'esp32_rssi']]
y = train_df['label']
clf = tree.DecisionTreeClassifier(max_depth=5)
clf = clf.fit(X, y)
clf.predict([[-60, -80]])



array([0])