In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install shap

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting shap
  Downloading shap-0.41.0-cp39-cp39-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (572 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m572.4/572.4 KB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting slicer==0.0.7
  Downloading slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
Successfully installed shap-0.41.0 slicer-0.0.7


In [3]:
import pandas as pd
import seaborn as sns
import numpy as np
import shap
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold, RandomizedSearchCV
from sklearn.metrics import accuracy_score, make_scorer, classification_report, confusion_matrix

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb
from xgboost import XGBClassifier

# Umwandlung JSON-Request zu CSV
Dieser Code muss nur ausgeführt werden, wenn die Erhebung von Referenzdaten statt mit dem Beacon Backend Projekt, mit hook.ubeac.io erfasst wurden. In diesem Codesegment werden die JSON-Files verarbeitet und die Informationen so aufbereitet, dass diese zu denselben CSV-Files führen wie bei dem Beacon Backend Projekt.

In [12]:
import glob, os

path = "/content/drive/MyDrive/Colab Notebooks/Datenpunkte/"

os.chdir(path+"JSON/")

for jsonFile in glob.glob("*.json"):
  filename = os.path.splitext(jsonFile)[0]

  df = pd.read_json(path+"/JSON/"+filename+".json")

  dictionary_list = []

  for traceIndex in range(len(df)):
    currentTrace = df.iloc[traceIndex]
    timestamp = int(round(currentTrace.date.timestamp()))
    traceEntries = currentTrace.content.split('"')[3::2]
    for beaconIndex in range(len(traceEntries)):
      beaconData = traceEntries[beaconIndex].split(',')

      if len(beaconData[4]) >= 56: 
        beaconType = beaconData[4][6:18]
        uuid = beaconData[4][18:50]
        major = int(beaconData[4][50:54],16)
        minor = int(beaconData[4][54:58],16)
        majorminor = int(str(major)+str(minor))

        measuredPower = -60

        if majorminor == 100656666:
          measuredPower = -75
        #Id of the tag of the Beacon
        tagId = beaconData[1]
        gateway = beaconData[2]
        rssi = int(beaconData[3])
        distance = (10 ** ((measuredPower - rssi) / (10 * 3)))

        beacon = {'_id':tagId, 'distance':distance, 'gateway':gateway, 
                'major':major, 'majorminor':majorminor, 'minor':minor, 
                'rssi':rssi, 'timestamp':timestamp,'type':beaconType,
                'uuid':uuid}

        dictionary_list.append(beacon)
  transformedDf = pd.DataFrame.from_dict(dictionary_list)
  
  transformedDf.to_csv(r''+path+"/CSV/"+filename+'.csv', index=False)

# Datenanalyse und -aufbereitung
In diesem Codesegment werden die Daten analysiert und aufbereitet

In [47]:
import glob, os

path = "/content/drive/MyDrive/Colab Notebooks/Datenpunkte/CSV/"

os.chdir(path)

frame = list()
print(type(frame))

for csvFile in glob.glob("*.csv"):
  filename = os.path.splitext(csvFile)[0]
  tag = filename.split("_")[1]
  specificTag = "".join(filename.split("_")[1:])
  df = pd.read_csv(path+filename+".csv")
  df['tag'] = tag
  df['specificTag'] = specificTag
  frame.append(df)
  
masterDf = pd.concat(frame)

<class 'list'>


In [71]:
features = ["distance","major","majorminor","minor","rssi"]

requestIntervalInSeconds = 3

gateways = masterDf.gateway.unique()
minThreshold = masterDf.timestamp.min()
maxSequenceThreshold = minThreshold + requestIntervalInSeconds
maxThreshold = masterDf.head(40).timestamp.max()


print("min: "+str(minThreshold))
print("max: "+str(maxSequenceThreshold))
print("limit: "+str(maxThreshold))

for lowerThreshold in range(minThreshold, (maxThreshold + 1), requestIntervalInSeconds):
  upperThreshold = lowerThreshold + requestIntervalInSeconds

  for gateway in gateways:
    example = masterDf.loc[(masterDf.gateway == gateway) & 
                           (masterDf.timestamp >= minThreshold) & 
                           (masterDf.timestamp < maxSequenceThreshold)]
    example.columns = [col + '_' + gateway if col in features else col for col in df.columns]
    print(example.columns)
  print(lowerThreshold)
  print(upperThreshold)

min: 1678461689
max: 1678461692
limit: 1678461694
Index(['_id', 'distance_F3499FDED02E', 'gateway', 'major_F3499FDED02E',
       'majorminor_F3499FDED02E', 'minor_F3499FDED02E', 'rssi_F3499FDED02E',
       'timestamp', 'type', 'uuid', 'tag', 'specificTag'],
      dtype='object')
Index(['_id', 'distance_E06B09BAC79F', 'gateway', 'major_E06B09BAC79F',
       'majorminor_E06B09BAC79F', 'minor_E06B09BAC79F', 'rssi_E06B09BAC79F',
       'timestamp', 'type', 'uuid', 'tag', 'specificTag'],
      dtype='object')
1678461689
1678461692
Index(['_id', 'distance_F3499FDED02E', 'gateway', 'major_F3499FDED02E',
       'majorminor_F3499FDED02E', 'minor_F3499FDED02E', 'rssi_F3499FDED02E',
       'timestamp', 'type', 'uuid', 'tag', 'specificTag'],
      dtype='object')
Index(['_id', 'distance_E06B09BAC79F', 'gateway', 'major_E06B09BAC79F',
       'majorminor_E06B09BAC79F', 'minor_E06B09BAC79F', 'rssi_E06B09BAC79F',
       'timestamp', 'type', 'uuid', 'tag', 'specificTag'],
      dtype='object')
16784616