<a href="https://colab.research.google.com/github/vvv94/indoor-localization/blob/main/localization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installing required Packages and import Python Modules

In [4]:
!apt -qq -y install unzip >/dev/null 2>&1

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from warnings import simplefilter
from pandas.plotting import scatter_matrix
from sklearn import model_selection
from sklearn.utils import shuffle
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

%pylab inline
pylab.rcParams['figure.figsize'] = (50, 30)

Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


Preparing the dataset

In [5]:
!rm -r sample_data dataset README.rtf >/dev/null 2>&1
!wget https://www.utwente.nl/en/eemcs/ps/dataset-folder/soloc-ipin2017-dataset.zip -q 
!unzip soloc-ipin2017-dataset.zip >/dev/null 2>&1
!rm -r __MACOSX >/dev/null 2>&1
!mv ./SoLoc_IPIN2017_dataset ./dataset >/dev/null 2>&1
!rm *.zip >/dev/null 2>&1
!mv ./dataset/readme.rtf ./README.rtf >/dev/null 2>&1
!mkdir -p results >/dev/null 2>&1

Load Dataset into a Table

In [6]:
'''Project Parameters'''
DATASET_DIR = "./dataset/"
RESULTS_DIR = "./results"
DATASET_NAMES = [   
'P_SA',             # RSS values between the smartphone and 11 WiFi APs
'P_SA_Signatures',  # RSS values between the smartphone and 11 WiFi APs for calibration points (signatures)
'P_SA_Tests',       # RSS values between the smartphone and 11 WiFi APs for test points
'APLocs',           # x and y coordinates of 11 APs with RSS values stored in P_SA

'P_SB',             # RSS values between the smartphone and 11 WiFi APs
'P_SB_Signatures',  # RSS values between the smartphone and 46 Bluetooth beacons for calibration points (signatures)
'P_SB_Tests',       # RSS values between the smartphone and 46 Bluetooth beacons for test points
'BeaconLocs',       # x and y coordinates of 46 Bluetooth beacons with RSS values stored in P_SB

'MeasLocs',         # x and y coordinates of the smartphone with RSS values stored in P_SA and P_SB
'SignatureLocs',    # x and y coordinates of calibration points (signatures) with RSS values stored in P_SA_Signature and P_SB_Signature
'TestLocs',         # x and y coordinates of test points with RSS values stored in P_SA_Tests and P_SB_Tests
'P_Signatures',     # combination of RSS values for calibration points (signatures) - P_SA_Signature and P_SB_Signature
'P_Tests',          # combination of RSS values for test points - P_SA_Tests and P_SB_Tests
]

def pre_process(data_name):

  _colnames = []
  if data_name == "P_SA":
    _colnames=['WF1 RSS', 'WF2 RSS', 'WF3 RSS', 'WF4 RSS','WF5 RSS', 'WF6 RSS', 'WF7 RSS', 'WF8 RSS','WF9 RSS', 'WF10 RSS', 'WF11 RSS']
    data = pd.read_csv("./dataset/P_SA.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_SA_Signatures":
    _colnames=['WF1 RSS Sig.', 'WF2 RSS Sig.', 'WF3 RSS Sig.', 'WF4 RSS Sig.','WF5 RSS Sig.', 'WF6 RSS Sig.', 'WF7 RSS Sig.', 'WF8 RSS Sig.','WF9 RSS Sig.', 'WF10 RSS Sig.', 'WF11 RSS Sig.']
    data = pd.read_csv("./dataset/P_SA_Signatures.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_SA_Tests":
    _colnames=['WF1 RSS Test', 'WF2 RSS Test', 'WF3 RSS Test', 'WF4 RSS Test','WF5 RSS Test', 'WF6 RSS Test', 'WF7 RSS Test', 'WF8 RSS Test','WF9 RSS Test', 'WF10 RSS Test', 'WF11 RSS Test']
    data = pd.read_csv("./dataset/P_SA_Tests.csv", sep=';',names=_colnames,header=None)

  elif data_name == "APLocs":
    _colnames=['AP Coordinates', 'RSS Value in P_SA']
    data = pd.read_csv("./dataset/APLocs.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_SB":
    [_colnames.append('BT'+str(i)+' RSS') for i in range(1,47)]
    data = pd.read_csv("./dataset/P_SB.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_SB_Signatures":
    [_colnames.append('BT'+str(i)+' RSS Sig.') for i in range(1,47)]
    data = pd.read_csv("./dataset/P_SB_Signatures.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_SB_Tests":
    [_colnames.append('BT'+str(i)+' RSS Test') for i in range(1,47)]
    data = pd.read_csv("./dataset/P_SB_Tests.csv", sep=';',names=_colnames,header=None)

  elif data_name == "BeaconLocs":
    _colnames=['BT Coordinates', 'RSS Value in P_SB']
    data = pd.read_csv("./dataset/BeaconLocs.csv", sep='\t',names=_colnames,header=None)

  elif data_name == "MeasLocs":
    _colnames=['Smartphone Coordinates', 'RSS in P_SA and P_SB']
    data = pd.read_csv("./dataset/MeasLocs.csv", sep='	',names=_colnames,header=None)

  elif data_name == "SignatureLocs":
    _colnames=['Calibration Coordinates', 'RSS in P_SA and P_SB']
    data = pd.read_csv("./dataset/SignatureLocs.csv", sep=';',names=_colnames,header=None)

  elif data_name == "TestLocs":
    _colnames=['Test Coordinates', 'RSS in P_SA and P_SB']
    data = pd.read_csv("./dataset/TestLocs.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_Signatures":
    [_colnames.append('WF'+str(i)+' RSS Sign.') for i in range(1,12)]
    [_colnames.append('BT'+str(i)+' RSS Sign.') for i in range(12,58)]
    data = pd.read_csv("./dataset/P_Signatures.csv", sep=';',names=_colnames,header=None)

  elif data_name == "P_Tests":
    [_colnames.append('WF'+str(i)+' RSS Test') for i in range(1,12)]
    [_colnames.append('BT'+str(i)+' RSS Test') for i in range(12,58)]
    data = pd.read_csv("./dataset/P_Tests.csv", sep=';',names=_colnames,header=None)

  else:
    data = []

  return data

data = [pre_process(name) for idx, name in enumerate(DATASET_NAMES)]
dataset1 = [data[0], data[1], data[2], data[3]]
dataset2 = [data[4], data[5], data[6], data[7]]
dataset3 = [data[8], data[9], data[10], data[11], data[12]]

Load Model

In [7]:
dataset1[0].insert(loc=0,column="Points",value=list(range(603)))
# ['Poitns, WF1 RSS', 'WF2 RSS', 'WF3 RSS', 'WF4 RSS','WF5 RSS', 'WF6 RSS', 'WF7 RSS', 'WF8 RSS','WF9 RSS', 'WF10 RSS', 'WF11 RSS']
dataset1[2].insert(loc=0,column="Points",value=list(range(475)))
# ['Poitns, WF1 RSS Test', 'WF2 RSS Test', 'WF3 RSS Test', 'WF4 RSS Test','WF5 RSS Test', 'WF6 RSS Test', 'WF7 RSS Test', 'WF8 RSS Test','WF9 RSS Test', 'WF10 RSS Test', 'WF11 RSS Test']

#dataset1[0].plot(kind='box', subplots=True, layout=(5,5), sharex=False, sharey=False)
#dataset1[0].hist()
#scatter_matrix(dataset1[0])
#plt.show()

In [8]:
from sklearn.utils import shuffle

train_set = shuffle(dataset1[0].values, random_state=8)
test_set = shuffle(dataset1[2].values, random_state=8)
train_values = train_set[:, 1:6]
train_labels = train_set[:, 0]
test_values = test_set[:, 1:6]
test_labels = test_set[:, 0]

In [9]:
#print(train_values)
#print(train_labels)

In [10]:
import matplotlib.pyplot as plt
from warnings import simplefilter
from sklearn import model_selection

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score

models = []
models.append(('LR', LogisticRegression(max_iter=1000)))
models.append(('LDA', LinearDiscriminantAnalysis()))
models.append(('KNN', KNeighborsClassifier()))
models.append(('CART', DecisionTreeClassifier()))
models.append(('NB', GaussianNB()))
models.append(('SVM', SVC()))
scoring = 'accuracy'



# evaluate each model in turn

results = []
names = []

simplefilter(action='ignore', category=FutureWarning)


for name, model in models:

    kfold = model_selection.KFold(n_splits=10)
    cv_results = model_selection.cross_val_score(model, train_values, train_labels, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)

knn = KNeighborsClassifier()
knn.fit(train_values, train_labels)
predictions = knn.predict(test_values)

print()
print("predictions result :")
print("KNN accuracy score : ",accuracy_score(test_labels, predictions))
print("confusion matrix : ")
print(confusion_matrix(test_labels, predictions))
print()
print("classification report : ")
print(classification_report(test_labels, predictions))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logist

LR: 0.000000 (0.000000)
LDA: nan (nan)
KNN: 0.000000 (0.000000)
CART: 0.000000 (0.000000)
NB: 0.000000 (0.000000)
SVM: 0.000000 (0.000000)

predictions result :
KNN accuracy score :  0.002105263157894737
confusion matrix : 
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 ...
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]

classification report : 
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.00      0.00      0.00         1
           2       0.00      0.00      0.00         1
           3       0.00      0.00      0.00         1
           4       0.00      0.00      0.00         1
           5       0.00      0.00      0.00         1
           6       0.00      0.00      0.00         1
           7       0.00      0.00      0.00         1
           8       0.00      0.00      0.00         1
           9       0.00      0.00      0.00         1
          10       0.00      0.00  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [3]:
import pandas as pd

train_data = pd.read_csv("./trainingData.csv", sep=';',header=None)
print(train_data)

                                                      0
0     WAP001,WAP002,WAP003,WAP004,WAP005,WAP006,WAP0...
1     100,100,100,100,100,100,100,100,100,100,100,10...
2     100,100,100,100,100,100,100,100,100,100,100,10...
3     100,100,100,100,100,100,100,-97,100,100,100,10...
4     100,100,100,100,100,100,100,100,100,100,100,10...
...                                                 ...
5869  100,100,100,100,100,100,100,100,100,100,-89,10...
5870  100,100,100,100,100,-88,100,100,100,100,100,10...
5871  100,100,100,100,100,100,100,100,100,100,100,10...
5872  100,100,100,100,100,-89,100,100,100,100,100,10...
5873  100,100,100,100,100,100,100,100,100,100,100,10...

[5874 rows x 1 columns]
