In [7]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_absolute_error,mean_squared_error
from sklearn.model_selection import train_test_split
%matplotlib inline
import math as m
pd.set_option("display.precision", 8)

# the height from sealevel where the mesurement was taken
elevation = 117
epoch = 20
batch = 32

def softmax(x):
    '''softmax for multiclass classification'''
    summary = 0
    for i in range(len(x)):
        summary += m.exp(x[i])
    for j in range(len(x)):
        x[j] = m.exp(x[j])/summary
    return x

def indexlist(data,):
    '''use indeces to mark the 143 gps coordinates'''
    checklist = data['ref_x'].unique()
    for i in range(len(checklist)):
        data['point_index'] = data['point_index'].replace([checklist[i]],i)
    return data

def convert(x):# make the shape of x
    '''convert the indeces to one-hot matrices'''
    y=np.zeros([len(x),143])
    z=np.eye(143)
    for i in range(len(x)):
        y[i]=(z[(x[i])])
    return y

# convert latitude, longtitude to x,y,z coordinate
def cartesian(longitude,latitude, elevation):
    R = 6371000 + elevation  # relative to centre of the earth
    X = R * math.cos(longitude) * math.sin(latitude)
    Y = R * math.sin(longitude) * math.sin(latitude)
    return X,Y

def training(x_train,y_train,epoch,W,b):
    '''train W and b for epoches'''
    for times in range(epoch):
        for i in range(len(x_train)):
            y = y_train[i]
            y_pred = softmax(np.dot(W,x_train[i]) + b)
            loss = y_pred - y
            W -= 2 * eta * loss.reshape(143,1)*x_train[i]
            b -= 2 * eta * loss
            #print(loss)

def test_accuracy(x_test,y_test):
    right = 0
    wrong = 0
    for j in range(len(x_test)):
        y_ = y_test[j]
        y_p = np.argmax(softmax(np.dot(W,x_test[j]) + b))
        if y_ == y_p:
            right +=1
        else:
            wrong +=1
            #print("{} is misrecognized as {}".format(y_test[j],y_p))
    accuracy = right/(right+wrong)
    print("the accuracy rate is= {}".format(accuracy))
    return accuracy


def main(x_train,y_train,epoch,W,b,x_test,y_test):
    last_acc = 0
    current_acc = 0
    i=0
    bad = 0
    active = True
    acc_list = []
    acc_list.append(current_acc)
    while active:
        
        if current_acc <= last_acc:
            bad += 1
        else:
            bad = 0
        i+=1
        training(x_train,y_train,epoch,W,b)
        print('After {} epoches,'.format(epoch*i))
        last_acc = max(acc_list)
        current_acc = test_accuracy(x_test,y_test)
        acc_list.append(current_acc)
        if bad >= 5:
            active = False
    return W,b


In [11]:
#read files
rsrp = pd.read_csv('rds-SRSRP.csv')
gps = pd.read_csv("gps.csv")
#combine ref_x and ref_y
gps['coordinate'] =  list(zip(gps.ref_x, gps.ref_y))
#input indeces for each individual gps coordinates
gps['point_index'] = gps['ref_x']
gps = indexlist(gps)
checklist = pd.DataFrame(gps.coordinate.unique(),columns = ['coordinate'])
#regularization
rsrp_reg = (rsrp - rsrp.min().min())/(rsrp.max().max()-rsrp.min().min())

In [5]:
X_train, X_test, y_train, y_test = train_test_split(rsrp_reg, gps[['point_index']],
                                                        test_size=0.20,random_state=101)

#set initial parameters
W = np.zeros((143,8))
b = np.zeros(143)
eta = 2

In [6]:
X_train,y_train = X_train.to_numpy(),y_train.to_numpy()
X_test,y_test = X_test.to_numpy(),y_test.to_numpy()
#convert y_train into the shape of (,143)
y_train = y_train.astype(int)
y_train = np.array(y_train)
y_train = convert(y_train)

In [7]:
iteration,acc_list = main(X_train,y_train,epoch,W,b,X_test,y_test)

After 20 epoches,
the accuracy rate is= 0.19641255605381167
After 40 epoches,
the accuracy rate is= 0.2367713004484305
After 60 epoches,
the accuracy rate is= 0.24663677130044842
After 80 epoches,
the accuracy rate is= 0.25112107623318386
After 100 epoches,
the accuracy rate is= 0.25650224215246636
After 120 epoches,
the accuracy rate is= 0.2556053811659193
After 140 epoches,
the accuracy rate is= 0.2591928251121076
After 160 epoches,
the accuracy rate is= 0.2645739910313901
After 180 epoches,
the accuracy rate is= 0.2645739910313901
After 200 epoches,
the accuracy rate is= 0.2663677130044843
After 220 epoches,
the accuracy rate is= 0.2672645739910314
After 240 epoches,
the accuracy rate is= 0.26547085201793724
After 260 epoches,
the accuracy rate is= 0.2645739910313901
After 280 epoches,
the accuracy rate is= 0.262780269058296
After 300 epoches,
the accuracy rate is= 0.26547085201793724


In [12]:
se_rec = []
#pred = np.array(prediction)
test = np.array(y_test)
for i in range(len(test)):
    y_ = int(test[i])
    y_p = np.argmax(softmax(np.dot(W,X_test[i]) + b))
    if y_ == y_p:
        se = 0
        se_rec.append(se)
    else:
        lon_pred, lat_pred = checklist.iloc[y_p][0]
        lon_, lat_ = checklist.iloc[y_][0]
        err_lon = lon_pred - lon_
        err_lat = lat_pred -lat_
        err_x = err_lon*((40000/360)*1000*m.cos(lat_pred))
        err_y = err_lat*((40000/360)*1000)
        se = err_x**2 + err_y**2
        se_rec.append(se)
rmse = np.sqrt(sum(se_rec)/len(test))
print("RMSE is {:.2f}m".format(rmse))
a = np.sqrt(se_rec)
b = np.sort(a)
print("When the accuracy is 80%,RMSE is {:.2f}m".format(b[int(len(a)*0.8)]))

RMSE is 11.15m
When the accuracy is 80%,RMSE is 14.47m


In [4]:
#read files
rsrp = pd.read_csv('rds-SRSRP.csv')
gps = pd.read_csv("gps.csv")
#combine ref_x and ref_y
gps['coordinate'] =  list(zip(gps.ref_x, gps.ref_y))
#input indeces for each individual gps coordinates
gps['point_index'] = gps['ref_x']
gps = indexlist(gps)
#regularization
rsrp_reg = (rsrp - rsrp.min().min())/(rsrp.max().max()-rsrp.min().min())
gps_reg = gps
#gps_reg['point_index'] = gps['point_index']/gps['point_index'].max()

#combine rsrp and point index and then split
result = pd.concat([gps_reg['point_index'], rsrp_reg], axis=1, join='inner')
train = result.sample(frac = 0.8)
test = result.drop(train.index)
x_train,y_train = (train.drop(['point_index'],axis=1),train['point_index'])
x_test,y_test = (test.drop(['point_index'],axis=1),test['point_index'])
#convert to np.array
x_train,y_train = x_train.to_numpy(),y_train.to_numpy()
x_test,y_test = x_test.to_numpy(),y_test.to_numpy()
#convert y_train into the shape of (,143)
y_train = y_train.astype(int)
y_train = np.array(y_train)
y_train = convert(y_train)

In [5]:
#set initial parameters
W = np.zeros((143,8))
b = np.zeros(143)
eta = 2

In [8]:
W,b = main(x_train,y_train,epoch,W,b,x_test,y_test)

After 20 epoches,
the accuracy rate is= 0.29084380610412924
After 40 epoches,
the accuracy rate is= 0.2926391382405745
After 60 epoches,
the accuracy rate is= 0.3007181328545781
After 80 epoches,
the accuracy rate is= 0.30610412926391384
After 100 epoches,
the accuracy rate is= 0.30610412926391384
After 120 epoches,
the accuracy rate is= 0.30430879712746856
After 140 epoches,
the accuracy rate is= 0.3052064631956912
After 160 epoches,
the accuracy rate is= 0.3052064631956912
After 180 epoches,
the accuracy rate is= 0.3052064631956912
After 200 epoches,
the accuracy rate is= 0.30430879712746856


In [None]:
def error(x_test,y_test):
    for j in range(len(x_test)):
        y_ = y_test[j]
        y_p = np.argmax(softmax(np.dot(W,x_test[j]) + b))
        if y_ == y_p:
            right +=1
        else:
            wrong +=1
            #print("{} is misrecognized as {}".format(y_test[j],y_p))
    accuracy = right/(right+wrong)
    print("the accuracy rate is= {}".format(accuracy))
    return accuracy

In [9]:
y_test

array([  0.,   0.,   0., ..., 142., 142., 142.])

In [10]:
checklist = pd.DataFrame(gps.coordiante.unique(),columns = ['coordinate'])

AttributeError: 'DataFrame' object has no attribute 'coordiante'

In [60]:
se_rec = []
for i in range(len(x_test)):
    y_ = int(y_test[i])
    y_p = int(np.argmax(softmax(np.dot(W,x_test[i]) + b)))
    if y_ == y_p:
        se = 0
        se_rec.append(se)
    else:
        lon_pred, lat_pred = checklist.iloc[y_p][0]
        lon_, lat_ = checklist.iloc[y_][0]
        err_lon = lon_pred - lon_
        err_lat = lat_pred -lat_
        err_x = err_lon*((40000/360)*1000*m.cos(lat_pred))
        err_y = err_lat*((40000/360)*1000)
        se = err_x**2 + err_y**2
        se_rec.append(se)

In [65]:
rmse = np.sqrt(sum(se_rec)/len(x_test))

In [66]:
rmse

11.291618117193384

In [73]:
a = np.sqrt(se_rec)
b = np.sort(a)

In [69]:
a.max()*0.3

24.242498568174838

In [76]:
b[int(len(a)*0.8)]

12.667347021803259

In [75]:
b

array([ 0.        ,  0.        ,  0.        , ..., 76.21781724,
       78.31955124, 80.80832856])