In [1]:
import numpy as np
import csv
import pandas
import os
import statistics




In [2]:
# Read the NTUH AP list and cut the final three characters of every entry,
# e.g. '00:be:75:db:92:40' -> '00:be:75:db:92'.
with open("./BSSID.txt", 'r') as bssid_file:
    BSSID_list = [entry.strip('\n')[:-3] for entry in bssid_file.readlines()]

print(BSSID_list)




['00:be:75:db:92', '00:be:75:09:d7', '00:be:75:d5:21', '00:be:75:c9:6e', '00:be:75:d0:e2', '00:be:75:dd:6d', '00:be:75:09:dd', '00:be:75:dd:72', '00:be:75:d0:d8', '00:be:75:d5:25', '00:be:75:d5:24', '6c:b2:ae:2d:40', '00:be:75:d5:2c', '00:be:75:dd:74', '00:be:75:d5:33', '00:be:75:dd:75', '00:be:75:d8:79', '00:be:75:d8:74', '00:be:75:d5:1c', '00:be:75:d8:7c', '00:be:75:db:a5', '00:be:75:dd:67', '00:be:75:d0:dc', '00:be:75:d0:e0', '00:be:75:d8:67', '00:be:75:db:95', '00:be:75:d0:d1', '00:be:75:d5:1d', '00:be:75:d5:31', '00:be:75:d0:ce', '00:be:75:d5:2e', '00:be:75:d5:30', '00:be:75:d5:2c', '00:be:75:09:dc', '00:be:75:d5:2c', '00:be:75:d5:1c', '00:be:75:d5:33', '00:be:75:d0:e4', '00:be:75:d8:7d', '00:be:75:d5:25', '00:be:75:db:a4', '00:be:75:db:a7', '00:be:75:d5:32', '00:be:75:d8:7f', '00:be:75:d0:e1', '00:be:75:db:9b', '00:be:75:d5:28', '00:be:75:09:da', '00:be:75:db:a0', '00:be:75:db:a4', '00:be:75:dd:71', '00:be:75:d8:74', '00:be:75:d0:d6', '00:be:75:c9:75', '6c:b2:ae:2d:40', '00:be:75

In [3]:
def preprocess(folder="Data/NTUH_0904", output_path="./Fingerprint.txt", bssid_list=None):
    '''
    Preprocess the raw scan files into a fingerprint text file.

    Every sub-directory of ``folder`` is walked in sorted order and every file
    inside it is treated as one scan. For each scan a ``Position,<name>`` line
    is written, followed by one line per AP prefix (BSSID without its last
    three characters) in the form ``<prefix>,<mean>,<variance>,<readings>``.
    The mean is truncated to an int with ``int()``.

    Scan file format as read by this function: the first line is skipped;
    every other line is split on '|', field 2 is the BSSID and field 3 is the
    RSSI with the characters of "dBm" stripped from both ends.

    args:
        folder: root directory holding one sub-directory per scan batch.
        output_path: fingerprint file to (over)write.
        bssid_list: allowed AP prefixes; defaults to the notebook-level
            ``BSSID_list`` when omitted.
    output: the file at ``output_path``; a per-folder position count is printed.
    '''
    # A set makes the per-line membership test O(1) instead of scanning a list.
    allowed = set(BSSID_list if bssid_list is None else bssid_list)
    # List the folder before opening the output so that a missing data folder
    # does not truncate an existing fingerprint file.
    path_list = sorted(os.listdir(folder))

    # `with` guarantees the output is flushed and closed even if a scan file
    # is malformed and parsing raises.
    with open(output_path, 'w') as fd:
        for entry in path_list:
            sub_dir = os.path.join(folder, entry)
            if not os.path.isdir(sub_dir):
                # Stray files (e.g. .DS_Store) are not scan batches.
                continue
            count = 0
            for file in sorted(os.listdir(sub_dir)):
                # str.strip('.txt') removes any of the characters '.', 't', 'x'
                # from BOTH ends; only the literal suffix must be dropped.
                pos = file[:-4] if file.endswith('.txt') else file
                # Drop a trailing "(n)" duplicate marker of any length.
                if pos.endswith(")") and "(" in pos:
                    pos = pos[:pos.rindex("(")]
                print(f"Position,{pos}", file=fd)
                count += 1

                with open(os.path.join(sub_dir, file), 'r') as f:
                    lines = f.readlines()[1:]  # first line is skipped

                d = dict()
                for line in lines:
                    fields = line.split('|')
                    if len(fields) < 4:
                        # Blank or truncated line: no BSSID/RSSI to read.
                        continue
                    BSSID = fields[2][:-3]
                    if BSSID not in allowed:
                        continue
                    RSSI = float(fields[3].strip("dBm"))
                    d.setdefault(BSSID, []).append(RSSI)

                for BSSID, readings in d.items():
                    avg_RSSI = int(sum(readings) / len(readings))
                    var_RSSI = np.var(readings)
                    print(f"{BSSID},{avg_RSSI},{var_RSSI},{readings}", file=fd)
            print(f"the folder contains position count = {count}")

In [4]:
def preprocess_v2(folder="Data/NTUH_0904", output_path="./Fingerprint.txt", bssid_list=None):
    '''
    Preprocess the raw scan files into a fingerprint file using mean + median.

    Original design note (translated): split the virtual AP list into a front
    and a back half and average each, or use mean + median. This
    implementation uses mean + median.

    For every scan file a ``Position,<name>`` line is written, followed by two
    lines per AP prefix (BSSID without its last three characters):
    ``<prefix>,<mean>`` and ``<prefix>_,<median>``. Both values are truncated
    to an int with ``int()``.

    Scan file format as read by this function: the first line is skipped;
    every other line is split on '|', field 2 is the BSSID and field 3 is the
    RSSI with the characters of "dBm" stripped from both ends.

    args:
        folder: root directory holding one sub-directory per scan batch.
        output_path: fingerprint file to (over)write.
        bssid_list: allowed AP prefixes; defaults to the notebook-level
            ``BSSID_list`` when omitted.
    output: the file at ``output_path``; a per-folder position count is printed.
    '''
    print("using preprocess v2, mean + median method")
    # A set makes the per-line membership test O(1) instead of scanning a list.
    allowed = set(BSSID_list if bssid_list is None else bssid_list)
    # List the folder before opening the output so that a missing data folder
    # does not truncate an existing fingerprint file.
    path_list = sorted(os.listdir(folder))

    # `with` guarantees the output is flushed and closed even if parsing raises.
    with open(output_path, 'w') as fd:
        for entry in path_list:
            sub_dir = os.path.join(folder, entry)
            if not os.path.isdir(sub_dir):
                # Stray files (e.g. .DS_Store) are not scan batches.
                continue
            count = 0
            for file in sorted(os.listdir(sub_dir)):
                # str.strip('.txt') removes any of the characters '.', 't', 'x'
                # from BOTH ends; only the literal suffix must be dropped.
                pos = file[:-4] if file.endswith('.txt') else file
                # Drop a trailing "(n)" duplicate marker of any length.
                if pos.endswith(")") and "(" in pos:
                    pos = pos[:pos.rindex("(")]
                print(f"Position,{pos}", file=fd)
                count += 1

                with open(os.path.join(sub_dir, file), 'r') as f:
                    lines = f.readlines()[1:]  # first line is skipped

                d = dict()
                for line in lines:
                    fields = line.split('|')
                    if len(fields) < 4:
                        # Blank or truncated line: no BSSID/RSSI to read.
                        continue
                    BSSID = fields[2][:-3]
                    if BSSID not in allowed:
                        continue
                    RSSI = float(fields[3].strip("dBm"))
                    d.setdefault(BSSID, []).append(RSSI)

                for BSSID, readings in d.items():
                    avg_RSSI = int(sum(readings) / len(readings))
                    median_RSSI = int(statistics.median(readings))
                    print(f"{BSSID},{avg_RSSI}", file=fd)
                    # The trailing underscore marks the median feature of the same prefix.
                    print(f"{BSSID + '_'},{median_RSSI}", file=fd)
            print(f"the folder contains position count = {count}")

In [5]:
def preprocess_no_mean(folder="Data/NTUH_0904", output_path="./Fingerprint.txt", bssid_list=None):
    '''
    Preprocess the raw scan files into a fingerprint file without averaging.

    Original note (translated): the virtual APs are not merged into one AP.

    For every scan file a ``Position,<name>`` line is written. For every AP
    prefix (BSSID without its last three characters) the first reading is
    written as ``<prefix>,<rssi>``; every later reading of the same prefix
    overwrites a single ``<prefix>_,<rssi>`` entry, so at most the first and
    the last reading of a prefix are kept.

    Scan file format as read by this function: the first line is skipped;
    every other line is split on '|', field 2 is the BSSID and field 3 is the
    RSSI with the characters of "dBm" stripped from both ends.

    args:
        folder: root directory holding one sub-directory per scan batch.
        output_path: fingerprint file to (over)write.
        bssid_list: allowed AP prefixes; defaults to the notebook-level
            ``BSSID_list`` when omitted.
    output: the file at ``output_path``; a per-folder position count is printed.
    '''
    print("using preprocess_no_mean, selecting the last two virtual ap, kind of random")
    # A set makes the per-line membership test O(1) instead of scanning a list.
    allowed = set(BSSID_list if bssid_list is None else bssid_list)
    # List the folder before opening the output so that a missing data folder
    # does not truncate an existing fingerprint file.
    path_list = sorted(os.listdir(folder))

    # `with` guarantees the output is flushed and closed even if parsing raises.
    with open(output_path, 'w') as fd:
        for entry in path_list:
            sub_dir = os.path.join(folder, entry)
            if not os.path.isdir(sub_dir):
                # Stray files (e.g. .DS_Store) are not scan batches.
                continue
            count = 0
            for file in sorted(os.listdir(sub_dir)):
                # str.strip('.txt') removes any of the characters '.', 't', 'x'
                # from BOTH ends; only the literal suffix must be dropped.
                pos = file[:-4] if file.endswith('.txt') else file
                # Drop a trailing "(n)" duplicate marker of any length.
                if pos.endswith(")") and "(" in pos:
                    pos = pos[:pos.rindex("(")]
                print(f"Position,{pos}", file=fd)
                count += 1

                with open(os.path.join(sub_dir, file), 'r') as f:
                    lines = f.readlines()[1:]  # first line is skipped

                d = dict()
                for line in lines:
                    fields = line.split('|')
                    if len(fields) < 4:
                        # Blank or truncated line: no BSSID/RSSI to read.
                        continue
                    BSSID = fields[2][:-3]
                    if BSSID not in allowed:
                        continue
                    RSSI = float(fields[3].strip("dBm"))
                    # First reading keeps the plain prefix; later readings all
                    # land on "<prefix>_", each overwriting the previous one.
                    key = BSSID if BSSID not in d else BSSID + "_"
                    d[key] = RSSI

                for key, rssi in d.items():
                    print(f"{key},{rssi}", file=fd)
            print(f"the folder contains position count = {count}")

In [6]:
def preprocess_no_mean_v2(folder="Data/NTUH_0904", output_path="./Fingerprint.txt", bssid_list=None):
    '''
    Preprocess the raw scan files into a fingerprint file keyed by full BSSID.

    Original note (translated): the virtual APs are not merged into one AP;
    this is the most detailed variant, but training on it is heavy.

    For every scan file a ``Position,<name>`` line is written. A reading is
    kept when its BSSID without the last three characters is an allowed
    prefix, but it is stored under the FULL BSSID. The first reading of a
    BSSID is written as ``<bssid>,<rssi>``; every later reading of the same
    BSSID overwrites a single ``<bssid>_,<rssi>`` entry.

    Scan file format as read by this function: the first line is skipped;
    every other line is split on '|', field 2 is the BSSID and field 3 is the
    RSSI with the characters of "dBm" stripped from both ends.

    args:
        folder: root directory holding one sub-directory per scan batch.
        output_path: fingerprint file to (over)write.
        bssid_list: allowed AP prefixes; defaults to the notebook-level
            ``BSSID_list`` when omitted.
    output: the file at ``output_path``; a per-folder position count is printed.
    '''
    print("using preprocess_no_mean_v2 with the most detailed information, all virtual aps are take into consideration")

    # A set makes the per-line membership test O(1) instead of scanning a list.
    allowed = set(BSSID_list if bssid_list is None else bssid_list)
    # List the folder before opening the output so that a missing data folder
    # does not truncate an existing fingerprint file.
    path_list = sorted(os.listdir(folder))

    # `with` guarantees the output is flushed and closed even if parsing raises.
    with open(output_path, 'w') as fd:
        for entry in path_list:
            sub_dir = os.path.join(folder, entry)
            if not os.path.isdir(sub_dir):
                # Stray files (e.g. .DS_Store) are not scan batches.
                continue
            count = 0
            for file in sorted(os.listdir(sub_dir)):
                # str.strip('.txt') removes any of the characters '.', 't', 'x'
                # from BOTH ends; only the literal suffix must be dropped.
                pos = file[:-4] if file.endswith('.txt') else file
                # Drop a trailing "(n)" duplicate marker of any length.
                if pos.endswith(")") and "(" in pos:
                    pos = pos[:pos.rindex("(")]
                print(f"Position,{pos}", file=fd)
                count += 1

                with open(os.path.join(sub_dir, file), 'r') as f:
                    lines = f.readlines()[1:]  # first line is skipped

                d = dict()
                for line in lines:
                    fields = line.split('|')
                    if len(fields) < 4:
                        # Blank or truncated line: no BSSID/RSSI to read.
                        continue
                    BSSID = fields[2]
                    # Filter on the prefix, but keep the full BSSID as the key.
                    if BSSID[:-3] not in allowed:
                        continue
                    RSSI = float(fields[3].strip("dBm"))
                    # A repeated BSSID goes to "<bssid>_", overwriting any
                    # earlier repeat.
                    key = BSSID if BSSID not in d else BSSID + "_"
                    d[key] = RSSI

                for key, rssi in d.items():
                    print(f"{key},{rssi}", file=fd)
            print(f"the folder contains position count = {count}")

In [7]:
def save_fingerprint(dir_path, training_wifi_pos, testing_wifi_pos):
    '''
    Save the training and testing fingerprint arrays as .npy files.

    args:
        dir_path: target directory; created (with parents) if it is missing.
        training_wifi_pos: array written to ``training_wifi_pos.npy``.
        testing_wifi_pos: array written to ``testing_wifi_pos.npy``.
    output: the two .npy files inside ``dir_path``; the directory and both
        array shapes are printed.
    '''
    print('='*50)
    print(dir_path)
    print('training wifi pos shape', training_wifi_pos.shape)
    print('testing wifi pos shape', testing_wifi_pos.shape)

    # np.save does not create missing parent directories, so a fresh checkout
    # without the Dataset folder would otherwise fail here.
    os.makedirs(dir_path, exist_ok=True)

    training_wifi_pos_path = os.path.join(dir_path, 'training_wifi_pos.npy')
    testing_wifi_pos_path = os.path.join(dir_path, 'testing_wifi_pos.npy')

    np.save(training_wifi_pos_path, training_wifi_pos)
    np.save(testing_wifi_pos_path, testing_wifi_pos)

In [8]:
#for preprocessing Fingerprint.txt to training & testing data
def NTUH_2F(fingerprint_file_path="./Fingerprint.txt", output_dir="./Dataset/NTUH_2F"):
    '''
    Convert a fingerprint text file into training and testing arrays.

    The file is a sequence of ``Position,<x>,<y>`` lines, each followed by the
    AP lines of that scan. For an AP line the first comma-separated field is
    the AP key and the second is its RSSI; any further fields are ignored.
    Every distinct AP key gets one column, numbered in order of first
    appearance in the file.

    Each scan becomes one float64 row of length ``total_APs + 2``: AP columns
    default to -100 when the AP was not seen, and the last two entries hold
    (x, y). The first scan seen at a position goes to the testing set; every
    further scan at the same position goes to the training set.

    args:
        fingerprint_file_path: fingerprint text file to read.
        output_dir: directory handed to ``save_fingerprint``.
    output: arrays saved through ``save_fingerprint``; the AP count and every
        fingerprint row are printed.
    '''
    with open(fingerprint_file_path, 'r') as f:
        lines = f.read().splitlines()

    bssid_index = {}
    # One (pos, {ap_key: rssi}) pair per "Position" line, in file order.
    records = []
    readings = None
    for raw in lines:
        elements = raw.split(',')
        if elements[0] == 'Position':
            _, x, y = elements
            # float() instead of eval(): the coordinates come from file names
            # and must never be executed as Python code.
            readings = {}
            records.append(((float(x), float(y)), readings))
        elif len(elements) >= 2:
            bssid = elements[0]
            if bssid not in bssid_index:
                bssid_index[bssid] = len(bssid_index)
            # AP lines that precede the first Position line are indexed but
            # belong to no scan.
            if readings is not None:
                # A repeated key within one scan keeps the last value.
                readings[bssid] = float(elements[1])
        # Lines with fewer than two fields (e.g. blank lines) are ignored.

    total_APs = len(bssid_index)
    print(f"total ap = {total_APs}")

    training_wifi_pos = []
    testing_wifi_pos = []
    testing_set = set()

    # Iterating parsed records (instead of re-indexing `lines`) also handles a
    # trailing Position line that has no AP readings after it.
    for pos, readings in records:
        fingerprint = np.full(total_APs+2, -100, dtype=np.float64)
        fingerprint[-2:] = pos
        for bssid, rssi in readings.items():
            fingerprint[bssid_index[bssid]] = rssi
        print(fingerprint)
        if pos not in testing_set:
            testing_set.add(pos)
            testing_wifi_pos.append(fingerprint)
        else:
            training_wifi_pos.append(fingerprint)

    training_wifi_pos = np.array(training_wifi_pos, dtype=np.float64)
    testing_wifi_pos = np.array(testing_wifi_pos, dtype=np.float64)
    save_fingerprint(output_dir, training_wifi_pos, testing_wifi_pos)

In [9]:
# Step 1: regenerate ./Fingerprint.txt from the raw scans. Only one of the
# preprocess variants should be active, because each one overwrites the file.
preprocess_no_mean_v2()
# Alternative variant (mean + median features):
#preprocess_v2()
# Step 2: turn ./Fingerprint.txt into the training/testing .npy arrays.
NTUH_2F()

using preprocess_no_mean_v2 with the most detailed information, all virtual aps are take into consideration
the folder contains position count = 56
the folder contains position count = 96
total ap = 544
[ -82.  -83.  -83.  -84.  -84.  -84.  -84.  -84.  -79.  -79.  -79.  -79.
  -78.  -79.  -79.  -78.  -79.  -76.  -78.  -78.  -80.  -78.  -76.  -76.
  -76.  -76.  -91.  -63.  -63.  -64.  -63.  -79.  -65.  -65.  -79.  -78.
  -80.  -77.  -78.  -78.  -78.  -77.  -81.  -65.  -64.  -66.  -65.  -68.
  -65.  -65.  -69.  -68.  -68.  -68.  -67.  -70.  -69.  -68.  -68.  -67.
  -81.  -81.  -81.  -81.  -81.  -81.  -88.  -88.  -88.  -87.  -87.  -87.
  -87.  -87.  -87.  -89.  -81.  -80.  -79.  -83.  -83.  -83.  -83.  -84.
  -85.  -85.  -85.  -83.  -86.  -83.  -82.  -82.  -81.  -79.  -81.  -84.
  -83.  -81.  -78.  -83.  -77.  -82.  -83.  -77.  -77.  -78.  -78.  -77.
  -75.  -75.  -75.  -72.  -75.  -74.  -71.  -72.  -71.  -72.  -71.  -71.
  -73.  -71.  -71.  -84.  -83.  -82.  -82.  -85.  -83.  -83.  -83. 