In [1]:
import pandas as pd
import numpy as np
import re
from datetime import datetime as dt
from scipy.spatial import distance
import scipy.stats as stats
import itertools as it

%matplotlib inline

In [2]:
df = pd.read_csv(".\\_generated\\wifi_1.data", index_col = False, header = None, low_memory = False, \
                 names = ['timestamp', 'uuid', 'bssid', 'ssid', 'capabilities', \
                          'freq0', 'freq1', 'chwidth', 'freq', 'level', 'operator', \
                          'time', 'venue', '802', 'passpoint'])

In [3]:
df['timestamp'] = df['timestamp'].apply(lambda x: dt.strptime(x, '%d.%m.%Y_%H:%M:%S.%f'))
df.index = pd.DatetimeIndex(df.timestamp)
df = df.drop(df.columns.difference(['uuid', 'bssid']), axis = 1)
df = df.sort_index()

In [4]:
bssid_map = { bssid.replace(' ', ''): 0 for bssid in df.bssid.unique() }

In [5]:
def update_map(bssid):
    bssid_map[bssid] += 1
    return bssid

In [6]:
_ = df.bssid.apply(lambda bssid: update_map(bssid))

In [7]:
bssid_map = dict(sorted(bssid_map.items(), key=lambda item: item[1], reverse=True)[:80])

In [8]:
sum_of_freq = 0
for key, value in bssid_map.items():
    sum_of_freq += value
    
bssid_map = { key: value / sum_of_freq for key, value in bssid_map.items() }

In [9]:
bssid_map

{'34:ce:00:4d:64:1e': 0.04389406345484589,
 '34:ce:00:4d:64:1d': 0.04337527347247736,
 '40:b4:f0:28:29:c3': 0.04262570502820113,
 '40:b4:f0:28:29:c1': 0.04262570502820113,
 '50:d4:f7:66:35:d0': 0.040964100102465636,
 '40:b4:f0:28:29:c2': 0.03946311698621791,
 '40:b4:f0:28:29:c0': 0.03900709874548828,
 'd4:6e:0e:a0:40:56': 0.03855108050475865,
 '20:4e:7f:75:b5:36': 0.038418152110699814,
 'b0:a8:6e:41:ec:c2': 0.027457098283931357,
 'b0:a8:6e:41:ec:c1': 0.02696784794468702,
 'b0:a8:6e:41:ec:c3': 0.026954924350820187,
 '3c:8a:b0:f9:27:02': 0.026659527919578322,
 'b0:a8:6e:41:ec:c0': 0.02661152599950152,
 '10:0e:7e:17:02:c2': 0.02365940791477813,
 '3c:8a:b0:f9:27:00': 0.023020613132217597,
 '10:0e:7e:17:02:c0': 0.02122977226781379,
 '50:64:2b:4e:08:dd': 0.019887564733358565,
 '2c:4d:54:5b:80:28': 0.019708480646918186,
 'b0:a8:6e:41:e4:82': 0.01921553785228332,
 'bc:ee:7b:68:0f:4c': 0.017991488890324845,
 '40:31:3c:28:93:ed': 0.01724007421835335,
 'b0:a8:6e:41:e4:80': 0.016800672026881076,
 

In [10]:
df_2 = pd.read_csv(".\\_generated\\wifi_2.data", index_col = False, header = None, low_memory = False, \
                 names = ['timestamp', 'uuid', 'bssid', 'ssid', 'capabilities', \
                          'freq0', 'freq1', 'chwidth', 'freq', 'level', 'operator', \
                          'time', 'venue', '802', 'passpoint'])
df_3 = pd.read_csv(".\\_generated\\wifi_3.data", index_col = False, header = None, low_memory = False, \
                 names = ['timestamp', 'uuid', 'bssid', 'ssid', 'capabilities', \
                          'freq0', 'freq1', 'chwidth', 'freq', 'level', 'operator', \
                          'time', 'venue', '802', 'passpoint'])

df_ = pd.concat([df_2, df_3])

df_['timestamp'] = df_['timestamp'].apply(lambda x: dt.strptime(x, '%d.%m.%Y_%H:%M:%S.%f'))
df_.index = pd.DatetimeIndex(df_.timestamp)
df_ = df_.drop(df_.columns.difference(['uuid', 'bssid']), axis = 1)
df_ = df_.sort_index()

In [17]:
bssid_map_ = { key: 0 for key, value in bssid_map.items() }

def update_invalid_map(bssid):
    if bssid in bssid_map.keys():
        bssid_map[bssid] += 1
    return bssid

_ = df_.bssid.apply(lambda bssid: update_invalid_map(bssid))

In [18]:
bssid_map_

{'34:ce:00:4d:64:1e': 0,
 '34:ce:00:4d:64:1d': 0,
 '40:b4:f0:28:29:c3': 0,
 '40:b4:f0:28:29:c1': 0,
 '50:d4:f7:66:35:d0': 0,
 '40:b4:f0:28:29:c2': 0,
 '40:b4:f0:28:29:c0': 0,
 'd4:6e:0e:a0:40:56': 0,
 '20:4e:7f:75:b5:36': 0,
 'b0:a8:6e:41:ec:c2': 0,
 'b0:a8:6e:41:ec:c1': 0,
 'b0:a8:6e:41:ec:c3': 0,
 '3c:8a:b0:f9:27:02': 0,
 'b0:a8:6e:41:ec:c0': 0,
 '10:0e:7e:17:02:c2': 0,
 '3c:8a:b0:f9:27:00': 0,
 '10:0e:7e:17:02:c0': 0,
 '50:64:2b:4e:08:dd': 0,
 '2c:4d:54:5b:80:28': 0,
 'b0:a8:6e:41:e4:82': 0,
 'bc:ee:7b:68:0f:4c': 0,
 '40:31:3c:28:93:ed': 0,
 'b0:a8:6e:41:e4:80': 0,
 'd8:47:32:c3:1b:90': 0,
 'c0:25:e9:92:cc:5a': 0,
 '5e:ea:1d:8b:34:df': 0,
 '2c:fd:a1:05:25:c4': 0,
 '40:b4:f0:9b:f9:00': 0,
 'ec:08:6b:c9:c4:a0': 0,
 '40:b4:f0:9b:f9:02': 0,
 'f4:f2:6d:4c:5e:ea': 0,
 '7c:8b:ca:2c:6c:a8': 0,
 'ea:d8:d1:7a:da:af': 0,
 '0c:8f:ff:81:2b:f8': 0,
 '50:ff:20:1f:55:44': 0,
 'fc:8b:97:54:97:ea': 0,
 '74:d0:2b:69:2e:54': 0,
 '30:b5:c2:3f:ab:e2': 0,
 '10:0e:7e:17:02:c1': 0,
 '10:0e:7e:17:02:c3': 0,


In [None]:
sum_of_freq_ = 0
for key, value in bssid_map_.items():
    sum_of_freq_ += value
    
bssid_map_ = { key: value / sum_of_freq_ for key, value in bssid_map.items() }

In [None]:
bssid_map_

In [4]:
def agg_string_join(col):
    col = col.apply(lambda x: str(x))
    return col.str.cat(sep = ',').replace(' ', '')

In [8]:
df_quantum = df.groupby(['timestamp', 'uuid']).agg({'bssid' : agg_string_join})

In [10]:
df_quantum

Unnamed: 0_level_0,Unnamed: 1_level_0,bssid
timestamp,uuid,Unnamed: 2_level_1
2020-12-06 17:56:05.536,b018455a-f744-4479-88c8-2328ad3efc5f,"34:ce:00:4d:64:1d,b0:a8:6e:41:ec:c3,b0:a8:6e:4..."
2020-12-06 17:56:08.521,615c3cad-870e-461b-b49f-bee42bdcc3e6,"b0:a8:6e:41:e4:82,3c:8a:b0:f9:27:02,3c:8a:b0:f..."
2020-12-06 17:56:12.036,db79802c-7f27-4cb5-bf03-2ce6e751b13f,"3c:8a:b0:f9:27:00,3c:8a:b0:f9:27:02,10:0e:7e:1..."
2020-12-06 17:56:19.939,03a1ca3e-5208-44e6-94ca-0f5f0242b98b,"bc:ee:7b:68:0f:4c,34:ce:00:4d:64:1d,34:ce:00:4..."
2020-12-06 17:56:26.836,841e61eb-5cd2-47a6-890e-28e61e254000,"10:0e:7e:17:02:c2,d4:6e:0e:a0:40:56,fc:8b:97:5..."
2020-12-06 17:56:30.335,6f976e70-5067-4136-b96d-6a6f9b30e801,"40:b4:f0:28:29:c3,50:d4:f7:66:35:d0,40:b4:f0:2..."
2020-12-06 17:56:34.951,706d09df-f988-4cff-871d-a1be3d267202,"d4:6e:0e:a0:40:56,2c:fd:a1:05:25:c4,b0:a8:6e:4..."
2020-12-06 17:56:39.942,4d439a12-5d16-4316-a726-a04c6a1c8338,"2c:fd:a1:05:25:c4,2c:56:dc:40:7a:3c,10:0e:7e:1..."
2020-12-06 17:56:44.326,7e058500-7129-4639-b847-79c7469b8d64,"40:b4:f0:28:29:c3,40:b4:f0:28:29:c1,10:0e:7e:1..."
2020-12-06 17:56:49.361,62de2702-a78d-4d10-8fb2-3f5861516709,"3c:8a:b0:f9:27:02,b0:a8:6e:41:e4:82,10:0e:7e:1..."
