# Import Headers

In [0]:
import numpy as np
import pandas as pd
import datetime
import tqdm

# Load Data

In [0]:
# Read the raw network scan records from the CSV extract (path is relative to the working directory).
wifi_ds = pd.read_csv('extract_network.csv')

# Start Work

### Explore Data

In [4]:
# Show the dtype pandas inferred for each column when reading the CSV.
wifi_ds.dtypes

suid           object
sensing_ts     object
ssid           object
bssid          object
rssi          float64
dtype: object

In [5]:
# Display the frame; the rendered output is the notebook's truncated head/tail preview.
wifi_ds

Unnamed: 0,suid,sensing_ts,ssid,bssid,rssi
0,017e47f973145d25ae16c3913afa46923517101199ff0a...,2018-04-08 12:13:00.966,sisma,1e:5f:2f:ce:d9:a0,-81.0
1,017e47f973145d25ae16c3913afa46923517101199ff0a...,2018-04-08 12:13:00.972,FREEBOX_AGNES_AP,2e:90:48:12:e9:14,-84.0
2,017e47f973145d25ae16c3913afa46923517101199ff0a...,2018-04-08 12:13:00.973,FreeWifi,1e:5f:2f:ce:d9:a2,-83.0
3,017e47f973145d25ae16c3913afa46923517101199ff0a...,2018-04-08 12:13:00.973,FreeWifi_secure,1e:5f:2f:ce:d9:a3,-84.0
4,017e47f973145d25ae16c3913afa46923517101199ff0a...,2018-04-08 12:13:00.973,FreeWifi_secure,f4:ca:e5:a1:2c:5a,-85.0
...,...,...,...,...,...
180269,69eec04fc9eea08529a35f9cc091765396cf44f495afdd...,2018-04-30 19:21:07.006,,fe:a6:67:0a:b4:e3,-85.0
180270,69eec04fc9eea08529a35f9cc091765396cf44f495afdd...,2018-04-30 19:21:07.006,DIRECT-BO[TV]rayda,be:8c:cd:7c:28:84,-64.0
180271,69eec04fc9eea08529a35f9cc091765396cf44f495afdd...,2018-04-30 19:21:07.006,Smith,40:70:09:9f:d0:b0,-70.0
180272,69eec04fc9eea08529a35f9cc091765396cf44f495afdd...,2018-04-30 19:21:07.006,Smith-5G,40:70:09:9f:d0:b5,-81.0


In [6]:
# Overall size and missing-value summary.
print(f"Data shape: {wifi_ds.shape!s}")
print(f"NaN counts: \n{wifi_ds.isna().sum()!s}\n")

# Distinct-value counts; dropna=False keeps NaN counted as a value, as len(unique()) does.
print(f"#Unique devices: {wifi_ds['suid'].nunique(dropna=False)}")
print(f"#Unique Wifis: {wifi_ds['ssid'].nunique(dropna=False)}")
print(f"#Unique MACs: {wifi_ds['bssid'].nunique(dropna=False)}")

# Time span covered by the records.
print(f"First datetime of records: \t{wifi_ds['sensing_ts'].min()!s}")
print(f"Last datetime of records: \t{wifi_ds['sensing_ts'].max()!s}")

# Per device: number of rows and number of distinct bssid values among them.
print('#Seen WIFI by device: ')
for device_id in wifi_ds['suid'].unique():
  device_macs = wifi_ds.loc[wifi_ds['suid'] == device_id, 'bssid']
  print(f"{device_id!s} => \t({len(device_macs)} , UNIQUES: \t{device_macs.nunique(dropna=False)})")

Data shape: (180274, 5)
NaN counts: 
suid              0
sensing_ts        0
ssid          12532
bssid             0
rssi             92
dtype: int64

#Unique devices: 27
#Unique Wifis: 4315
#Unique MACs: 9896
First datetime of records: 	2018-04-01 00:00:08.497
Last datetime of records: 	2018-04-30 23:14:49.298
#Seen WIFI by device: 
017e47f973145d25ae16c3913afa46923517101199ff0aa7a45eac16f61d33b0 => 	(434 , UNIQUES: 	55)
01da16ee1790a27db25cfc761c80506091a902dd9e3af44033cd635ded882987 => 	(3056 , UNIQUES: 	41)
0d3cf1db31f588de26e760cc1e7acaf1b5f60b24b14e6db85fde4d2347532680 => 	(2385 , UNIQUES: 	106)
0d7941d091ddfb741c3eb4b00efcb04802cab271128308034f4af359e85298e6 => 	(2370 , UNIQUES: 	21)
0e2b6574d9102df830608db713b3533e12426a3051fa10729f94ab705df66519 => 	(10182 , UNIQUES: 	1837)
0ffb80acdcf5e11bfa483740ac141df29a37388ecc6abfcc3b619740e8a1f841 => 	(1145 , UNIQUES: 	5)
11720a42649d41db48258c6817cba9d774f49f5d557ae5dbbdeebf510184d176 => 	(1214 , UNIQUES: 	59)
118a3b8f9241bd791fdb2d4e9

### Clean data

In [0]:
# Remove the record labelled 110796 from the working frame.
# NOTE(review): the reason this row is excluded is not recorded here; presumably it is a malformed record (confirm).
# Rebinding with errors='ignore' (instead of inplace=True) keeps the cell re-runnable:
# executing it a second time no longer raises KeyError once the label is gone.
wifi_ds = wifi_ds.drop(index=[110796], errors='ignore')

In [0]:
# Convert the sensing_ts column to pandas datetimes so it can be compared and subtracted.
wifi_ds['sensing_ts'] = wifi_ds['sensing_ts'].pipe(pd.to_datetime)

### Detect when someone is at home or at work.

In [0]:
def getWifisPerDeviceInTimeInterval(dataset, suid, 
                                    hour_start_range, min_start_range, 
                                    hour_end_range, min_end_range, 
                                    min_delta_duration):
  """Return the access points one device saw continuously for long enough.

  The device's records are cut into consecutive TIME_STEP-minute windows,
  starting at its first record and stopping before its last record
  (window k covers [first + k*TIME_STEP, first + (k+1)*TIME_STEP), and only
  windows that start strictly before the last record are considered).
  A bssid qualifies when it appears in at least
  ``min_delta_duration / TIME_STEP`` consecutive windows.

  Parameters
  ----------
  dataset : pandas.DataFrame
    Must contain the columns ``suid``, ``sensing_ts`` and ``bssid``.
    ``sensing_ts`` may be datetimes or strings parseable by ``pd.to_datetime``.
  suid : hashable
    Device identifier to analyse.
  hour_start_range, min_start_range, hour_end_range, min_end_range :
    Accepted for interface compatibility but currently NOT used.
    NOTE(review): presumably meant to restrict the analysis to a time-of-day
    range; confirm the intended semantics before implementing.
  min_delta_duration : number
    Minimum continuous presence, in minutes.

  Returns
  -------
  list
    Qualifying bssid values, each listed once, in order of first appearance.
    Empty when the device has no records or all its records share one timestamp.
  """
  TIME_STEP = 30
  time_blocks_count = min_delta_duration / TIME_STEP

  # Filter on the frame that was passed in, not on a notebook-level global.
  device_rows = dataset.loc[dataset['suid'] == suid, ['sensing_ts', 'bssid']]
  device_rows = device_rows.assign(sensing_ts=pd.to_datetime(device_rows['sensing_ts']))
  # Rows without a timestamp or a bssid cannot be placed in a window.
  device_rows = device_rows.dropna(subset=['sensing_ts', 'bssid']).sort_values(by=['sensing_ts'])
  if device_rows.empty:
    return []

  start_dt = device_rows['sensing_ts'].min()
  stop_dt = device_rows['sensing_ts'].max()
  step = pd.Timedelta(minutes=TIME_STEP)

  # Number of windows whose start lies strictly before stop_dt:
  # ceil((stop - start) / step), written as a negated floor division.
  windows_count = -((start_dt - stop_dt) // step)
  if windows_count <= 0:
    return []

  # Assign every record to its window once, instead of re-filtering the frame
  # for every (wifi, window) pair.
  window_index = (device_rows['sensing_ts'] - start_dt) // step
  windows_by_wifi = {}
  for bssid, window in zip(device_rows['bssid'], window_index):
    # A record exactly on the final boundary falls outside the last window.
    if window < windows_count:
      windows_by_wifi.setdefault(bssid, set()).add(int(window))

  result = []
  for wifi in device_rows['bssid'].unique():
    longest_run = _longestConsecutiveWindowRun(windows_by_wifi.get(wifi, ()))
    if longest_run >= time_blocks_count:
      result.append(wifi)
  return result


def _longestConsecutiveWindowRun(window_indices):
  """Return the length of the longest run of consecutive integers in window_indices."""
  longest = 0
  run_length = 0
  previous = None
  for index in sorted(window_indices):
    if previous is not None and index == previous + 1:
      run_length += 1
    else:
      run_length = 1
    longest = max(longest, run_length)
    previous = index
  return longest