## 1. Load data collected this year.

In [1]:
import urllib2
import json
import pandas as pd
import os
import numpy as np
import datetime as dt
import sys
import csv

In [2]:
## Data collected on 06/20/2017

columns = ['lat', 'lng', 'ssid', 'bssid', 'acc', 'time', 'level']
startdate1 = '06/20/2017'
enddate1 = '06/21/2017'
device_model = 'Nexus%204'

In [3]:
url1 = "http://wifindproject.com/wifipulling/\
?columns={}\
&startdate={}&enddate={}\
&device_model={}\
&timeformat=1".format("%7C".join(columns), startdate1, enddate1, device_model)
response1 = urllib2.urlopen(url1)
data1 = response1.read()
df1 = pd.read_json(data1)

In [4]:
result1 = df1[df1.time <= '06-20-2017 18:00:00']
result1.bssid.unique().shape

(2002,)

In [5]:
## Data collected on 07/02/2017

columns = ['lat', 'lng', 'ssid', 'bssid', 'acc', 'time', 'level']
startdate2 = '07/02/2017'
enddate2 = '07/03/2017'
device_model = 'Nexus%204'

In [6]:
url2 = "http://wifindproject.com/wifipulling/\
?columns={}\
&startdate={}&enddate={}\
&device_model={}\
&timeformat=1".format("%7C".join(columns), startdate2, enddate2, device_model)
response2 = urllib2.urlopen(url2)
data2 = response2.read()
df2 = pd.read_json(data2)

In [8]:
result2 = df2[df2.time <= '07-02-2017 13:30:00']
result2.bssid.unique().shape

(3263,)

## 2. Filter the points around housing projects.

In [9]:
from shapely.geometry import Point, Polygon
import datetime as dt
import geopandas as gpd
from fiona.crs import from_epsg
import pyproj
import mplleaflet
import folium
import shapefile as shp
import math
import matplotlib.pyplot as plt
%matplotlib inline

In [10]:
# Transform datasets to geometry data

result1['geo'] = zip(result1.lng, result1.lat)
result1['geometry'] = map(lambda x: Point(x), zip(result1.lng, result1.lat))
result1.head(1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,acc,bssid,lat,level,lng,ssid,time,geo,geometry
0,17.0,24:c9:a1:cc:af:48,40.693674,-67,-73.982557,twcwifi-passpoint,06-20-2017 16:56:05,"(-73.98255674, 40.69367447)",POINT (-73.98255674000001 40.69367447)


In [11]:
# Outline the range of public housing projects

outline = Polygon([[-73.982483, 40.693547], [-73.982397, 40.695865], [-73.980369, 40.696199], [-73.973460, 40.695906], \
[-73.972880, 40.693221]])

# Filter points within the range of housing projects

result3 = result1[result1.geometry.apply(outline.contains)]
result3.head(1)

Unnamed: 0,acc,bssid,lat,level,lng,ssid,time,geo,geometry
106,14.0,d4:05:98:4c:f8:55,40.695681,-88,-73.98238,tg1672g52-5g,06-20-2017 16:57:17,"(-73.98238012, 40.69568089)",POINT (-73.98238012 40.69568089)


In [12]:
result2['geo'] = zip(result2.lng, result2.lat)
result2['geometry'] = map(lambda x: Point(x), zip(result2.lng, result2.lat))
result2.head(1)

Unnamed: 0,acc,bssid,lat,level,lng,ssid,time,geo,geometry
0,16,c4:10:8a:1d:a8:48,40.690576,-78,-73.978865,downtownbrooklynwifi_fon,07-02-2017 12:14:15,"(-73.97886496, 40.690576)",POINT (-73.97886496 40.690576)


In [48]:
frames = [result2, result3]
df = pd.concat(frames)

In [59]:
# groupby geo, unique bssid
access_count = df.groupby(df.geo).apply(lambda x: len(x.bssid.unique()))
access_bssidList = df.groupby(df.geo).apply(lambda x: list(x.bssid.unique()))
df = pd.DataFrame(map(lambda x: Point(x), access_count.index), columns=['geometry'])
df['unique_bssid_count'] = access_count.values
df['unique_bssid_list'] = access_bssidList.values

In [67]:
#crs
df= gpd.GeoDataFrame(df)
df.crs = from_epsg(4326)
df.to_crs(epsg=2263, inplace=True)
df.to_pickle('unique_bssid.p')

## 3. Generate grid containing unique 'bssid'.

In [68]:
cell_length = 25 # usft
delete_empty_cell = True

# grid boundry
all_x = map(lambda p: p.x, df.geometry)
all_y = map(lambda p: p.y, df.geometry)
minx, maxx, miny, maxy = min(all_x), max(all_x), min(all_y), max(all_y) 

# grid length
dx = cell_length
dy = cell_length
nx = int(math.ceil(abs(maxx - minx)/dx))
ny = int(math.ceil(abs(maxy - miny)/dy))

# grid plotting
w = shp.Writer(shp.POLYGON)
w.autoBalance = 1
w.field("ID")
id=0
for i in range(ny):
    for j in range(nx):
        id+=1
        vertices = []
        parts = []
        vertices.append([min(minx+dx*j,maxx),max(maxy-dy*i,miny)])
        vertices.append([min(minx+dx*(j+1),maxx),max(maxy-dy*i,miny)])
        vertices.append([min(minx+dx*(j+1),maxx),max(maxy-dy*(i+1),miny)])
        vertices.append([min(minx+dx*j,maxx),max(maxy-dy*(i+1),miny)])
        parts.append(vertices)
        w.poly(parts)
        w.record(id)
w.save('polygon_grid')

In [70]:
# read data: TBD
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
uni_bssid = pd.read_pickle("./unique_bssid.p")
uni_bssid = gpd.GeoDataFrame(uni_bssid)
uni_bssid.crs = from_epsg(2263)

# geo points in which cell?
PointInPoly = gpd.sjoin(uni_bssid, grid, how='left', op='intersects')
PointInPoly.dropna(subset=['ID'], inplace=True)

In [93]:
# groupby cell.ID to get list of bssid (with duplications) for each cell, then calculate length of unique bssid "uni"
grouped = PointInPoly.groupby('ID').apply(lambda x: reduce(lambda x,y: x+y, x.unique_bssid_list))
bssidInPoly = pd.DataFrame(grouped, columns=['all_bssid_list'])
bssidInPoly['unique_bssid_list'] = map(lambda x: set(x), grouped)
bssidInPoly['cum'] = map(lambda x: len(x), grouped)
bssidInPoly['uni'] = map(lambda x: len(set(x)), grouped)
bssidInPoly['ID'] = bssidInPoly.index
bssidInPoly.reset_index(drop=True, inplace=True)

# merge grid and bssidInPoly
grid_bssid = pd.merge(grid, bssidInPoly, how='left', on='ID')
grid_bssid.to_crs(epsg=2263, inplace=True)
if not delete_empty_cell: 
    grid_bssid.uni.fillna(inplace=True, value=0)
    
# Data for Plot
grid_plot = grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
grid_plot.dropna(subset=['uni'], inplace=True) 

In [92]:
grid_plot.to_file('all_grid.shp')

## 4. Generate grid containing unique 'bssid' for free wifi.

In [97]:
free_wifi=['#flatiron free wifi', 'freewifibysurface','bryantpark.org', 'downtownbrooklynwifi_fon',
'linknyc free wi-fi', 'metrotech','usp park wifi', 'red hook wifi']

frames = [result2, result3]
df1 = pd.concat(frames)

free = df1[df1.ssid.isin(free_wifi)]
free.ssid.unique()

array([u'downtownbrooklynwifi_fon', u'linknyc free wi-fi'], dtype=object)

In [99]:
# groupby geo, unique bssid
free_access_count = free.groupby(free.geo).apply(lambda x: len(x.bssid.unique()))
free_access_bssidList = free.groupby(free.geo).apply(lambda x: list(x.bssid.unique()))
free = pd.DataFrame(map(lambda x: Point(x), free_access_count.index), columns=['geometry'])
free['free_unique_bssid_count'] = free_access_count.values
free['free_unique_bssid_list'] = free_access_bssidList.values

In [101]:
#crs
free = gpd.GeoDataFrame(free)
free.crs = from_epsg(4326)
free.to_crs(epsg=2263, inplace=True)
free.to_pickle('free_unique_bssid.p')

In [102]:
# read data: TBD
grid = gpd.read_file('./polygon_grid.shp')
grid.crs = from_epsg(2263)
free_uni_bssid = pd.read_pickle("./free_unique_bssid.p")
free_uni_bssid = gpd.GeoDataFrame(free_uni_bssid)
free_uni_bssid.crs = from_epsg(2263)

# geo points in which cell?
FreePointInPoly = gpd.sjoin(free_uni_bssid, grid, how='left', op='intersects')
FreePointInPoly.dropna(subset=['ID'], inplace=True)

In [103]:
# groupby cell.ID to get list of bssid (with duplications) for each cell, then calculate length of unique bssid "uni"
grouped = FreePointInPoly.groupby('ID').apply(lambda x: reduce(lambda x,y: x+y, x.free_unique_bssid_list))
free_bssidInPoly = pd.DataFrame(grouped, columns=['free_bssid_list'])
free_bssidInPoly['free_unique_bssid_list'] = map(lambda x: set(x), grouped)
free_bssidInPoly['cum'] = map(lambda x: len(x), grouped)
free_bssidInPoly['uni'] = map(lambda x: len(set(x)), grouped)
free_bssidInPoly['ID'] = free_bssidInPoly.index
free_bssidInPoly.reset_index(drop=True, inplace=True)

# merge grid and bssidInPoly
free_grid_bssid = pd.merge(grid, free_bssidInPoly, how='left', on='ID')
free_grid_bssid.to_crs(epsg=2263, inplace=True)
if not delete_empty_cell: 
    free_grid_bssid.uni.fillna(inplace=True, value=0)
    
# Data for Plot
free_grid_plot = free_grid_bssid.loc[:, ['ID', 'uni', 'geometry']]
free_grid_plot.dropna(subset=['uni'], inplace=True) 


In [104]:
free_grid_plot.to_file('free_grid.shp')