In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
from statistics import mean 
import json

In [2]:
right_json = gpd.read_file("right_sidewalk.json")
left_json = gpd.read_file("left_sidewalk.json")

In [3]:
city_limits = gpd.read_file('citylimit.geojson')
city_polygon = city_limits['geometry'][0]

In [4]:
# add columns for lat/longs in point form
def make_point(row, lat, long):
    return Point(row[long], row[lat])

right_json['startPoint'] = right_json.apply(make_point, axis=1, args=('LatStart', 'LongStart',))
right_json['endPoint'] = right_json.apply(make_point, axis=1, args=('LatEnd', 'LongEnd',))
left_json['startPoint'] = left_json.apply(make_point, axis=1, args=('LatStart', 'LongStart',))
left_json['endPoint'] = left_json.apply(make_point, axis=1, args=('LatEnd', 'LongEnd',))

def start_or_end_in_city_limits(row):
    return (row['startPoint'].within(city_polygon) or row['endPoint'].within(city_polygon))

right_json = right_json[right_json.apply(start_or_end_in_city_limits, axis=1)]
left_json = left_json[left_json.apply(start_or_end_in_city_limits, axis=1)]

assert len(right_json)==len(left_json)

In [5]:
right_json = right_json.reset_index()
left_json = left_json.reset_index()

In [6]:
r_dirs = []
l_dirs = []
for idx in range(len(left_json)):
    try:
        r_dir = ""
        l_dir = ""

        l_coord_list = list(left_json['geometry'][idx].coords)
        l_lat_avg = mean([a[1] for a in l_coord_list])
        l_long_avg = mean([a[0] for a in l_coord_list])

        r_coord_list = list(right_json['geometry'][idx].coords)
        r_lat_avg = mean([a[1] for a in r_coord_list])
        r_long_avg = mean([a[0] for a in r_coord_list])

        lat_dist = l_lat_avg - r_lat_avg
        long_dist = l_long_avg - r_long_avg

        if abs(lat_dist) > abs(long_dist):
            if lat_dist > 0:
                l_dir = "N"
                r_dir = "S"
            else:
                l_dir = "S"
                r_dir = "N"
        else:
            if long_dist > 0:
                # gets more negative as you move east
                # so if left long is greater than right long 
                # that means it's less negative
                # so left is west
                l_dir = "W"
                r_dir = "E"
            else:
                l_dir = "E"
                r_dir = "W"

        r_dirs.append(r_dir)
        l_dirs.append(l_dir)
    except Exception as ex:
        print("skipping idx {}".format(idx))
        print(ex)

In [7]:
right_json['whichArgisFile'] = 'right'
left_json['whichArgisFile'] = 'left'

In [44]:
## Make new dataframe, which is the one we'll actually import into sql 
df_to_load = pd.DataFrame()
df_to_load['segmentId'] = right_json['OBJECTID'].append(left_json['OBJECTID'])
df_to_load['directionInd'] = pd.Series(r_dirs).append(pd.Series(l_dirs))
df_to_load['linearId'] = right_json['LINEARID'].append(left_json['LINEARID'])
df_to_load['streetName'] = right_json['FULLNAME'].append(left_json['FULLNAME'])
df_to_load['startLat'] = right_json['LatStart'].append(left_json['LatStart'])
df_to_load['startLong'] = right_json['LongStart'].append(left_json['LongStart'])
df_to_load['endLat'] = right_json['LatEnd'].append(left_json['LatEnd'])
df_to_load['endLong'] = right_json['LongEnd'].append(left_json['LongEnd'])

In [74]:
# keeping this one separate because it takes a little while 
df_to_load['geoJson'] = right_json['geometry'].append(left_json['geometry'])
def turn_to_geojson(row):
    return json.dumps(gpd.GeoSeries([row['geoJson']]).__geo_interface__)

df_to_load['geoJson'] = df_to_load.apply(turn_to_geojson, axis=1)

In [54]:
df_to_load['whichArcgisFile'] = right_json['whichArgisFile'].append(left_json['whichArgisFile'])

In [55]:
# gut check - for a given segmentId, there should be 2 rows, with OPPOSITE directionInd and whichArcgisFile,
# SAME linearId/streetname/lats and longs, but DIFFERENT geoJson's
df_to_load[df_to_load['segmentId']==38027]

Unnamed: 0,segmentId,directionInd,linearId,streetName,startLat,startLong,endLat,endLong,geoJson,whichArcgisFile
20863,38027,W,1103676879311,N 44th St,43.094611,-87.968921,43.095039,-87.96893,"LINESTRING (-87.96886 43.09461, -87.96885 43.0...",right
20863,38027,E,1103676879311,N 44th St,43.094611,-87.968921,43.095039,-87.96893,"LINESTRING (-87.96894 43.09462, -87.96893 43.0...",left


Unnamed: 0,segmentId,directionInd,linearId,streetName,startLat,startLong,endLat,endLong,geoJson,whichArcgisFile
20863,38027,W,1103676879311,N 44th St,43.094611,-87.968921,43.095039,-87.96893,"LINESTRING (-87.96886 43.09461, -87.96885 43.0...",right
20863,38027,E,1103676879311,N 44th St,43.094611,-87.968921,43.095039,-87.96893,"LINESTRING (-87.96894 43.09462, -87.96893 43.0...",left


In [28]:
import pymysql
connection = pymysql.connect(host='aa6f8tcntjfd0z.c5625ddefrth.us-west-2.rds.amazonaws.com',
                             port=3306,
                             user=# ask Emily :) ,
                             password= # ask Emily :) ,
                             db='ebdb')
cursor=connection.cursor()

In [77]:
# NOTE: this method is way slow
# will take over an hour to load in 40,000 rows this way 
# creating column list for insertion
cols = "`,`".join([str(i) for i in df_to_load.columns.tolist()])

# Insert DataFrame recrds one by one.
for i,row in df_to_load.iterrows():
    sql = "INSERT INTO `sidewalk_segment2` (`" +cols + "`) VALUES (" + "%s,"*(len(row)-1) + "%s)"
    cursor.execute(sql, tuple(row))

    # the connection is not autocommitted by default, so we must commit to save our changes
    connection.commit()

  result = self._query(query)
  result = self._query(query)


In [72]:
### maybe the best thing to do for sqlite would just be to add some random test rows 
# through python/flask? rather than try to do this whole export thing