In [1]:
import pandas as pd
import numpy as np
import s3fs
import boto3
from datetime import datetime

## Identify Panos with highway views

In [125]:
intersections = pd.read_csv('data/intersections_metadata.csv')
intersect_panos_bridge = pd.read_csv('data/OutputLines_Intersection_bridge.csv', index_col = 2)
roads = pd.read_csv('data/roads_metadata.csv', index_col=0)
bridge = pd.read_csv('data/intersect_road_bridge.csv')

In [111]:
intersect_panos_bridge.head()

Unnamed: 0_level_0,OBJECTID,FID_OutputLines
FID_Intersections,Unnamed: 1_level_1,Unnamed: 2_level_1
1,1,1
2,2,1
3,3,2
4,4,2
71,5,79


In [113]:
roads.head()

Unnamed: 0_level_0,LINEARID,FULLNAME,RTTYP,MTFCC,LatStart,LongStart,LatEnd,LongEnd,SegLength,Shape_Length
OBJECTID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
1,1103690289752,19th St Pvt,M,S1400,42.956981,-87.937751,42.955854,-87.938082,156.708617,0.001402
2,1103410600564,19th St Pvt,M,S1400,42.956699,-87.936245,42.956981,-87.937751,142.039857,0.001554
3,1103690289751,19th St Pvt,M,S1400,42.957537,-87.938329,42.956981,-87.937751,107.863642,0.001024
4,1103410601970,19th St Pvt,M,S1400,42.955233,-87.937779,42.955852,-87.93667,133.319678,0.001358
5,110459340658,Upper Pkwy N,M,S1400,43.052964,-87.990042,43.052971,-87.992984,616.910245,0.003362


In [25]:
bridge.head()

Unnamed: 0,OBJECTID,FID_Intersections,FID_tl_2017_55079_roads_SplitLin
0,1,1,3
1,2,1,5728
2,3,2,3
3,4,2,5728
4,5,3,4


In [129]:
df = bridge.join(roads,on='FID_tl_2017_55079_roads_SplitLin')
df = df.join(intersect_panos_bridge, on='FID_Intersections', rsuffix='_1')

In [131]:
df = df[['OBJECTID', 'FULLNAME','FID_Intersections', 'RTTYP', 'FID_OutputLines']]
df.columns = ['OBJECTID', 'FULLNAME', 'FID_Intersections', 'RTTYP', 'Pano_ID']

In [None]:
panos_to_remove = df[(df.FULLNAME == ' ') | (df.RTTYP == "I")].Pano_ID.values
panos_to_remove = [int(x) for x in panos_to_remove if ~np.isnan(x)]
panos_to_remove = np.unique(panos_to_remove)
len(panos_to_remove)

In [133]:
pd.Series(panos_to_remove).to_csv('data/interstates.csv', header=['Pano_ID'])

## Delete Highways from S3 bucket

In [134]:
# Create list of filenames:
fnames = []
for pano in panos_to_remove:
    prefix = 'gsv/' + str(pano)
    fnames.append( prefix + '.json')
    for view in ['_45','_135','_225','_315']:
        fnames.append(prefix + view + '.jpg')


In [None]:
# Delete bad images from GSV
bucket_name = 'streetview-w210'
s3 = boto3.resource('s3')
start = datetime.now()

for i,fname in enumerate(fnames):
    try:
        s3.Object(bucket_name, fname).delete()
    except:
        print(fname)

print(datetime.now()-start)

gsv/2083_315.jpg
gsv/2207_225.jpg
gsv/2275_315.jpg
gsv/2278_45.jpg
gsv/2280_135.jpg
gsv/2287.json
gsv/2288_135.jpg
gsv/2289_315.jpg
gsv/2291_225.jpg
gsv/2293.json
gsv/2294_315.jpg
gsv/2296_45.jpg
gsv/2454_315.jpg


## Delete from Metadata and labeling files

In [75]:
# Delete from metadata files:
meta_with_depth = pd.read_csv('data/meta_with_depth.csv', index_col = 0)

In [76]:
meta_with_depth.head()

Unnamed: 0,date,lat,long,pano_id,name,pano_yaw_deg,tilt_yaw_deg,tilt_pitch_deg
0,2019-06,42.957503,-87.938367,XPRpjNDhowVo8zvqvSU1CA,1,91.979996,125.04,0.83
1,2016-10,42.899259,-88.047098,iEyn0apLSZvl4i4alUbfcA,10,145.81999,-178.26999,1.06
2,2011-08,42.921614,-87.881025,1BzC3WoFeJ8U1aUT9Hx8mg,100,359.18,30.769999,2.55
3,2018-09,43.050123,-88.040263,oRN5vilebPS0srDXRPylzw,1000,161.11,67.549995,1.57
4,2019-05,42.959289,-88.026043,KimNSirhP1TzngZkSpc8UA,10000,270.06,-114.81,1.35


In [77]:
meta_with_depth.shape

(19325, 8)

In [78]:
meta_with_depth = meta_with_depth[~(meta_with_depth.name.isin(panos_to_remove))]

In [79]:
meta_with_depth.shape

(19022, 8)

In [81]:
meta_with_depth.to_csv('data/meta_with_depth.csv')

In [82]:
labels = pd.read_csv('data/all_labels.csv')
labels.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes
0,680_45.jpg,49558,{},3,0,"{""name"":""point"",""cx"":108,""cy"":389}","{""Present Curb Ramp"":""1\n""}"
1,680_45.jpg,49558,{},3,1,"{""name"":""point"",""cx"":160,""cy"":389}","{""Present Curb Ramp"":""1""}"
2,680_45.jpg,49558,{},3,2,"{""name"":""point"",""cx"":607,""cy"":453}","{""Present Curb Ramp"":""1""}"
3,680_135.jpg,51194,{},6,0,"{""name"":""point"",""cx"":18,""cy"":475}","{""Present Curb Ramp"":""1""}"
4,680_135.jpg,51194,{},6,1,"{""name"":""point"",""cx"":245,""cy"":413}","{""Present Curb Ramp"":""1""}"


In [95]:
labels['img_id'] = [int(x.split('_')[0]) for x in labels.filename.values]

In [96]:
labels.head()

Unnamed: 0,filename,file_size,file_attributes,region_count,region_id,region_shape_attributes,region_attributes,img_id
0,680_45.jpg,49558,{},3,0,"{""name"":""point"",""cx"":108,""cy"":389}","{""Present Curb Ramp"":""1\n""}",680
1,680_45.jpg,49558,{},3,1,"{""name"":""point"",""cx"":160,""cy"":389}","{""Present Curb Ramp"":""1""}",680
2,680_45.jpg,49558,{},3,2,"{""name"":""point"",""cx"":607,""cy"":453}","{""Present Curb Ramp"":""1""}",680
3,680_135.jpg,51194,{},6,0,"{""name"":""point"",""cx"":18,""cy"":475}","{""Present Curb Ramp"":""1""}",680
4,680_135.jpg,51194,{},6,1,"{""name"":""point"",""cx"":245,""cy"":413}","{""Present Curb Ramp"":""1""}",680


In [97]:
labels.shape

(3480, 8)

In [98]:
labels = labels[~(labels.img_id.isin(panos_to_remove))]
labels.shape

(3440, 8)

In [99]:
labels.to_csv('data/all_labels.csv', index=False)

In [108]:
df[(pano_id)]

Unnamed: 0,OBJECTID,FID_Intersections,RTTYP,Pano_ID
0,1,1,M,1.0
1,2,1,,1.0
2,3,2,M,1.0
3,4,2,,1.0
4,5,3,M,2.0
