# 04. Gather Dataset GSV

Gather a dataset for training and validation purposes from Google Street View images, based on a CSV file of intersections to sample.

Run this whole notebook, then go to the last cell where you will find a GUI to browse locations/images and record hits.

## Configuration

Any configuration required to run this notebook can be customized in the next cell.

In [1]:
# Output CSV file of "hits", i.e. images where a clear bicycle lane marker was observed,
# for inclusion in the output dataset via the labelling stage.
# Will be written to the 'data_sources' directory
output_hits_filename = 'hits.csv'

# Input CSV file containing candidate intersections from notebook 02 or 03
# Must be in the data_sources directory
candidate_intersections_filename = 'pbn_intersections.csv'

# Width and height used to display each image in the GUI (the same value is used for both)
image_size = 400

# Google Street View API key filename
# To download Google Street View images via the API, you must have an API Key as per:
#  https://developers.google.com/maps/documentation/streetview/get-api-key
# linked to your Google account and billing information.  Otherwise they don't know who to charge,
# so you won't be able to download the images.
# Store your API key in a file with the name listed below, in the parent of the current working directory
# from which you launched Jupyter Notebook
# Do not share your API key with anyone else!
gsv_api_key_filename = 'apikey.txt'

## Code

In [2]:
# General imports
import os
import sys

import ipywidgets as widgets
from ipywidgets import Button, Layout

import pandas as pd

import google_streetview.api

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from geopy import Point
from geopy.distance import geodesic

In [3]:
# Derived path for output file
output_hits_path = os.path.join(os.path.abspath(os.pardir), 'data_sources', output_hits_filename)

# Derived GSV download/cache directory
gsv_download_dir = os.path.join(os.path.abspath(os.pardir), 'data_sources', 'gsv')

# Initialize output with just the CSV header row.
# NOTE: mode 'w' truncates the file, so re-running this cell discards any hits
# recorded by a previous session.
# The context manager guarantees the handle is closed even if the write fails.
with open(output_hits_path, 'w') as hits:
    hits.write('id,offset,image_num\n')

In [4]:
# Load GSV API key from the first line of the key file
gsv_api_key_path = os.path.join(os.path.abspath(os.pardir), gsv_api_key_filename)

print('Loading Google Street View API key from [{0:s}]'.format(gsv_api_key_path))

# readline() keeps the trailing newline, so strip surrounding whitespace to avoid
# sending it to Google as part of the key.
# The with-block closes the file on exit; no explicit close() call is needed.
with open(gsv_api_key_path) as f:
    api_key = f.readline().strip()

Loading Google Street View API key from [E:\Release\minor_thesis\apikey.txt]


In [5]:
# Locate the candidate intersections CSV inside the data_sources directory
candidate_intersections_path = os.path.join(
    os.path.abspath(os.pardir),
    'data_sources',
    candidate_intersections_filename
)

print('Loading list of candidate intersections from [{0:s}]'.format(candidate_intersections_path))

# Load the CSV and keep only the rows that name an intersecting street
df = (
    pd.read_csv(candidate_intersections_path)
      .loc[lambda frame: frame['intersection_street'].notnull()]
)

Loading list of candidate intersections from [E:\Release\minor_thesis\data_sources\pbn_intersections.csv]


In [6]:
# Display the last 10 rows of the candidate intersections DataFrame as a sanity check
df.tail(10)

Unnamed: 0.1,Unnamed: 0,objectid,local_street,town,suburb,city,count,intersection_street,intersection_lat,intersection_lon,bearing,bearing_lat,bearing_lon
387728,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,BRETT STREET,-37.767647,144.967759,242.0,4846.645979,-37.747687
387729,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,ELIZABETH STREET,-37.767663,144.967909,242.0,4835.654371,-37.747687
387730,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,NASH STREET,-37.76771,144.968363,241.0,4802.562009,-37.747687
387731,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,MARKS STREET,-37.767759,144.968806,241.0,4770.531053,-37.747687
387732,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,SOUTH AUDLEY STREET,-37.767795,144.96914,241.0,4746.388613,-37.747687
387733,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,ABERDEEN STREET,-37.767905,144.970168,240.0,4672.510854,-37.747687
387734,16255,40235,VICTORIA STREET,Thornbury,Thornbury,Melbourne,2,COLLACE STREET,-37.768018,144.971228,239.0,4596.936721,-37.747687
387743,16264,40245,LAHINCH STREET,Thornbury,Thornbury,Melbourne,2,OPAL STREET,-37.744379,145.017325,7.0,155.689616,-37.746675
387744,16264,40245,LAHINCH STREET,Thornbury,Thornbury,Melbourne,2,OVANDO STREET,-37.745761,145.017106,27.0,1.124818,-37.746675
387745,16264,40245,LAHINCH STREET,Thornbury,Thornbury,Melbourne,2,BELL STREET,-37.746587,145.016972,187.0,10.045779,-37.746675


In [7]:
def offset_coordinates(lat1, lon1, bearing=0, meters=0):
    """Project a point a number of metres along a bearing and return it as a string.

    This lets us request a point slightly away from the dead centre of an
    intersection, in intervals of +/- 10 metres, to get a distinct image from Google.

    Parameters:
        lat1:    latitude of the starting point
        lon1:    longitude of the starting point
        bearing: bearing along which to project the point
        meters:  distance to project, in metres; may be negative

    Returns:
        A coordinate string.  For a zero offset this is 'lat1, lon1' built from
        the inputs unchanged; otherwise it is the decimal formatting that geopy
        produces for the projected point.
    """
    # A non-zero offset is projected with a geodesic calculation
    if meters != 0:
        origin = Point(lat1, lon1)
        projected = geodesic(meters=meters).destination(origin, bearing)
        return str(projected.format_decimal())

    # Zero offset: hand back the original coordinates as they were given
    return ', '.join([str(lat1), str(lon1)])

In [8]:
def gsv_sample(id, lat, lon, bearing, offset):
    """Download four Google Street View images around a location and return them.

    The images are requested at 0/90/180/270 degrees relative to ``bearing``,
    from the point ``offset`` metres along ``bearing`` from (``lat``, ``lon``).
    Each image is downloaded to
    ``<gsv_download_dir>/<id>/<offset>/<heading_offset>/`` and then read back.

    Images are requested from the API on every call.  The download directory is
    keyed on id/offset/heading_offset only, not on the bearing, so an existing
    file cannot be assumed to match the requested bearing.

    Parameters:
        id:      identifier of the sampled intersection, used in the download path
        lat:     latitude of the intersection
        lon:     longitude of the intersection
        bearing: bearing of the street, in degrees
        offset:  distance in metres to move along the bearing; may be negative

    Returns:
        A list of four ``bytes`` objects holding the raw image file contents,
        in the order of heading offsets 0, 90, 180, 270.

    Raises:
        OSError: if an expected 'gsv_0.jpg' file is not present after the
            download step (e.g. no image was returned for the location).
    """
    location = offset_coordinates(lat, lon, bearing, offset)

    heading_offsets = [0, 90, 180, 270]

    # One download directory per heading offset
    snapshot_paths = [
        os.path.join(gsv_download_dir, str(id), str(offset), str(heading_offset))
        for heading_offset in heading_offsets
    ]

    for heading_offset, snapshot_path in zip(heading_offsets, snapshot_paths):
        # Wrap the heading into [0, 360).  The previous check `heading >- 360`
        # parsed as `heading > -360`, which is true for every valid bearing and
        # so always subtracted 360, producing negative headings.
        heading = (bearing + heading_offset) % 360

        params = [{
            'key':      api_key,
            'size':     '640x640', # Maximum allowable size
            'location': location,
            'fov':      '90',
            'pitch':    '-20',    # Slightly towards the ground
            'heading':  str(heading)
        }]

        results = google_streetview.api.results(params)
        results.download_links(snapshot_path)

    # Read the downloaded images back as raw bytes, in heading offset order
    img_list = []
    for snapshot_path in snapshot_paths:
        image_path = os.path.join(snapshot_path, 'gsv_0.jpg')
        with open(image_path, 'rb') as image_file:
            img_list.append(image_file.read())

    return img_list

In [9]:
def sample_location(id=0, offset=0, correction=0):
    """Sample a location from the candidate intersections and fetch its images.

    Parameters:
        id:         0 to randomly sample a new location from the CSV of candidate
                    intersections.  Any other value repeats that location (looked
                    up by DataFrame index label), in case we want to adjust the
                    offset to get a better picture of a bicycle lane marker in
                    the distance.
        offset:     offset from the original coordinates along the bearing, in
                    metres (typically in intervals of +/- 10 metres).
        correction: number of degrees added to the estimated bearing, if the one
                    estimated from the prev/next nodes in the street looks off.
                    It's difficult to step up/down a street with an offset if the
                    bearing is off!  May be negative.

    Returns:
        A tuple ``(id, img_list)`` of the id of the sampled location and the list
        of images returned by ``gsv_sample``.

    Raises:
        IndexError: if a non-zero ``id`` is not present in the candidate
            intersections.

    Side effects:
        Writes the sampled row as JSON to
        ``<gsv_download_dir>/<id>/<offset>/sample.json``.
    """
    if (id == 0):
        entry = df.sample()
        id = entry.index.tolist()[0]
    else:
        entry = df.filter(items=[id], axis=0)
        if entry.empty:
            # Same exception type that `.iloc[0]` would raise on the empty
            # selection, but with a message that names the actual problem
            raise IndexError('No candidate intersection with id {0}'.format(id))

    lat     = entry['intersection_lat'].iloc[0]
    lon     = entry['intersection_lon'].iloc[0]
    bearing = entry['bearing'         ].iloc[0]

    # Apply the manual correction and wrap into [0, 360); the modulo also handles
    # negative corrections, which a single `>= 360` check does not
    bearing = (bearing + correction) % 360

    img_list = gsv_sample(id, lat, lon, bearing, offset)

    # Keep the source row alongside the downloaded images
    entry.to_json(os.path.join(gsv_download_dir, str(id), str(offset), 'sample.json'))

    return id, img_list

In [10]:
def record_hit(id, offset=0, image_num=0):
    """Append a "hit" to the output CSV, i.e. an image that had a bicycle lane marker in it.

    Image display order in the GUI:

        0 1
        2 3

    Parameters:
        id:        integer id of the sampled location
        offset:    integer offset at which the images were taken
        image_num: integer number (0-3) of the image containing the marker

    Raises:
        ValueError: if a value cannot be formatted as an integer.
    """
    # Format the row before opening the file so that a bad value cannot leave an
    # open handle behind; the with-block closes the file even if the write fails
    row = '{0:d},{1:d},{2:d}\n'.format(id, offset, image_num)
    with open(output_hits_path, 'a') as hits:
        hits.write(row)

## GUI

This will sample one location at a time.

* Hit one of the four "Image N" buttons to record a "hit" in the corresponding image.
* When you are ready to move to the next sample location, press "Next"
* If you need to adjust the heading so that the top-left image points forward down the road, enter a "delta" number of degrees in the "correction" box, then hit "Move" to update the camera position at the same sample location
* If you need to move up/down the road, enter a value such as "10" to move 10 metres down the road towards the top-left image.  Enter a negative value such as "-10" to move backwards, away from the top-left image, and towards the bottom-left image.  Then press "Move" to update the camera position at the same sample location.

In [11]:
# State of the sample currently on screen.  on_submit() records hits against
# these module-level values, so the loader below updates them via `global`.
# Previously the handlers assigned same-named locals, so every hit was recorded
# against the first sample's id with offset 0.
id         = 0
offset     = 0
correction = 0

id, img_list = sample_location(id, offset, correction)

def _show_images(images):
    """Put four images into the 2x2 grid, in display order 0 1 / 2 3."""
    image_widgets = (image0_widget, image1_widget, image2_widget, image3_widget)
    for image_widget, image in zip(image_widgets, images):
        image_widget.value = image

def _load_sample(requested_id, requested_offset, requested_correction):
    """Fetch a sample, then update the shared state, input boxes and images together.

    If fetching fails, the error is shown in the status box and the previously
    displayed sample (and the state used by on_submit) is left unchanged.
    """
    global id, offset, correction, img_list

    try:
        new_id, new_images = sample_location(requested_id, requested_offset, requested_correction)
    except Exception as exc:
        # Report the failure in the GUI rather than leaving "Loading..." on screen
        status_box.value = 'Failed to load location: {0!r}'.format(exc)
        return

    id, offset, correction, img_list = new_id, requested_offset, requested_correction, new_images

    # Reflect the loaded sample in the boxes (the id differs from the request
    # when a random location was sampled)
    id_box.value         = id
    offset_box.value     = offset
    correction_box.value = correction

    _show_images(img_list)

    status_box.value = ''

def on_submit(b):
    """Record a hit for the image whose button was clicked (description 'Image N')."""
    image_num = int(b.description.split()[1])
    record_hit(id, offset, image_num)
    status_box.value = 'Recorded image {0:d} id {1:d} offset {2:d}'.format(image_num, id, offset)

def on_move(b):
    """Reload the location using the values entered in the Id/Offset/Correction boxes."""
    status_box.value = 'Loading adjusted location...'
    _load_sample(id_box.value, offset_box.value, correction_box.value)

def on_next(b):
    """Load a new randomly sampled location with no offset or correction."""
    status_box.value = 'Loading next location...'
    _load_sample(0, 0, 0)


image0_widget = widgets.Image(value=img_list[0], width=image_size, height=image_size)
image1_widget = widgets.Image(value=img_list[1], width=image_size, height=image_size)
image2_widget = widgets.Image(value=img_list[2], width=image_size, height=image_size)
image3_widget = widgets.Image(value=img_list[3], width=image_size, height=image_size)

button0 = widgets.Button(description='Image 0')
button1 = widgets.Button(description='Image 1')
button2 = widgets.Button(description='Image 2')
button3 = widgets.Button(description='Image 3')

id_box         = widgets.IntText(value=id,         description='Id')
offset_box     = widgets.IntText(value=offset,     description='Offset')
correction_box = widgets.IntText(value=correction, description='Correction')
move_button    = widgets.Button(description='Move')
next_button    = widgets.Button(description='Next')
status_box     = widgets.Textarea(value='', description='Status', layout=Layout(width='80%'))

row1 = widgets.HBox([image0_widget, image1_widget])
row2 = widgets.HBox([image2_widget, image3_widget])
row3 = widgets.HBox([id_box, offset_box, correction_box, move_button, next_button])
row4 = widgets.HBox([button0, button1])
row5 = widgets.HBox([button2, button3])
out = widgets.Output()

# All four image buttons share one handler; it reads the image number from the
# button's description
for hit_button in (button0, button1, button2, button3):
    hit_button.on_click(on_submit)

move_button.on_click(on_move)
next_button.on_click(on_next)

# Display the GUI
display(row1)
display(row2)
display(row3)
display(row4)
display(row5)
display(status_box)
display(out)

HBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C…

HBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x00\x00\x01\x00\x01\x00\x00\xff\xdb\x00C…

HBox(children=(IntText(value=309785, description='Id'), IntText(value=0, description='Offset'), IntText(value=…

HBox(children=(Button(description='Image 0', style=ButtonStyle()), Button(description='Image 1', style=ButtonS…

HBox(children=(Button(description='Image 2', style=ButtonStyle()), Button(description='Image 3', style=ButtonS…

Textarea(value='', description='Status', layout=Layout(width='80%'))

Output()