# 04. Gather Dataset GSV

Gather a dataset of Google Street View images for training and validation purposes, sampled at the intersections listed in a CSV file.

## Configuration

Any configuration that is required to run this notebook can be customized in the next cell

In [None]:
# Google Street View API key filename
# To download Google Street View images via the API, you must have an API key as per:
#  https://developers.google.com/maps/documentation/streetview/get-api-key
# linked to your Google account and billing information.  Without it, Google cannot tell
# whom to charge, so you won't be able to download the images.
# Store your API key in a file with the name listed below, in the parent of the current working directory
# from which you launched Jupyter Notebook
# Do not share your API key with anyone else!
gsv_api_key_filename = 'apikey.txt'

# List of intersections to sample
# NOTE(review): this was documented as living in the parent directory, but the loading cell
# reads it relative to the current working directory - confirm the intended location
candidate_intersections_csv = 'pbn_bearings1.csv'

## Code

In [None]:
# General imports
import os
import sys

import pandas as pd

import google_streetview.api

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from geopy import Point
from geopy.distance import geodesic

In [None]:
# Load GSV API key
# The key file is looked up in the parent of the current working directory
gsv_api_key_path = os.path.join(os.path.abspath(os.pardir), gsv_api_key_filename)

print('Loading Google Street View API key from [{0:s}]'.format(gsv_api_key_path))

# The context manager closes the file on exit, so no explicit close is needed.
# Strip the line terminator (and stray whitespace) so it is not sent to the API as part of the key.
with open(gsv_api_key_path) as f:
    api_key = f.readline().strip()

# Fail here with a clear message rather than later with an opaque API error
if not api_key:
    raise ValueError('No API key found on the first line of [{0:s}]'.format(gsv_api_key_path))

In [None]:
# Read intersection list from CSV
# The file is resolved against the current working directory; os.getcwd() already
# returns an absolute path
candidate_intersections_path = os.path.join(os.getcwd(), candidate_intersections_csv)

print('Loading list of candidate intersections from [{0:s}]'.format(candidate_intersections_path))

# Read from the same path that was just reported, so the log line and the load cannot disagree
df = pd.read_csv(candidate_intersections_path)

# Keep only the rows that name an intersecting street
df = df[df['intersection_street'].notnull()]

In [None]:
# Preview the last 10 rows of the filtered intersection list
df.tail(10)

In [None]:
def offset_coordinates(lat1, lon1, bearing=0, meters=0):
    """Return a 'lat, lon' string for a point, optionally displaced from (lat1, lon1).

    With a non-zero `meters`, the point is moved that many meters from
    (lat1, lon1) along `bearing` using geopy's geodesic distance, and the
    destination is formatted in decimal degrees. With `meters` equal to zero
    the input coordinates are formatted directly and `bearing` is ignored.
    """
    if meters != 0:
        origin = Point(lat1, lon1)
        destination = geodesic(meters=meters).destination(origin, bearing)
        return str(destination.format_decimal())

    # No displacement requested: format the coordinates as given
    return ', '.join((str(lat1), str(lon1)))

In [None]:
def gsv_sample(id, lat, lon, bearing, offset):
    """Download and display four Street View images around one sample point.

    The sample point is (lat, lon) moved `offset` meters along `bearing`.
    One 640x640 image is requested for each of the headings bearing + 0, 90,
    180 and 270 degrees and saved under
    downloads/<id>/<offset>/<heading_offset>/. The four images are then shown
    in a 2x2 grid, in that order.

    Parameters
    ----------
    id : identifier of the sampled row; only used to build the download path.
    lat, lon : coordinates of the intersection.
    bearing : base heading in degrees.
    offset : distance in meters to move from the intersection along `bearing`.
    """
    location = offset_coordinates(lat, lon, bearing, offset)

    # Every image of this sample lives below the same directory
    sample_dir = 'downloads/' + str(id) + '/' + str(offset)

    heading_offsets = [0, 90, 180, 270]

    for heading_offset in heading_offsets:
        # Wrap into [0, 360) with modulo so that headings past a full turn
        # (and negative bearings) stay within the range the API documents
        heading = (bearing + heading_offset) % 360

        params = [{
            'key': api_key,
            'size': '640x640',
            'location': location,
            'fov': '90',
            'pitch': '-20',
            'heading': str(heading)
        }]

        results = google_streetview.api.results(params)

        results.download_links(sample_dir + '/' + str(heading_offset))

    fig = plt.figure(figsize=(20,20))

    for idx, heading_offset in enumerate(heading_offsets):
        image_path = sample_dir + '/' + str(heading_offset) + '/gsv_0.jpg'
        img = mpimg.imread(image_path)

        # Subplot positions are 1-based
        ax = fig.add_subplot(2, 2, idx+1)
        ax.imshow(img)
    plt.show()

In [None]:
def sample_location(id=0, offset=0, correction=0):
    """Sample one intersection, download its Street View images and save its row as JSON.

    Parameters
    ----------
    id : index label of the row in `df` to sample; 0 (the default) picks a random row.
    offset : meters to move from the intersection along the bearing before sampling.
    correction : degrees added to the row's bearing; may be negative.

    Returns
    -------
    The index label of the sampled row (the randomly chosen one when `id` was 0).

    Raises
    ------
    IndexError : if `id` is not present in the index of `df`.
    """
    if id == 0:
        entry = df.sample()
        id = entry.index.tolist()[0]
    else:
        entry = df.filter(items=[id], axis=0)
        # filter() returns an empty frame for an unknown label; report that
        # explicitly instead of failing later on a positional lookup
        if entry.empty:
            raise IndexError('No intersection with id {0} in the candidate list'.format(id))

    lat = entry['intersection_lat'].iloc[0]
    lon = entry['intersection_lon'].iloc[0]

    # Modulo keeps the corrected bearing in [0, 360) for negative corrections
    # and for sums beyond a single full turn
    bearing = (entry['bearing'].iloc[0] + correction) % 360

    print(entry[['local_street', 'town', 'intersection_street', 'intersection_lat', 'intersection_lon', 'bearing']])
    print('Corrected Bearing: ' + str(bearing))

    gsv_sample(id, lat, lon, bearing, offset)

    # Store the sampled row next to the downloaded images
    entry.to_json('downloads/' + str(id) + '/' + str(offset) + '/sample.json')

    return id

In [None]:
def record_hit(id, offset=0, image_num=0):
    """Append one "id,offset,image_num" row to downloads/hits.csv.

    Parameters
    ----------
    id : identifier of the sample.
    offset : offset value the sample was taken with.
    image_num : image number to record for the sample.
    """
    # Make sure the target directory exists even if nothing was downloaded yet
    os.makedirs('downloads', exist_ok=True)

    # newline='' disables newline translation, so the explicit CRLF terminator is
    # written unchanged on every platform (translated text mode on Windows would
    # emit CR CR LF). The context manager closes the file even if the write fails.
    with open('downloads/hits.csv', 'a', newline='') as hits:
        hits.write(str(id) + ',' + str(offset) + ',' + str(image_num) + '\r\n')

In [None]:
# Bearing correction (degrees) and offset (meters) passed to sample_location for this run
correction = 0
offset     = 0

# With no correction and no offset, (re)start from this fixed intersection id.
# NOTE(review): otherwise `id` is left untouched, so it keeps whatever value an earlier run
# of this cell left in the kernel; on a fresh kernel that is the builtin `id` function -
# confirm this re-run workflow is intended
if offset==0 and correction==0:
    id = 38635

# sample_location returns the id it sampled; keep it for the following cell
id = sample_location(id, offset, correction)

In [None]:
# Value written as the third field of the row appended to downloads/hits.csv
# NOTE(review): presumably the position of the matching image in the 2x2 grid displayed
# by the previous cell - confirm
image_num = 3

# Record the current sample (id, offset) together with the chosen image number
record_hit(id, offset, image_num)