In [1]:
import os
import platform
import urllib
import urllib.request
import xml.etree.ElementTree as ET
from subprocess import call, check_output
from time import sleep

import boto3
import pandas as pd
import s3fs

In [2]:
# Load the panorama metadata table; the first CSV column is used as the row index.
meta_df = pd.read_csv('gsv_metadata.csv', index_col = 0)
# Preview the first rows.
meta_df.head()

Unnamed: 0,date,lat,long,pano_id,name
0,2019-06,42.957503,-87.938367,XPRpjNDhowVo8zvqvSU1CA,1.json
1,2016-10,42.899259,-88.047098,iEyn0apLSZvl4i4alUbfcA,10.json
2,2011-08,42.921614,-87.881025,1BzC3WoFeJ8U1aUT9Hx8mg,100.json
3,2018-09,43.050123,-88.040263,oRN5vilebPS0srDXRPylzw,1000.json
4,2019-05,42.959289,-88.026043,KimNSirhP1TzngZkSpc8UA,10000.json


In [3]:
# Plain Python list of every panorama id, in table order (the download helpers expect a list).
pano_ids = meta_df['pano_id'].tolist()
# Sanity check: show the first id.
pano_ids[0]

'XPRpjNDhowVo8zvqvSU1CA'

In [30]:
def get_pano_metadata(pano_ids, target_dir, delay=1000., save_as_file=True, verbose=False):
    """
    Download the Google Street View metadata XML for each panorama id.

    For every id in `pano_ids` the function requests
    http://cbk0.google.com/cbk?output=xml&panoid=<pano_id>
    and, when `save_as_file` is true, stores the response body as
    `<target_dir>/<pano_id>.xml`. Ids whose file already exists are skipped,
    so an interrupted run can be restarted with the same arguments.

    Parameters
    ----------
    pano_ids : list of str
        Panorama ids to fetch.
    target_dir : str
        Existing directory that receives the `.xml` files. If it does not
        exist, 'Invalid target directory' is printed and nothing is fetched.
    delay : float
        Pause, in milliseconds, after each successful request.
    save_as_file : bool
        When false the request is still made but the response is discarded.
    verbose : bool
        Print progress messages.

    Raises
    ------
    ValueError
        If `pano_ids` is not a list.

    Ids whose request fails are printed (one per line) and skipped.
    """
    if not isinstance(pano_ids, list):
        raise ValueError('pano_ids must be a list of GSV panorama ids')

    # The directory is the same for every id, so validate it once up front.
    if not os.path.isdir(target_dir):
        print('Invalid target directory')
        return

    api_header = 'http://cbk0.google.com/cbk?output=xml'
    sleep_in_seconds = float(delay) / 1000

    for pano_id in pano_ids:
        if verbose:
            print('-- Extracting metadata for', pano_id, '...')

        xml_path = target_dir + '/' + pano_id + '.xml'
        if os.path.isfile(xml_path):
            if verbose:
                print('File already exists.')
            continue

        url = api_header + '&panoid=' + pano_id
        try:
            # Read the whole body before touching the disk so that a failed or
            # truncated transfer never leaves a partial file that a later run
            # would mistake for a finished download.
            with urllib.request.urlopen(url) as response:
                payload = response.read()
        except Exception:
            # Best effort: report the id and move on. `Exception` (rather than
            # a bare except) lets Ctrl-C / KeyboardInterrupt stop the loop.
            print(pano_id)
            continue

        if save_as_file:
            with open(xml_path, 'wb') as my_file:
                my_file.write(payload)

        # Throttle between requests, as the `delay` parameter promises.
        sleep(sleep_in_seconds)

In [31]:
# Directory that receives the per-panorama metadata XML files.
# NOTE(review): absolute, machine-specific path — consider a configurable relative path.
target_dir = 'C:/Users/amybr/Desktop/W210_Capstone/meta'
# Only ids from list position 11737 onward are fetched
# (presumably resuming an earlier, interrupted run — confirm).
get_pano_metadata(pano_ids[11737:],target_dir,verbose=False)

CAoSLEFGMVFpcE8yX0g1NFRlT3V2YXFsMm5sSGNJV1haNnZSNTNseEc3VEtaeVZf
CAoSLEFGMVFpcE5fWmlJWWdvaXg5dUtsamVFTG5WSExlQ1djT2E4ckhOV1dGTzYz
CAoSLEFGMVFpcFBDbGlycVhCTUdRMHcycnFmQUEzUnJ1Q1JZak5kbHhxVVhWOEwt
CAoSLEFGMVFpcE1aY2ZnMm05OXQxOG9GMU9aMnl4c3ZWQmFzN1pzdjJ1Y2l4Xy0y
CAoSLEFGMVFpcE1VSlRYSzdBVGptOHdJN2gycExBM3JmX0xhWUZsdjRjVWd5VVJp
CAoSLEFGMVFpcFBoNmFUbldwNV9yUzBsUWphRVIydGxiazFrWkJxY0taZ0Q5LTZR
CAoSLEFGMVFpcE94eHVUWlpEV2dpV1JzYkNjUWUyMEdMN01CV2NkSWZlVDAwQ2hl
CAoSLEFGMVFpcE9RNm1pUWxUdk80a0lsMDYzeGVHVW80SmVNVXN4dVR0bVl4aWJi
CAoSLEFGMVFpcE1ReUdTVHkxSldMbEdzUUFqVzIwQk1zZHRMWjFfQmUzZW9rNnc3
CAoSLEFGMVFpcE9RR2xHN25BclJtVFkxV3FWR3d0Nm9GYWhZM2RpRHh5cXZXVFd4
CAoSLEFGMVFpcE01LTdzazZsQkN0ZDhITG5FeDRCaTNtVTJYN1pxblNONmpnckx4
CAoSLEFGMVFpcE50YW1yV0FhakVUejZzMXU1VHlFM240NGpZbXNqUE1IclkyNXpY


In [35]:
# Parse every file in target_dir and read the three projection angles from its XML.
# Each pass overwrites the previous values, so afterwards the variables hold
# the angles of the last file listed.
for filename in os.listdir(target_dir):
    tree = ET.parse(target_dir + "/" + filename)
    proj_properties = tree.findall('./projection_properties')
    projection = proj_properties[0]
    pano_yaw_deg = projection.get('pano_yaw_deg')
    tilt_yaw_deg = projection.get('tilt_yaw_deg')
    tilt_pitch_deg = projection.get('tilt_pitch_deg')

data_properties {'image_width': '13312', 'image_height': '6656', 'tile_width': '512', 'tile_height': '512', 'image_date': '2018-09', 'pano_id': '--C5MmrcW22n89eUxitDmQ', 'imagery_type': '1', 'num_zoom_levels': '5', 'lat': '42.999103', 'lng': '-88.017385', 'original_lat': '42.999100', 'original_lng': '-88.017324', 'elevation_wgs84_m': '191.661326', 'elevation_egm96_m': '226.872726'}
projection_properties {'projection_type': 'spherical', 'pano_yaw_deg': '0.5', 'tilt_yaw_deg': '-77.75', 'tilt_pitch_deg': '1.87'}
annotation_properties {}


In [55]:
# `tree` is not defined in this cell; it must already exist in the kernel
# (e.g. left over from a previous parsing loop).
proj_properties = tree.findall('./projection_properties')
# Attribute values come back as strings.
proj_properties[0].get('pano_yaw_deg')

'0.5'

In [56]:
# Now extract all the metadata and combine.
# For every row of meta_df, read <target_dir>/<pano_id>.xml and collect the three
# projection angles; rows whose file is missing, unreadable, or lacks a
# <projection_properties> element get None in all three columns.
# NOTE: `target_dir` must point at the metadata XML directory when this cell runs.
pano_yaw, tilt_yaw, tilt_pitch = [], [], []

for pano_id in meta_df['pano_id']:
    try:
        tree = ET.parse(target_dir + "/" + pano_id + '.xml')
    except Exception:
        # Best effort (missing file, malformed XML, non-string id, ...).
        # `Exception` instead of a bare except keeps Ctrl-C working.
        projection = None
    else:
        # find() returns None when the element is absent instead of raising
        # IndexError the way findall(...)[0] would.
        projection = tree.find('./projection_properties')

    if projection is None:
        pano_yaw.append(None)
        tilt_yaw.append(None)
        tilt_pitch.append(None)
        continue

    pano_yaw.append(projection.get('pano_yaw_deg'))
    tilt_yaw.append(projection.get('tilt_yaw_deg'))
    tilt_pitch.append(projection.get('tilt_pitch_deg'))

meta_df['pano_yaw_deg'] = pano_yaw
meta_df['tilt_yaw_deg'] = tilt_yaw
meta_df['tilt_pitch_deg'] = tilt_pitch

In [58]:
# Strip the trailing '.json' off of each name. Only names that actually end with
# the suffix are shortened, so re-running this cell cannot eat further characters.
meta_df['name'] = [x[:-5] if x.endswith('.json') else x for x in meta_df.name.values]

In [59]:
# Preview the first rows of the updated table.
meta_df.head()

Unnamed: 0,date,lat,long,pano_id,name,pano_yaw_deg,tilt_yaw_deg,tilt_pitch_deg
0,2019-06,42.957503,-87.938367,XPRpjNDhowVo8zvqvSU1CA,1,91.979996,125.04,0.83
1,2016-10,42.899259,-88.047098,iEyn0apLSZvl4i4alUbfcA,10,145.81999,-178.26999,1.06
2,2011-08,42.921614,-87.881025,1BzC3WoFeJ8U1aUT9Hx8mg,100,359.18,30.769999,2.55
3,2018-09,43.050123,-88.040263,oRN5vilebPS0srDXRPylzw,1000,161.11,67.549995,1.5699999
4,2019-05,42.959289,-88.026043,KimNSirhP1TzngZkSpc8UA,10000,270.06,-114.81,1.35


In [60]:
# Write the current meta_df to 'meta_with_depth.csv' in the working directory.
meta_df.to_csv('meta_with_depth.csv')

In [4]:
# Shell command: recursively add the execute permission to ./decode_depthmap.
!chmod -R +x './decode_depthmap'

In [18]:
def upload_to_s3(target_dir, bucket='gsv-depths'):
    """
    Upload the depth files found in `target_dir` to S3 and delete the local copies.

    `.xml` files are stored under the key prefix `depth_xml/` and `.txt` files
    under `depth_txt/`; every other file is ignored. A local file is removed
    only after its upload call returned without raising. When an upload fails
    the filename is printed and the file is left in place.

    Parameters
    ----------
    target_dir : str
        Directory to scan (not recursive).
    bucket : str
        Destination bucket name; defaults to the previously hard-coded
        'gsv-depths'.
    """
    s3_client = boto3.client('s3')

    for filename in os.listdir(target_dir):
        if filename.endswith('.xml'):
            obj_name = 'depth_xml/' + filename
        elif filename.endswith('.txt'):
            obj_name = 'depth_txt/' + filename
        else:
            continue

        local_path = target_dir + '/' + filename
        try:
            s3_client.upload_file(local_path, bucket, obj_name)
        except Exception:
            # Best effort: report and keep the local file for a later retry.
            # `Exception` (not a bare except) lets Ctrl-C stop the loop.
            print(filename)
            continue
        # delete from local store
        os.remove(local_path)

In [6]:
def decode_depthmap(file_in, file_out, verbose=True):
    """
    Run the external `decode_depthmap` program on one depth XML file.

    Copied from https://github.com/ProjectSidewalk/sidewalk-panorama-tools/blob/ce1565993129a76afc52346366c0e73884b84090/GSVScraper.py

    The program is invoked with `file_in` (the depth .xml) and `file_out` as
    its two arguments. If `file_out` already exists, 'File already exists.'
    is printed and nothing is run. The program's exit status is not checked.

    On Windows the command is `decode_depthmap` run through the shell;
    on every other platform it is `./decode_depthmap`, i.e. relative to the
    current working directory.

    Windows caveat carried over from the original author: launching the
    Windows executable via subprocess from an interactive console did not
    work reliably; running the script from cmd.exe was the workaround. See
    http://stackoverflow.com/questions/3022013/windows-cant-find-the-file-on-subprocess-call
    http://stackoverflow.com/questions/10236260/subprocess-pydev-console-vs-cmd-exe
    """
    if verbose:
        print('-- Decoding depth data...')

    # Guard clause: never re-decode an existing output file.
    if os.path.isfile(file_out):
        print('File already exists.')
        return None

    if platform.system() == 'Windows':
        command = ["decode_depthmap", file_in, file_out]
        options = {'shell': True}
    else:
        # Mac (and any other non-Windows platform)
        command = ["./decode_depthmap", file_in, file_out]
        options = {}

    call(command, **options)
    return None

In [12]:
def get_pano_depthdata(target_dir, pano_ids, delay=1000., decode=True, verbose=False):
    '''
    Download (and optionally decode) the GSV depth XML for each panorama id.

    Edited based on sidewalk-panorama-tools/GSVScraper.py

    For every id the depth XML is requested and written to
    `<target_dir>/<pano_id>.xml`; ids whose `.xml` already exists in
    `target_dir` are skipped. After a successful download the function sleeps
    for `delay` milliseconds, then (when `decode` is true) runs
    `decode_depthmap` to produce `<target_dir>/<pano_id>.txt`.

    Whenever the 1-based position of a freshly downloaded id is a multiple of
    100, `upload_to_s3(target_dir)` is called. Anything downloaded after the
    last such upload stays in `target_dir`; call `upload_to_s3` yourself to
    flush it.

    Parameters
    ----------
    target_dir : str
        Existing directory for the files. If it does not exist,
        'Invalid target directory' is printed and nothing is fetched.
    pano_ids : iterable of str
        Panorama ids to fetch.
    delay : float
        Pause, in milliseconds, after each successful download.
    decode : bool
        Run `decode_depthmap` on each downloaded file.
    verbose : bool
        Print a message when an id is skipped because its file exists.

    Ids whose request fails are printed (one per line) and skipped; no file
    is created for them.
    '''
    base_url = "http://maps.google.com/cbk?output=xml&cb_client=maps_sv&hl=en&dm=1&pm=1&ph=1&renderer=cubic,spherical&v=4&panoid="

    # The directory is the same for every id, so validate it once up front.
    if not os.path.isdir(target_dir):
        print('Invalid target directory')
        return

    sleep_in_seconds = float(delay) / 1000

    # cnt counts every id (including skipped/failed ones), as before.
    for cnt, pano_id in enumerate(pano_ids, start=1):
        xml_path = target_dir + '/' + pano_id + '.xml'
        if os.path.isfile(xml_path):
            if verbose:
                print('File already exists.')
            continue

        url = base_url + pano_id
        try:
            # Fetch first, write afterwards: opening the output file before the
            # request used to leave an empty .xml behind on failure, which was
            # then treated as "already downloaded" and uploaded to S3.
            with urllib.request.urlopen(url) as response:
                payload = response.read()
        except Exception:
            # Best effort: report the id and move on. `Exception` (rather than
            # a bare except) lets Ctrl-C / KeyboardInterrupt stop the loop.
            print(pano_id)
            continue

        with open(xml_path, 'wb') as f:
            f.write(payload)

        # Wait a little bit so you don't get blocked by Google
        sleep(sleep_in_seconds)

        if decode:
            decode_depthmap(xml_path, target_dir + '/' + pano_id + '.txt', verbose=False)

        if cnt % 100 == 0:
            upload_to_s3(target_dir)

    return

In [21]:
# Download and decode depth data for every panorama id into ./depth.
# NOTE: this rebinds the global `target_dir`, which other cells also read.
target_dir = './depth'
get_pano_depthdata(target_dir, pano_ids, delay=1000., decode=True, verbose=False)

CAoSLEFGMVFpcE9UUzFOUTZzcE1qTmRobXJ2UjVpMDR1UHZORFlEZ1lpV18xaWNT
CAoSLEFGMVFpcE9LcDlBb0VQSkhTTXUtb1h1Rl80dGRiQmQ5cWptN2RHck8yMGhT
CAoSLEFGMVFpcE9tV2p2TFZscVB6NFFqTFlOUG9DRVcyRi1PeFF5OUNOdlVFRkph
CAoSLEFGMVFpcE5hQWtJVS05RlEzb2cyR0VROE81WFpTeWZDa1NZYk9kaHVObXlK
CAoSLEFGMVFpcFB3VFR2N3ppOGZ2RlVXQWExSFA1V1UxRmM3N3lUM1dtRzNlZGVS
CAoSLEFGMVFpcE1ZbFAzalBkSGlhcHRSZ3NQSnhuRWl0dTFQUzRyS3lpckxuVUZP
CAoSLEFGMVFpcE9LeTg0ZkVoR0RWVTVJeVkxQm5pSktTbEh4Q1EzUTdJSkNmMmp1
CAoSLEFGMVFpcFA4aTJZTTRFYlIzS0E2Y0kxZ0NWMTBNX2pLYV8ya2VIUDg4Yzc2
CAoSLEFGMVFpcE5nUEJBNm1ydVZjakt1R3V6SGlHRWc3SDlLVk1PRThjUmVJNUJw
CAoSLEFGMVFpcFByOTVfT3l3OGsxVjZyV2RfbWY5bkgtNVdhT1ZoN2xfamdpMzFF
CAoSLEFGMVFpcE0waE9QYXJqYUZlc3dXYnZVeUx2NWIta3d3SkQ2dTI1M3Y4LTdK
CAoSLEFGMVFpcE5BSmh4c18wNmdVY2ZwbUkyYXJ3dERiMmt4M3Q3aDEzYTdWb2VK
CAoSLEFGMVFpcE5VVXRZVFZPRG5vd1U1N1lsRGZ2TEFnSmp4MHlTNTI0Qi1hN041
CAoSLEFGMVFpcE56TTl3Q0Z5S1RYUkJaamV0bkQ3R1p4blA4QjRfWU1FUVBXN2c2
CAoSLEFGMVFpcE9xVmRXb0ZaVzdXSjh1NlZyTHpIWEdRM0lhNi0wRThHTW5JWFE2
CAoSLEFGMVFpcE14Qmg3SHRuM