In [None]:
import getpass
import json
import logging
import os

import mapillary as mly
import numpy as np
import pandas as pd
import requests
from mapillary.models.geojson import GeoJSON
from tqdm import tqdm

In [None]:
# Mapillary API token.
# Read it from the MAPILLARY_ACCESS_TOKEN environment variable so the secret is
# never stored in the notebook; fall back to an interactive prompt whose input
# is not echoed.
# NOTE(review): a literal token used to be committed on this line -- treat it as
# leaked and rotate it.
my_token = os.environ.get('MAPILLARY_ACCESS_TOKEN') or getpass.getpass('Mapillary access token: ')
mly.interface.set_access_token(my_token)

In [None]:
# Load bounding box info and keep only the San Francisco rows.
# Built as a single chain so re-running the cell always starts from the CSV.
df_bbox = (
    pd.read_csv('/content/drive/MyDrive/Homeless_SF/df_bbox_99.csv')
    # Zero-pad GEOID to 12 characters (presumably restoring a leading zero
    # lost when the column was parsed as a number -- TODO confirm)
    .assign(GEOID=lambda d: d['GEOID'].astype(str).str.zfill(12))
    # Select the rows with GEOID starting with 06075 (San Francisco)
    .loc[lambda d: d['GEOID'].str.startswith('06075')]
    .reset_index(drop=True)
    # Drop the first column 'Unnamed: 0'
    .drop(columns=['Unnamed: 0'])
)

# Separate 'swne_edges' (a "(south, west, north, east)" string) into four float
# columns. Vectorised .str methods replace a per-row apply that built one
# pd.Series per row.
edge_cols = ['south', 'west', 'north', 'east']
df_bbox[edge_cols] = (
    df_bbox['swne_edges']
    .astype(str)
    .str.strip('()')
    .str.replace(' ', '', regex=False)
    .str.split(',', expand=True)
    .astype(float)
)

# Create bboxid from row_num and col_num, e.g. 'bbox_3_7'
df_bbox['bboxid'] = 'bbox_' + df_bbox['row_num'].astype(str) + '_' + df_bbox['col_num'].astype(str)

In [None]:
# Create safe_append function to add np.nan for missing query
def safe_append(data, key, append_to, fallback=np.nan):
    """Append ``data[key]`` to ``append_to``, or ``fallback`` if the lookup fails.

    Parameters
    ----------
    data : object
        Container that is subscripted with ``key``. May be ``None`` or any
        other non-subscriptable value, in which case ``fallback`` is used.
    key : object
        Key looked up in ``data``.
    append_to : list
        List that receives exactly one new element per call.
    fallback : object, default np.nan
        Value appended when ``data[key]`` cannot be read.
    """
    try:
        value = data[key]
    except (AttributeError, KeyError, TypeError):
        # TypeError is what subscripting None (or another non-subscriptable
        # object) raises; without it a missing container would crash the caller.
        value = fallback

    append_to.append(value)


# Create a function to extract image info
def extract_mapillary_image_info(df_bbox, resolution=1024):
    """Collect metadata and thumbnail URLs for the Mapillary images in each bounding box.

    For every row of ``df_bbox`` the function queries
    ``mly.interface.images_in_bbox`` and records one output row per returned
    feature.

    Parameters
    ----------
    df_bbox : pandas.DataFrame
        One row per bounding box. Must contain the columns 'west', 'south',
        'east', 'north', 'GEOID' and 'bboxid'.
    resolution : int, default 1024
        Forwarded unchanged to ``mly.interface.image_thumbnail``.

    Returns
    -------
    pandas.DataFrame
        Columns: image_id, user_id, sequence_id, image_coord,
        image_timestamp_unix, image_url, image_angle, geoid, bboxid.
        Any field that is missing from a feature, and any thumbnail URL that
        could not be fetched, is stored as ``np.nan``.
    """
    # Initialize lists to store image info (one list per output column)
    image_info = {'image_id': [], 'user_id': [], 'sequence_id': [], 'image_coord': [], 'image_timestamp_unix': [], 'image_url': [],
                  'image_angle': [], 'geoid': [], 'bboxid': []}

    # tqdm already reports progress; the index label is not a reliable counter
    # when df_bbox is a slice of a larger frame, so it is not printed.
    for _, row in tqdm(df_bbox.iterrows(), total=df_bbox.shape[0]):
        # Construct bounding box dictionary
        bbox = {k: row[k] for k in ['west', 'south', 'east', 'north']}

        # Fetch images data within the bounding box (returned as a JSON string)
        images_data = json.loads(mly.interface.images_in_bbox(bbox, image_type='flat'))

        for feature in images_data['features']:
            # A missing or null member becomes an empty dict so that every
            # lookup below falls back to NaN instead of raising.
            properties = feature.get('properties') or {}
            geometry = feature.get('geometry') or {}

            # Safely append values to respective lists
            safe_append(properties, 'id', image_info['image_id'])
            safe_append(properties, 'creator_id', image_info['user_id'])
            safe_append(properties, 'sequence_id', image_info['sequence_id'])
            safe_append(geometry, 'coordinates', image_info['image_coord'])
            safe_append(properties, 'captured_at', image_info['image_timestamp_unix'])
            safe_append(properties, 'compass_angle', image_info['image_angle'])
            image_info['geoid'].append(row['GEOID'])
            image_info['bboxid'].append(row['bboxid'])

            # Extract image URL. Exactly one value is appended per feature so
            # the column stays aligned with the others.
            image_id = properties.get('id')
            url = np.nan

            # pd.isna accepts None, NaN, ints and strings alike (np.isnan
            # raises TypeError on strings and None).
            if not pd.isna(image_id):
                try:
                    url = mly.interface.image_thumbnail(image_id=image_id, resolution=resolution)
                except Exception:
                    # Best effort: a failed thumbnail lookup leaves NaN.
                    # KeyboardInterrupt is deliberately not swallowed, so the
                    # loop can still be stopped by hand.
                    url = np.nan

            image_info['image_url'].append(url)

    # Create a dataframe
    df_images = pd.DataFrame(image_info)
    return df_images

In [None]:
# Suppress all log records of severity CRITICAL and below for the rest of the
# session, keeping the download loop's output readable.
logging.disable(logging.CRITICAL)

num_chunks = 100

# Split by row position instead of passing the DataFrame itself to
# np.array_split: that path goes through DataFrame.swapaxes, which newer pandas
# releases deprecate. The resulting chunk boundaries are the same.
chunk_positions = np.array_split(np.arange(len(df_bbox)), num_chunks)
chunks = [df_bbox.iloc[positions] for positions in chunk_positions]

c = 0 # pick up from where the previous session stopped

# start=c keeps the output file number equal to the chunk's position in `chunks`
for m, chunk in enumerate(chunks[c:num_chunks], start=c):
    df_images = extract_mapillary_image_info(chunk, resolution=1024)
    df_images.to_csv(f'/content/drive/MyDrive/Homeless_SF/image_url/df_mapillary_{m}.csv')