# To be run on Kaggle.

In [None]:
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import ImageGrid

import io
import requests
from requests import Session, Request
from typing import Union

In [None]:
def get_label_url(imtype : Union["Aerial", "Road"],z,x,y):
    """Assemble the Bing Maps static-imagery URL for one map image.

    Nothing is downloaded here: the request is only prepared so that its
    final URL (path plus encoded query string) can be returned.

    Args:
        imtype: "Aerial" or "Road". "Road" is translated to the "CanvasDark"
            imagery set; any other value is placed in the URL unchanged.
        z: Value sent as the ``zoomLevel`` query parameter.
        x: First component of the ``{x},{y}`` path segment. The callers in
            this notebook pass the latitude here.
        y: Second component of the ``{x},{y}`` path segment. The callers in
            this notebook pass the longitude here.

    Returns:
        The prepared request URL as a string.
    """
    # NOTE(review): Union of string literals is not a meaningful type hint;
    # typing.Literal["Aerial", "Road"] is probably what was intended.
    imagery_set = "CanvasDark" if imtype == "Road" else imtype
    endpoint = f'http://dev.virtualearth.net/REST/v1/Imagery/Map/{imagery_set}/{x},{y}/'

    # Insertion order is kept so the query string is generated in the same
    # order as before.
    # NOTE(review): the API key is committed in source; consider reading it
    # from the environment or a Kaggle secret instead.
    # NOTE(review): the style string presumably hides everything except
    # roads, trails and walkways; confirm against the Bing custom map style
    # documentation.
    query = dict(
        zoomLevel=z,
        mapLayer='Basemap',
        format='png',
        key='AoFiUmLksAZN1Xbbj5meVqVW-5vVU3DbfUpC0xJd1p9m0kLPrT2kJ-qJM3xXpe4v',
        mapSize='400,400',
        style="me|fc:FFFFFFFF;lv:0;shadowVisible:0;sc:FFFFFFFF;v:0_pl|bv:0_rd|v:1_trl|v:1_wlk|v:1",
    )

    return Request('GET', endpoint, params=query).prepare().url
# Sanity check: print the URL built for a sample "Road" request at zoom 18.
print(get_label_url("Road",18, 32.45361328125,-84.98698112117805))

In [None]:
# This function checks whether a pixel is part of the arrow labels on the roads.
def is_arrow(image,x,y):
    """Return True if pixel (x, y) is non-black and has no black 4-neighbour.

    Only the neighbours directly left, right, above and below are inspected,
    and neighbours that would fall outside the image are ignored. The image
    bounds are taken from ``image.size`` rather than assuming a 400x400
    image, so the check is valid for any image size.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and
            ``getpixel((x, y))`` returning an RGB tuple (e.g. a PIL image in
            "RGB" mode).
        x: Column of the pixel to test.
        y: Row of the pixel to test.

    Returns:
        False if the pixel itself or any in-bounds neighbour is pure black
        ``(0, 0, 0)``; True otherwise.
    """
    black = (0, 0, 0)
    if image.getpixel((x, y)) == black:
        return False

    width, height = image.size
    neighbours = ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1))
    return all(
        image.getpixel((nx, ny)) != black
        for nx, ny in neighbours
        if 0 <= nx < width and 0 <= ny < height
    )

In [None]:
from PIL import Image

def convert_arrows_to_white(image):
    """Paint every pixel accepted by ``is_arrow`` pure white, in place.

    Args:
        image: Image exposing ``size`` and ``putpixel``; it is modified
            directly.

    Returns:
        The same image object that was passed in.
    """
    white = (255, 255, 255)
    n_cols, n_rows = image.size

    # Walk the image column by column, top to bottom.
    for col in range(n_cols):
        for row in range(n_rows):
            if not is_arrow(image, col, row):
                continue
            image.putpixel((col, row), white)
    return image


In [None]:
import os.path
import urllib.request

from tqdm import tqdm
import time
import random

# Root directory under which the downloaded "label" and "sat" images are saved.
BING_DATA_DIR = "/kaggle/working/bing"

def fetch_road_tile(z,y,x, overwrite=True):
    """Download one road ("label") image, clean it up and save it as PNG.

    The image is requested with imagery type "Road", converted to RGB, passed
    through ``convert_arrows_to_white`` and written to
    ``{BING_DATA_DIR}/label/{x},{y}.png``.

    Args:
        z: Zoom level forwarded to ``get_label_url``.
        y: Second coordinate of the image centre (note that ``y`` comes
            before ``x`` in this signature). The caller in this notebook
            passes the longitude here.
        x: First coordinate of the image centre. The caller in this notebook
            passes the latitude here.
        overwrite: When False and the target file already exists, nothing is
            downloaded.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        OSError: If the output directory or file cannot be written, or the
            response body is not a readable image.
    """
    print(f"y : {y}")
    print(f"x : {x}")

    url = get_label_url("Road",z,x,y)
    out_path = f"{BING_DATA_DIR}/label/{x},{y}.png"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    if not overwrite and os.path.isfile(out_path):
        return

    # A timeout keeps one stalled connection from hanging the whole scrape;
    # raise_for_status reports HTTP errors as such rather than as an image
    # decoding failure.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    image = Image.open(io.BytesIO(response.content)).convert("RGB")
    image = convert_arrows_to_white(image)
    image.save(out_path)


def fetch_sat_tile(z,y, x, overwrite=True):
    """Download one aerial ("sat") image and save it unmodified.

    The image is requested with imagery type "Aerial" and written to
    ``{BING_DATA_DIR}/sat/{x},{y}.png``.

    Args:
        z: Zoom level forwarded to ``get_label_url``.
        y: Second coordinate of the image centre (note that ``y`` comes
            before ``x`` in this signature). The caller in this notebook
            passes the longitude here.
        x: First coordinate of the image centre. The caller in this notebook
            passes the latitude here.
        overwrite: When False and the target file already exists, nothing is
            downloaded.

    Raises:
        OSError: If the download fails or the output directory or file cannot
            be written (``urllib.error.URLError`` is a subclass of OSError).
    """
    url = get_label_url("Aerial",z,x ,y)
    out_path = f"{BING_DATA_DIR}/sat/{x},{y}.png"
    os.makedirs(os.path.dirname(out_path), exist_ok=True)

    if overwrite or not os.path.isfile(out_path):
        urllib.request.urlretrieve(url, out_path)


def offset(tile, long_meters, lat_meters):
    """Shift a ``(zoom, longitude, latitude)`` tile by a distance in metres.

    Uses the approximation that one degree of latitude spans 111111 m and one
    degree of longitude spans ``111111 * cos(latitude)`` m. The latitude is
    given in degrees, so it is converted to radians before taking the cosine.

    Args:
        tile: ``(zoom, longitude, latitude)`` with both angles in degrees.
        long_meters: Displacement along the longitude axis, in metres.
        lat_meters: Displacement along the latitude axis, in metres.

    Returns:
        A new ``(zoom, longitude, latitude)`` tuple; the zoom is unchanged.
    """
    z, long, lat = tile
    meters_per_degree = 111111
    offset_lat = lat_meters / meters_per_degree
    # math.cos expects radians; passing degrees gives a step of the wrong
    # size and often the wrong sign.
    offset_long = long_meters / (meters_per_degree * math.cos(math.radians(lat)))
    return (z, long + offset_long, lat + offset_lat)



In [None]:
import json
import math


# Scrape centres, stored as (name, (longitude, latitude)) pairs.
cities = []
# Disabled: load every city with a population above 800,000 from the Kaggle
# cities dataset.
#with open("/kaggle/input/cities-valebi/cities.json") as f:
#    ls = json.load(f)
#    for c in ls:
#        ppl = int(c["population"])
#        if ppl > 0.8 * 1000000:
#            cities.append((c["city"], (c["longitude"], c["latitude"])))
# Hard-coded centres used in place of the dataset above.
cities = [("extra1", (-118.450419, 34.037052)), ("extra2", (-71.112710, 42.334054))]
print(cities)

In [None]:
import random

# Download a tiles_per_city x tiles_per_city grid of road and aerial images
# around every entry in `cities`.
if __name__ == "__main__":

    tiles_per_city = 40

    print(f"scraping {tiles_per_city*tiles_per_city*len(cities)*2} images")
    spec = []
    # Grid indices on both axes (e.g. -20 .. 19 for 40 tiles per city).
    grid_steps = range(-tiles_per_city // 2, tiles_per_city // 2)
    for i, (name, (long, lat)) in enumerate(cities):
        print(f"Fetching data for {name} [{i+1}/{len(cities)}]")
        tile = (18, long, lat)
        # One random shift of up to 50 m per axis, shared by the whole grid
        # of this city.
        roffset1 = random.randint(-50,50)
        roffset2 = random.randint(-50,50)
        # Grid points are spaced 250 m apart on both axes.
        tiles_around = [
            offset(tile, col*250 + roffset1, row*250 + roffset2)
            for col in grid_steps
            for row in grid_steps
        ]
        for grid_tile in tqdm(tiles_around):
            # Catch Exception rather than everything, so that a keyboard or
            # kernel interrupt still stops the scrape; report the reason.
            try:
                fetch_road_tile(*grid_tile)
            except Exception as exc:
                print("Failed to fetch road tile", grid_tile, exc)
                time.sleep(2)
            try:
                fetch_sat_tile(*grid_tile)
            except Exception as exc:
                print("Failed to fetch sat tile", grid_tile, exc)
                time.sleep(2)

In [None]:
from PIL import Image
import os


# This function removes the Microsoft logo in the Bing data.
def crop_last_24_lines(image_path, output_directory):
    """Save a copy of an image with its bottom 24 pixel rows removed.

    The result is written to ``output_directory`` under the original file
    name prefixed with ``"cropped_"``. The directory is created if needed.

    Args:
        image_path: Path of the image to crop.
        output_directory: Directory that receives the cropped copy.
    """
    # The context manager releases the source file handle deterministically,
    # including when cropping or saving fails.
    with Image.open(image_path) as image:
        width, height = image.size

        # Keep the full width and everything except the last 24 rows.
        cropped_image = image.crop((0, 0, width, height - 24))

        os.makedirs(output_directory, exist_ok=True)

        file_name = os.path.basename(image_path)
        cropped_file_path = os.path.join(output_directory, "cropped_" + file_name)
        cropped_image.save(cropped_file_path)

# Crop every .jpg/.png image: first the label images, then the satellite
# images. Each pair is (directory to read from, directory to write to).
for input_directory, output_directory in (
    ("/kaggle/working/bing/label", "/kaggle/working/processed/cropped_label"),
    ("/kaggle/working/bing/sat", "/kaggle/working/processed/cropped_sat"),
):
    for filename in os.listdir(input_directory):
        # Skip anything that is not an image file.
        if not filename.endswith((".jpg", ".png")):
            continue

        image_path = os.path.join(input_directory, filename)
        crop_last_24_lines(image_path, output_directory)

def compare_directories(dir1, dir2):
    """Delete every file whose name appears in only one of two directories.

    Entries are matched by name only. After the call both directories hold
    the same set of names. Each deletion is reported on stdout.

    Args:
        dir1: First directory; its unmatched entries are removed first.
        dir2: Second directory; its unmatched entries are removed afterwards.
    """
    names1 = set(os.listdir(dir1))
    names2 = set(os.listdir(dir2))

    # Handle dir1's orphans first, then dir2's.
    for folder, orphans in ((dir1, names1 - names2), (dir2, names2 - names1)):
        for name in orphans:
            os.remove(os.path.join(folder, name))
            print("Removed", name, "from", folder)

# The two processed directories that must end up with matching file names.
directory1 = "/kaggle/working/processed/cropped_sat"
directory2 = "/kaggle/working/processed/cropped_label"

# Delete every file that is present in only one of the two directories, so
# each remaining satellite image has a label image of the same name.
compare_directories(directory1, directory2)


In [None]:
import shutil
# Zip the contents of /kaggle/working/processed into /kaggle/working/output2.zip.
shutil.make_archive("/kaggle/working/output2", 'zip', "/kaggle/working/processed")