In [None]:
from PIL import Image
from matplotlib.pyplot import imshow
import numpy as np
from tqdm import tqdm as tqdm

from google.colab import drive
import os
# Mount Google Drive at /content/drive; every data path used below lives under this mount point
drive.mount('/content/drive')

Mounted at /content/drive


# Inference pipeline

### 0. Set up path structure

In [None]:
# Directory holding the raw images we want to make predictions for (e.g. the 500k Brazil images)
raw_img_directory = '/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/pipeline/raw'
# Directory that receives the pipeline's intermediate outputs: the resolution-enhanced images,
# the 512x512 resized images and the results folders
interim_directory = '/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/pipeline/interim'
# Directory where the trained models are stored
models_directory =  '/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/data/models'

### 1. Resolution enhancement

In [None]:
# Start from a fresh clone of GFPGAN under /content (any previous checkout is deleted first)
%cd /content
!rm -rf GFPGAN
!git clone https://github.com/TencentARC/GFPGAN.git
%cd GFPGAN
# Install the dependencies, then GFPGAN itself in development mode
!pip install basicsr
!pip install facexlib
!pip install -r requirements.txt
!python setup.py develop
# Presumably required by the `--bg_upsampler realesrgan` option passed to the inference script below
!pip install realesrgan
# Download the pretrained weights into experiments/pretrained_models.
# Note: the file is GFPGANv1.4.pth although the URL path uses the v1.3.0 release tag.
!wget https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth -P experiments/pretrained_models

In [None]:
# Settings forwarded to the GFPGAN inference script
upscale = 4
version = 1.4

# Command line handed to `%run`: script name followed by its arguments.
# The directories are wrapped in double quotes because they contain spaces.
res_enh_cmd = ' '.join([
    'inference_gfpgan.py',
    f'--input "{raw_img_directory}"',
    f'--output "{interim_directory}"',
    f'--version {version}',
    f'--upscale {upscale}',
    '--bg_upsampler realesrgan',
])

In [None]:
# Delete any `results` directory in the current working directory before running the enhancement
!rm -rf results

In [None]:
# Run the GFPGAN inference script with the arguments assembled in `res_enh_cmd`
%run $res_enh_cmd

### 2. Resizing

In [None]:
# Folder with the resolution-enhanced images and folder that will receive the resized copies
restored_imgs_path = os.path.join(interim_directory, 'restored_imgs')
resized_imgs_path = os.path.join(interim_directory, 'resized_imgs')
# exist_ok=True makes the cell safe to re-run and avoids a separate existence check
os.makedirs(resized_imgs_path, exist_ok=True)

# Define new size of images
newsize = (512, 512)

# Iterate through the images in the restored_imgs path
for img_name in tqdm(os.listdir(restored_imgs_path)):
  # Open inside a context manager so the file handle is released after every image;
  # otherwise handles pile up when the folder holds a large number of images.
  with Image.open(os.path.join(restored_imgs_path, img_name)) as current_im:
    resized_im = current_im.resize(newsize)
  # The file keeps its original name, but the data is always encoded as PNG
  resized_im.save(os.path.join(resized_imgs_path, img_name), "PNG")

100%|██████████| 202/202 [02:24<00:00,  1.40it/s]


### 3. Model execution

In [None]:
# Clone YOLOv5 into the current working directory and install its requirements
!git clone https://github.com/ultralytics/yolov5
!pip install -r yolov5/requirements.txt
!pip install pyyaml
!pip install layoutparser

# Print the CUDA compiler version available in this runtime
!nvcc --version

In [None]:
import torch

# Split "<release>+<build tag>" (e.g. "1.12.1+cu113") into its two parts.
torch_release, plus_sign, build_tag = torch.__version__.partition("+")
TORCH_VERSION = ".".join(torch_release.split(".")[:2])
# A version string without a "+" suffix carries no build tag; report that explicitly
# instead of echoing the whole version string as the CUDA version.
CUDA_VERSION = build_tag if plus_sign else "unknown"
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)

# The following cells run YOLOv5 scripts with relative paths, so work from inside the clone
%cd yolov5

torch:  1.12 ; cuda:  cu113
/content/GFPGAN/yolov5


In [None]:
import json
import yaml
import shutil
import glob
import re
from pathlib import Path

def find_results(folder, path=None):
  """Extract the run number from a numbered results folder path.

  Args:
    folder (str): path to inspect, e.g. '<interim_directory>/results_3'.
    path (str, optional): regular expression with one capturing group for the
      run number. When omitted, the pattern '<interim_directory>/results_([0-9]+)'
      is built at call time, with the directory escaped so that characters in it
      which are special in regular expressions are matched literally.
  Returns:
    list of str: the captured run numbers; empty when `folder` does not match.
  """
  if path is None:
    # Resolved here rather than in the signature so the current value of
    # interim_directory is used, not the one seen when the function was defined.
    path = re.escape(interim_directory) + '/results_([0-9]+)'
  return re.findall(path, folder)

# Create a yaml file with data configuration
dict_file = {
    'path': interim_directory,
    'inference': 'resized_imgs',
    'names': {0: 'all'}
}
with open(r'./data_config.yaml', 'w') as file:
    yaml.dump(dict_file, file)

# If overwrite = True and a 'results' folder exists -> delete it and save the new results under 'results'
# Otherwise -> save under 'results' if no results folder exists yet, else under the next free 'results_n'
# (this fallback is also taken when overwrite = True but there is no 'results' folder to delete)
overwrite = True
results_root = os.path.join(interim_directory, 'results')
if overwrite and os.path.exists(results_root):
  shutil.rmtree(results_root)
  fname = 'results'
else:
  # glob.escape keeps characters of the directory name from being read as wildcards
  result_folders = glob.glob(glob.escape(interim_directory) + '/results*')
  if len(result_folders) == 0:
    fname = 'results'
  else:
    num_folders = [int(match[0]) for match in map(find_results, result_folders) if match]
    # default=0 covers the case where only the un-numbered 'results' folder exists;
    # max() on an empty list would raise ValueError.
    next_num = max(num_folders, default=0) + 1
    fname = f'results_{next_num}'
print(f'Saving in directory {fname}')

Saving in directory results


In [None]:
result_directory = os.path.join(interim_directory, fname)
# Derive the model and image locations from the path configuration instead of repeating the literals
weights_path = os.path.join(models_directory, 'yolo_model_100_epoch_optim_hyperparam.pt')
source_directory = os.path.join(interim_directory, 'resized_imgs')
# detect.py does not reuse an existing run folder: if runs/detect/inference is left over from a
# previous execution the new output goes to a suffixed folder, and the copy below would pick up
# the stale one. Removing it first guarantees that the copied folder belongs to this run.
shutil.rmtree('runs/detect/inference', ignore_errors=True)
!python detect.py --weights "{weights_path}" --data data_config.yaml --imgsz 512 --iou-thres 0.5 --conf-thres 0.5 --save-txt --name 'inference' --source "{source_directory}"
# Saving YOLO files
shutil.copytree('runs/detect/inference', result_directory)

### 4. GeoJSON mapping from bounding box predictions

#### 4.1 Convert from YOLO to COCO

In [None]:
def yolo2coco(yolo_data, img_w=512, img_h=512):
  """Convert one YOLO box into a COCO box expressed in pixels.

  Args:
    yolo_data (sequence): [x_center, y_center, width, height]; the values are
      scaled by the image size, i.e. they are fractions of the image.
    img_w (int): image width in pixels. Defaults to 512, the size the images
      are resized to before detection.
    img_h (int): image height in pixels. Defaults to 512.
  Returns:
    list: [x_min, y_min, width, height] in pixels.
  """
  x_yolo = yolo_data[0]
  y_yolo = yolo_data[1]
  w_yolo = yolo_data[2]
  h_yolo = yolo_data[3]

  w_coco = w_yolo * img_w
  h_coco = h_yolo * img_h
  # YOLO stores the box centre, COCO the top-left corner
  x_coco = x_yolo * img_w - w_coco / 2
  y_coco = y_yolo * img_h - h_coco / 2
  return [x_coco, y_coco, w_coco, h_coco]

def create_coco_file(predn, img_w=512, img_h=512):
  """Build a list of COCO-style result records from YOLO predictions.

  Args:
    predn (dict): maps an image id to its predictions. A value is either a list
      of rows [class, x_center, y_center, width, height], one per detection, or
      a single such row (flat list), which is still accepted.
    img_w (int): image width in pixels used to scale the boxes.
    img_h (int): image height in pixels used to scale the boxes.
  Returns:
    list of dict: one record per detection, e.g.
      {"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78]}
  """
  jdict = []
  for image_id, values in predn.items():
    # A flat row has a number in first position; a list of rows has a sequence there
    is_nested = bool(values) and isinstance(values[0], (list, tuple))
    rows = values if is_nested else [values]
    for row in rows:
      if not row:
        # Image without detections: nothing to emit
        continue
      bbox = yolo2coco(row[1:], img_w, img_h)
      jdict.append({
          'image_id': image_id,
          'category_id': int(row[0]),
          'bbox': [round(x, 3) for x in bbox]
      })
  return jdict

def read_yolo_file(path):
  """Read every detection stored in a YOLO label file.

  Each non-blank line is split on whitespace and converted to floats. All lines
  are returned, so images with several detections keep all of them, and an
  empty file yields an empty list.

  Args:
    path (str or Path): label file; its stem is used as the image id.
  Returns:
    tuple: (image_id, rows) where image_id is an int when the file stem is
    numeric and the stem string otherwise, and rows is a list with one list of
    floats per line.
  """
  path = Path(path)
  with open(path, 'rt') as fd:
    rows = [list(map(float, line.split())) for line in fd if line.strip()]
  image_id = int(path.stem) if path.stem.isnumeric() else path.stem
  return image_id, rows

In [None]:
label_directory = os.path.join(result_directory, 'labels')
# Restrict the listing to the .txt label files: coco_labels.json is later written into this
# same folder and must not be parsed as a label file when this cell is re-run.
# Sorting makes the processing order reproducible.
yolo_files = sorted(glob.glob(os.path.join(label_directory, '*.txt')))
# Map every image id to the predictions read from its label file
predictions = {}
for path in yolo_files:
  image_id, predn = read_yolo_file(path)
  predictions[image_id] = predn

In [None]:
# Convert the YOLO predictions gathered above into a list of COCO-style records
coco_format = create_coco_file(predictions)

In [None]:
# Store the COCO records as JSON next to the YOLO label files
coco_path = os.path.join(label_directory, 'coco_labels.json')
with open(coco_path, 'w') as coco_out:
    json.dump(coco_format, coco_out)

#### 4.2 Georeference

In [None]:
# Install the geospatial libraries imported in the next cell
!pip install geopandas rasterio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import geopandas as gpd
import pandas as pd
from shapely.geometry import box
from shapely.affinity import affine_transform
from rasterio.transform import from_bounds

In [None]:
def from_imgcoords_to_latlong(
  bbox_coords, img_lat_long,
  img_measurements=[512,512]
  ):
  """
  Map a bounding box given in image pixel coordinates onto the geographic
  extent of its image.

  Args:
    bbox_coords (list): box corners as [x_min, y_min, x_max, y_max]
    img_lat_long (list): lat/long boundaries of the image with the
    format [top, left, bottom, right]
    img_measurements (list): [img_width, img_height]
  Returns:
    The box geometry after applying the affine transformation built from
    the image boundaries and measurements
  """
  pixel_box = box(bbox_coords[0], bbox_coords[1], bbox_coords[2], bbox_coords[3])

  top, left, bottom, right = (
      img_lat_long[0], img_lat_long[1], img_lat_long[2], img_lat_long[3]
  )
  img_width, img_height = img_measurements[0], img_measurements[1]
  pixel_to_geo = from_bounds(
      west=left,
      south=bottom,
      east=right,
      north=top,
      width=img_width,
      height=img_height
  )

  # Coefficients in the order affine_transform takes them: [a, b, d, e, xoff, yoff]
  coefficients = [
      pixel_to_geo.a, pixel_to_geo.b,
      pixel_to_geo.d, pixel_to_geo.e,
      pixel_to_geo.xoff, pixel_to_geo.yoff
  ]
  return affine_transform(pixel_box, coefficients)

In [None]:
# File with a node_id column containing a unique id, and top, left, bottom, right columns
# containing the lat/long boundaries of the image
img_latlong_path = '/content/drive/MyDrive/UNICEF & NYU Giga initiative - data sharing/Satellite images/unfiltered_with_bboxes.csv'
img_latlong_data = pd.read_csv(img_latlong_path)
# Read the COCO predictions back inside a context manager so the file handle is closed
with open(coco_path) as coco_file:
  bbox_pred = json.load(coco_file)

In [None]:
geom_list_geo = []
node_ids = []
bounds_columns = ['top', 'left', 'bottom', 'right']
for prediction in bbox_pred:
  # The image id of a prediction is the node id of its image
  node_id = prediction['image_id']

  # Look up the lat/long boundaries of the image; exactly one row has to match
  is_node = img_latlong_data['node_id'] == node_id
  matches = img_latlong_data.loc[is_node, bounds_columns].to_numpy()
  n_matches = matches.shape[0]
  if n_matches == 0:
    raise ValueError(f"Node id {node_id} doesn't exist")
  if n_matches > 1:
    raise ValueError('Node id tied to more than one image')
  image_bounds = matches[0]

  # COCO [x, y, width, height] -> corner form [xmin, ymin, xmax, ymax]
  coco_box = prediction['bbox']
  corner_box = [
      coco_box[0],
      coco_box[1],
      coco_box[0] + coco_box[2],
      coco_box[1] + coco_box[3]
  ]

  # Transform the box into lat/long coords and keep it together with its node id
  geom_list_geo.append(from_imgcoords_to_latlong(corner_box, image_bounds))
  node_ids.append(node_id)

In [None]:
geo_results_path = os.path.join(result_directory, 'geo_boxes.geojson')
crs = "EPSG:4326"
# One row per predicted box: the node id of its image plus the georeferenced geometry
geo_df = gpd.GeoDataFrame(
    node_ids,
    columns=['node_id'],
    geometry=geom_list_geo,
    crs=crs
    )
# save geojson file; the driver is named explicitly so the output format does not depend
# on how the installed GeoPandas version picks a default driver
geo_df.to_file(
    geo_results_path,
    driver='GeoJSON'
    )