In [70]:
import geopandas as gpd
import numpy as np
import rasterio
from rasterio.mask import mask
from rasterio.plot import show
from pathlib import Path
import pandas as pd
from PIL import Image
%matplotlib inline

In [31]:
# --- Paths -----------------------------------------------------------------
# Everything is resolved relative to the notebook's working directory.
curr_path = Path('')
data_path = curr_path / './vision_project'
stac_path = data_path / 'stac'

# --- Tabular inputs ----------------------------------------------------------
# Derive every CSV location from data_path so that moving the data root only
# requires changing it in one place.
sub_df = pd.read_csv(data_path / 'submission_format.csv')
trn_df = pd.read_csv(data_path / 'train_labels.csv')
met_df = pd.read_csv(data_path / 'metadata.csv')

# --- Per-scene file lists ----------------------------------------------------
# One entry per metadata row; the three lists are index-aligned because they
# come from the same frame. Downstream cells join each entry onto data_path.
tiff_paths = met_df['image'].tolist()
train_geo_paths = met_df['train'].tolist()
test_geo_paths = met_df['test'].tolist()

In [90]:
# Crop every labelled roof polygon out of its source GeoTIFF and write the crop
# to <data_path>/training/unmasked/<roof_material>/<id>.png.
for fpath_tiff, train_geojson in zip(tiff_paths, train_geo_paths):
    train_geojson = str(data_path / train_geojson)
    df_roof_geometries = gpd.read_file(train_geojson)

    fpath_tiff = str(data_path / fpath_tiff)

    # Open the raster once per scene: the same handle supplies the CRS used for
    # reprojection and the pixels used for cropping.
    with rasterio.open(fpath_tiff) as tiff:
        tiff_crs = tiff.crs.data
        # Reproject the polygons into the raster's CRS so mask() can locate
        # them in the image.
        df_roof_geometries['projected_geometry'] = (
            df_roof_geometries[['geometry']].to_crs(tiff_crs)
        )

        roof_geometries = (
            df_roof_geometries[['id', 'projected_geometry', 'roof_material']].values)

        for roof_id, projected_geometry, roof_material in roof_geometries:
            # filled=False: pixels outside the polygon are not overwritten, so
            # the saved crop still shows the roof's surroundings.
            roof_image, _ = mask(
                tiff, [projected_geometry], crop=True, pad=True,
                filled=False, pad_width=0.5
            )
            # Move the leading (band) axis last, which is the layout
            # Image.fromarray expects for multi-channel images.
            roof_image = np.transpose(roof_image, (1, 2, 0))

            save_path = data_path / "training/unmasked/{}/{}.png".format(roof_material, roof_id)
            # Create the per-material folder on demand so a fresh checkout
            # does not fail on the first save.
            save_path.parent.mkdir(parents=True, exist_ok=True)

            pil_img = Image.fromarray(np.array(roof_image))
            pil_img.save(str(save_path))

In [91]:
# Crop every test roof polygon out of its source GeoTIFF and write two PNGs per
# roof: testing/masked/<id>.png and testing/unmasked/<id>.png under data_path.
masked_dir = data_path / "testing/masked"
unmasked_dir = data_path / "testing/unmasked"
# Create the output folders up front so a fresh checkout does not fail on the
# first save.
for out_dir in (masked_dir, unmasked_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

for fpath_tiff, test_geojson in zip(tiff_paths, test_geo_paths):
    test_geojson = str(data_path / test_geojson)
    df_roof_geometries = gpd.read_file(test_geojson)

    fpath_tiff = str(data_path / fpath_tiff)

    # Open the raster once per scene: the same handle supplies the CRS used for
    # reprojection and the pixels used for cropping.
    with rasterio.open(fpath_tiff) as tiff:
        tiff_crs = tiff.crs.data
        # Reproject the polygons into the raster's CRS so mask() can locate
        # them in the image.
        df_roof_geometries['projected_geometry'] = (
            df_roof_geometries[['geometry']].to_crs(tiff_crs)
        )

        roof_geometries = (
            df_roof_geometries[['id', 'projected_geometry']].values)

        for roof_id, projected_geometry in roof_geometries:
            # filled=False: pixels outside the polygon are not overwritten, so
            # this crop still shows the roof's surroundings.
            roof_image, _ = mask(
                tiff, [projected_geometry], crop=True, pad=True,
                filled=False, pad_width=0.5
            )
            # Move the leading (band) axis last, which is the layout
            # Image.fromarray expects for multi-channel images.
            roof_image = np.transpose(roof_image, (1, 2, 0))

            # filled=True: pixels outside the polygon are replaced by the fill
            # value, leaving only the roof itself visible.
            roof_mask, _ = mask(
                tiff, [projected_geometry], crop=True, pad=True,
                filled=True, pad_width=0.5
            )
            roof_mask = np.transpose(roof_mask, (1, 2, 0))

            pil_img = Image.fromarray(np.array(roof_mask))
            save_path = str(masked_dir / "{}.png".format(roof_id))
            pil_img.save(save_path)

            pil_img = Image.fromarray(np.array(roof_image))
            save_path = str(unmasked_dir / "{}.png".format(roof_id))
            pil_img.save(save_path)