In [1]:
import pprint
from datetime import datetime
import pandas as pd
import os
import rasterio as rs
from rasterio.features import shapes
import geopandas as gpd
import numpy as np
from scipy.ndimage import label, sum as ndi_sum
import subprocess
import yaml
from osgeo import gdal
from osgeo import osr
from osgeo import ogr
from osgeo import gdalconst
import sys
from scipy import ndimage
sys.path.append('../src/')
# import prepare_rasters as pr
import prepare_polys as pp
import prepare_rasters as pr
import prepare_polys_orig as preprocess

%load_ext autoreload
%autoreload 2

## Prototyping
What are the input requirements?
1. inputParams
2. TTC - these look the same
3. lulc 2016 - slightly differently sizes bc of method for resampling/reprj. no use of compression

In [18]:
## quick comp of inputs (after preprocessing)
j_lulc = rs.open('../data/costa_rica/interim/lulc_cr_reprj.tif').read(1)
j_ttc = rs.open('../data/costa_rica/raw/CostaRica.tif').read(1)
j_sdpt = rs.open('../data/costa_rica/interim/cri_sdpt_v2.tif').read(1)
j_lulc.shape, j_ttc.shape, j_sdpt.shape

((35683, 37670), (33372, 37119), (33372, 37119))

In [3]:
preprocess.preprocess_datasets('../data/Input/lulc.tif')

Creating output file that is 37119P x 33372L.
Processing ../data/Input/lulc.tif [1/1] : 0Using internal nodata values (e.g. -1) for image ../data/Input/lulc.tif.
Copying nodata values from source ../data/Input/lulc.tif to destination ../data/Input/lulc_reprj.tif.
...10...20...30...40...50...60...70...80...90...100 - done.


In [26]:
e_lulc = rs.open('../data/Input/lulc_reprj.tif').read(1)
e_ttc = rs.open('../data/Input/TTC.tif').read(1)
e_lulc.shape, e_ttc.shape

((33372, 37119), (33372, 37119))

In [28]:
j_lulc.shape[0] - e_lulc.shape[0]

2311

In [29]:
j_lulc.shape[1] - e_lulc.shape[1]

551

In [3]:
# in this iteration, the only change 
pp.create_highconf_polygons(params_path='../data/Input/inputParams.csv',
                            lulc_path = '../data/Input/lulc_reprj.tif',
                            sdpt_path = '../data/costa_rica/interim/cri_sdpt_v2.tif', 
                            ttc_path = '../data/Input/TTC.tif'
                           )


 Step 3: Executing classifyRasters function for each unique category:

Processing  Mono_Palm :

lulc array shape: (33372, 37119)
lulc: 20
Calculating binary arr for lulc with <function reclassify_by_value at 0x10db024c0>
sdpt array shape: (33372, 37119)
sdpt: 1.0
Calculating binary arr for sdpt with <function reclassify_by_value at 0x10db024c0>
ttc array shape: (33372, 37119)
ttc: 40
Comparison type: greater
Calculating binary arr for ttc with <function reclassify_above_threshold at 0x10db02430>
Binary list length:  3
Array 0 shape: (33372, 37119)
Array 1 shape: (33372, 37119)
Array 2 shape: (33372, 37119)
Multiplying arrays...
Processing  Mono_Forest :

lulc array shape: (33372, 37119)
lulc: 8
Calculating binary arr for lulc with <function reclassify_by_value at 0x10db024c0>
sdpt array shape: (33372, 37119)
sdpt: 2.0
Calculating binary arr for sdpt with <function reclassify_by_value at 0x10db024c0>
ttc array shape: (33372, 37119)
ttc: 80
Comparison type: greater
Calculating binary ar

In [7]:
# now run to combine output
pp.create_highconf_polygons(params_path='../data/Input/inputParams.csv',
                            lulc_path = '../data/Input/lulc_reprj.tif',
                            sdpt_path = '../data/costa_rica/interim/cri_sdpt_v2.tif', 
                            ttc_path = '../data/Input/TTC.tif'
                           )


 Step 5: Getting centroids from polygons...

Processing ../data/Output/Intermediate/Poly/Mono_Forest.shp
Processing ../data/Output/Intermediate/Poly/Agro_Coffee.shp
Processing ../data/Output/Intermediate/Poly/NP_Mangrove.shp
Processing ../data/Output/Intermediate/Poly/NP_Urban_2.shp
Processing ../data/Output/Intermediate/Poly/NP_Urban.shp
Processing ../data/Output/Intermediate/Poly/NP_Pineapple.shp
Processing ../data/Output/Intermediate/Poly/NP_Mature.shp
Processing ../data/Output/Intermediate/Poly/Mono_Palm.shp
Processing ../data/Output/Intermediate/Poly/NP_Bare.shp
Processing ../data/Output/Intermediate/Poly/NP_Bare_2.shp


  final_centroids.to_file(output_shapefile_centroids)



All centroids have been saved to ../data/Output/Final/all_centroids.shp
All polygons have been merged and saved to ../data/Output/Final/merged_polygons.shp
Statistics saved to ../data/Output/Final/stats_jessica.csv

           Filename  Number of Polygons  Number of Centroids  Min Size  \
0   Mono_Forest.shp                 141                  141       196   
1   Agro_Coffee.shp                  43                   43       196   
2   NP_Mangrove.shp                 802                  802       196   
3    NP_Urban_2.shp                1351                 1351       196   
4      NP_Urban.shp                2739                 2739       196   
5  NP_Pineapple.shp                 160                  160       196   
6     NP_Mature.shp               26791                26791       196   
7     Mono_Palm.shp                 788                  788       196   
8       NP_Bare.shp                 945                  945       196   
9     NP_Bare_2.shp                  48    

In [None]:
# get the starting shape of the AF raster
# and the count of polygons
af_raster = rs.open('../data/Output/Intermediate/Rasters/Agro_Coffee.tif').read(1)
af_poly = gpd.read_file('../data/Output/Intermediate/Poly/Agro_Coffee.shp')
af_raster.shape, af_poly.shape

In [34]:
# we know af raster is comprised of 1s and 0s
np.unique(af_raster, return_counts=True)

(array([0, 1], dtype=uint8), array([1238688044,      47224]))

In [41]:
# now label and get count for each label
labeled_array, num_features = ndimage.label(af_raster)

In [42]:
# get pixel count for each label
label_size = [(labeled_array == label).sum() for label in range(num_features + 1)]


KeyboardInterrupt



In [None]:
label_size

In [None]:
thresh = 196

In [None]:
for label,size in enumerate(label_size):
    if size < thresh:
        arr[labeled_array == label] = 0

In [None]:
    filtered_image = image * mask

    results = (
        {'properties': {'raster_val': v}, 'geometry': s}
        for i, (s, v) in enumerate(shapes(filtered_image,
                                        mask=filtered_image, 
                                        transform=src.transform))
                                        )

    geoms = list(results)
    if geoms:
        gdf = gpd.GeoDataFrame.from_features(geoms)
        gdf.crs = crs  # set the CRS for the GeoDataFrame
        gdf.to_file(os.path.join(output_folder, filename.replace('.tif', '.shp')))