In [4]:
import os
import sys
from datetime import datetime

import numpy as np
import pandas as pd
import rasterio as rs
from rasterio.mask import mask
from rasterio.merge import merge
from sklearn import metrics
from sklearn.utils import resample

sys.path.append('../../src/')
from evaluation import validation as val
%load_ext autoreload
%autoreload 2

## Sampling Design
- type: pixel-based analysis
- design: stratified sampling approach by class
- total study area (all pixels in the strata): 26 districts

In [12]:
# Inputs for the validation sampling run.

# Number of points to sample. When choosing the sample count, consider how
# many will be dropped from the buffer zone AND for lack of ARD.
total_samples = 100

# Shapefile path handed to the validation workflow
# (presumably where the sampled points are written -- confirm).
outfile = '../../data/validation/sampled_points.shp'

# Prediction mosaic, opened as a rasterio dataset.
land_use_map = rs.open('../../tmp/ghana/preds/mosaic/ghana_v27_2024-09-27.tif')

In [23]:
# Run the validation sampling workflow on the prediction mosaic.
# NOTE(review): the meaning of the positional `1000` is not visible from this
# notebook (presumably a buffer distance) -- confirm against
# val.run_validation_workflow and give it a named constant.
sampled_points = val.run_validation_workflow(
    '../../tmp/ghana/preds/mosaic/ghana_v27_2024-09-27.tif',
    total_samples,
    outfile,
    1000,
    '../../params.yaml',
)

Class distribution: {0: 269113871, 1: 1272342, 2: 138084189, 3: 99051836}
Class proportions: {0: 53.03, 1: 0.25, 2: 27.21, 3: 19.52}
Total count (pixels): 507522238
Creating buffer with ['v08', 'v14', 'v15', 'v19', 'v20', 'v21', 'v22'] batches
(1342, 40)
(1342, 41)


AttributeError: 'MultiPolygon' object has no attribute 'shape'

In [80]:
# Confirm the AOI pixel count with three different methods.
#
# 1. array.size
#    + very fast, direct access to the array size
#    - includes 255 and any other invalid values in the count
#    use: getting the total number of pixels
#
# 2. np.bincount()
#    + efficiently counts only the specified valid classes
#    - limited by minlength; ignores any other valid classes
#    use: when you know exactly which classes to count
#
# 3. conditional count
#    + counts all pixels except 255, flexible
#    - slower and more memory-intensive for large datasets
#    use: when you want to exclude 255 without specifying valid classes

# `land_use_map` is opened with rasterio earlier in the notebook; a dataset
# handle has no .flatten()/.size, so read the first band into an array.
# If it is already an array, use it as-is.
if hasattr(land_use_map, 'read'):
    lulc_arr = land_use_map.read(1)
else:
    lulc_arr = np.asarray(land_use_map)

counts = np.bincount(lulc_arr.ravel(), minlength=4)
classes = [0, 1, 2, 3]
valid_counts = counts[classes]
total_count = valid_counts.sum()
# Count (not sum the class values of) every pixel that is not the 255 nodata value.
lulc_count = np.count_nonzero(lulc_arr != 255)

print(lulc_arr.size)
print(total_count)
print(lulc_count)

In [59]:
# Display the per-class proportions (percent of pixels per class).
# NOTE(review): `class_prop` is not assigned in any visible cell -- presumably
# left over from an earlier interactive session; define it explicitly so the
# notebook survives Restart & Run All.
class_prop

{0: 53.02504025449226,
 1: 0.250696798038631,
 2: 27.207514993658265,
 3: 19.516747953810842}

In [60]:
# Display the total pixel count over the valid classes; the value shown matches
# the "Total count (pixels)" line printed by the validation workflow.
total_count

507522238

In [None]:
# Evaluate the trained classifier on the test set and print per-class metrics.
# NOTE(review): `joblib`, `RESULTS_FOLDER` and `labels_test` are not defined in
# any visible cell; import/define them in the setup cells before running this.

# Load the model
# joblib.load unpickles the file -- only load model files from a trusted source.
model_path = os.path.join(RESULTS_FOLDER, "model_SI_LULC.pkl")
model = joblib.load(model_path)

# Load the test features
features_test = None  # TODO: load the test feature matrix here
if features_test is None:
    raise NotImplementedError(
        "Load the test features into `features_test` before running the evaluation."
    )

# Predict the test labels
predicted_labels_test = model.predict(features_test)

class_labels = np.unique(labels_test)
class_names = ['no tree', 'monoculture', 'agroforestry', 'natural']

# Keep only the samples whose predicted class occurs in the reference labels.
# Named `label_mask` so the `mask` function imported from rasterio.mask at the
# top of the notebook is not overwritten; np.isin replaces the deprecated np.in1d.
label_mask = np.isin(predicted_labels_test, labels_test)
predictions = predicted_labels_test[label_mask]
true_labels = labels_test[label_mask]

# Extract and display metrics
f1_scores = metrics.f1_score(true_labels, predictions, labels=class_labels, average=None)
avg_f1_score = metrics.f1_score(true_labels, predictions, average="weighted")
recall = metrics.recall_score(true_labels, predictions, labels=class_labels, average=None)
precision = metrics.precision_score(true_labels, predictions, labels=class_labels, average=None)
accuracy = metrics.accuracy_score(true_labels, predictions)

print("Classification accuracy {:.1f}%".format(100 * accuracy))
print("Classification F1-score {:.1f}%".format(100 * avg_f1_score))
print()
print("             Class              =  F1  | Recall | Precision")
print("         --------------------------------------------------")
# The per-class score arrays follow the order of `class_labels`, so index them
# by position and look the display name up by the label value.
for idx, label in enumerate(class_labels):
    line_data = (class_names[label], f1_scores[idx] * 100, recall[idx] * 100, precision[idx] * 100)
    print("         * {0:20s} = {1:2.1f} |  {2:2.1f}  | {3:2.1f}".format(*line_data))

In [None]:
## Confusion Matrix
# Visual inspiration: https://github.com/sentinel-hub/eo-learn/blob/master/examples/land-cover-map/SI_LULC_pipeline.ipynb

## Other Exercises

Unnamed: 0,Count,Percentage (%)
2,36934,76.0
255,5880,12.0
1,3361,7.0
0,2629,5.0


In [23]:
# create a comb mosaic
def mosaic_tif(tifs_to_mosaic, outpath, mosaic_dir='../tmp/ghana/preds/mosaic/'):
    """
    Merge a list of raster files into a single GeoTIFF.

    The output is written as an LZW-compressed uint8 GeoTIFF with nodata 255.
    Its remaining metadata is copied from the last input raster, with height,
    width and transform taken from the merged array.

    Parameters
    ----------
    tifs_to_mosaic : list of str
        File names of the rasters to merge, relative to ``mosaic_dir``.
    outpath : str
        Stem of the output file name. Today's date (YYYY-MM-DD) and ``.tif``
        are appended, and the file is written into ``mosaic_dir``.
    mosaic_dir : str, optional
        Directory that holds the inputs and receives the output.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``tifs_to_mosaic`` is empty.
    """
    tifs_to_mosaic = list(tifs_to_mosaic)
    if not tifs_to_mosaic:
        raise ValueError('tifs_to_mosaic is empty; nothing to merge.')

    sources = []
    try:
        for tif_name in tifs_to_mosaic:
            sources.append(rs.open(os.path.join(mosaic_dir, tif_name)))
        print(f'Merging {len(sources)} tifs.')

        mosaic, out_transform = merge(sources)

        # Start from the last input's metadata, then override what the merge changed.
        out_meta = sources[-1].meta.copy()
        out_meta.update({'driver': "GTiff",
                         'dtype': 'uint8',
                         'height': mosaic.shape[1],
                         'width': mosaic.shape[2],
                         'transform': out_transform,
                         'compress': 'lzw',
                         'nodata': 255})

        date = datetime.today().strftime('%Y-%m-%d')
        out_file = os.path.join(mosaic_dir, f"{outpath}_{date}.tif")
        with rs.open(out_file, "w", **out_meta) as dest:
            dest.write(mosaic)
    finally:
        # Close every input that was opened, even if opening, merging or writing failed.
        for src in sources:
            src.close()

    return None

In [25]:
# Regional v27 prediction rasters (dated 2024-08-23) to combine into one mosaic.
tifs = [f'pd_{region}_v27_2024-08-23.tif' for region in ('north', 'east', 'west')]

mosaic_tif(tifs, 'ghana_v27')

[<open DatasetReader name='../tmp/ghana/preds/mosaic/pd_north_v27_2024-08-23.tif' mode='r'>, <open DatasetReader name='../tmp/ghana/preds/mosaic/pd_east_v27_2024-08-23.tif' mode='r'>, <open DatasetReader name='../tmp/ghana/preds/mosaic/pd_west_v27_2024-08-23.tif' mode='r'>]
Merging 3 tifs.
