In [1]:
import os
# import scipy
import numpy as np
import rasterio
from sklearn.metrics import confusion_matrix
import pandas as pd
from sklearn.metrics import jaccard_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

In [2]:
## LOOKUP TABLES FOR RECLASSIFICATION (source pixel code -> 1 for cloud, 0 otherwise) ##

# Fmask codes, in order: clear land, clear water, cloud shadow, snow, cloud, fill.
# Only the "cloud" code (4) is mapped to 1.
fmask_reclass_dict = dict(zip(
    (0, 1, 2, 3, 4, 255),
    (0, 0, 0, 0, 1, 0),
))

# Manual-mask codes, in order: fill, cloud shadow, clear, thin cloud, cloud.
# Both "thin cloud" (192) and "cloud" (255) are mapped to 1.
manual_reclass_dict = dict(zip(
    (0, 64, 128, 192, 255),
    (0, 0, 0, 1, 1),
))

In [3]:
# --- Configuration -----------------------------------------------------------
top_dir = r'D:\BIOME-dataset-images\fmask_run'
results_csv_path = 'results.csv'
result_columns = ['Image', 'Accuracy', 'Precision', 'Recall', 'F1 Score',
                  'Jaccard Score', 'Kappa Score', 'ROC AUC Score']

# File-name endings that identify the two masks inside each image folder.
manual_mask_suffix = '_fixedmask.img'
fmask_suffix = '_Fmask4.tif'


def _reclassify(raster, mapping):
    """Map every pixel code in ``raster`` through ``mapping`` with a lookup table.

    This replaces ``np.vectorize(mapping.get)(raster)``, which makes one Python
    call per pixel and silently produces ``None`` for codes missing from the
    mapping.

    Parameters
    ----------
    raster : numpy.ndarray
        Array of pixel codes. Integer dtypes are indexed directly; other dtypes
        are accepted only if every value is integral.
    mapping : dict
        Pixel code -> class label. Keys must be non-negative integers and
        values must be non-negative integers that fit in ``int16``.

    Returns
    -------
    numpy.ndarray
        ``int16`` array with the same shape as ``raster``.

    Raises
    ------
    ValueError
        If ``raster`` holds non-integral values or a code absent from ``mapping``.
    """
    raster = np.asarray(raster)
    if not np.issubdtype(raster.dtype, np.integer):
        as_int = raster.astype(np.int64)
        if not np.array_equal(as_int, raster):
            raise ValueError('raster contains non-integer pixel values')
        raster = as_int

    codes = np.fromiter(mapping.keys(), dtype=np.int64, count=len(mapping))
    labels = np.fromiter(mapping.values(), dtype=np.int64, count=len(mapping))

    # -1 marks "no mapping defined" so unmapped codes can be detected afterwards.
    lut = np.full(int(codes.max()) + 1, -1, dtype=np.int16)
    lut[codes] = labels

    if raster.size and (raster.min() < 0 or raster.max() >= lut.size):
        raise ValueError('raster contains pixel codes outside the range covered by the mapping')

    reclassified = lut[raster]
    unmapped = reclassified < 0
    if unmapped.any():
        raise ValueError(f'raster contains unmapped pixel codes: {np.unique(raster[unmapped]).tolist()}')
    return reclassified


def _find_by_suffix(file_names, suffix):
    """Return the last name in ``file_names`` ending with ``suffix``, or ``None``.

    ``endswith`` is used rather than a substring test so that sidecar files
    such as ``*_fixedmask.img.aux.xml`` are not mistaken for the raster itself.
    """
    matches = [name for name in file_names if name.endswith(suffix)]
    return matches[-1] if matches else None


def _read_first_band(path):
    """Read band 1 of the raster at ``path`` and close the dataset afterwards."""
    with rasterio.open(path) as dataset:
        return dataset.read(1)


# --- Work list ---------------------------------------------------------------
# Only sub-directories are image folders; stray files in top_dir are ignored.
folders = [name for name in os.listdir(top_dir)
           if os.path.isdir(os.path.join(top_dir, name))]

if not os.path.exists(results_csv_path):
    # First run: create the CSV with just the header row.
    results_df = pd.DataFrame(columns=result_columns)
    results_df.to_csv(results_csv_path, index=False)
else:
    # Resume: drop every image that already has a row in the CSV.
    results_df = pd.read_csv(results_csv_path)
    already_processed = set(results_df['Image'].astype(str))
    folders = [name for name in folders if name not in already_processed]


# --- Per-image evaluation ----------------------------------------------------
for folder in tqdm(folders, desc='Processing Images'):
    folder_path = os.path.join(top_dir, folder)
    files = os.listdir(folder_path)

    fixedmaskstr = _find_by_suffix(files, manual_mask_suffix)
    fmaskstr = _find_by_suffix(files, fmask_suffix)
    if fixedmaskstr is None or fmaskstr is None:
        # Without both masks there is nothing to compare; report and move on
        # instead of handing rasterio a directory path.
        tqdm.write(f'Skipping {folder}: missing {manual_mask_suffix} or {fmask_suffix} file')
        continue

    manual_classified = _read_first_band(os.path.join(folder_path, fixedmaskstr))
    fmask = _read_first_band(os.path.join(folder_path, fmaskstr))

    if manual_classified.shape != fmask.shape:
        tqdm.write(f'Skipping {folder}: mask shapes differ '
                   f'({manual_classified.shape} vs {fmask.shape})')
        continue

    # Collapse both masks to binary cloud / not-cloud labels.
    fmask_reclassified = _reclassify(fmask, fmask_reclass_dict)
    manual_classified_reclassified = _reclassify(manual_classified, manual_reclass_dict)

    # Flatten once; every metric below takes 1-D label arrays.
    y_true = manual_classified_reclassified.ravel()
    y_pred = fmask_reclassified.ravel()

    # Not written to the CSV; kept available for interactive inspection.
    confusion_matrix_result = confusion_matrix(y_true, y_pred)

    accuracy = accuracy_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)
    jaccard = jaccard_score(y_true, y_pred)
    kappa = cohen_kappa_score(y_true, y_pred)

    # ROC AUC is undefined when the reference mask holds a single class;
    # -999 is the sentinel stored in that case.
    if y_true.min() == y_true.max():
        roc_auc = -999
    else:
        roc_auc = roc_auc_score(y_true, y_pred)

    metrics_df = pd.DataFrame([{
        'Image': folder,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Jaccard Score': jaccard,
        'Kappa Score': kappa,
        'ROC AUC Score': roc_auc,
    }], columns=result_columns)

    # Append immediately so an interrupted run can resume from the CSV.
    metrics_df.to_csv(results_csv_path, mode='a', header=False, index=False)



Processing Images:   0%|          | 0/29 [00:00<?, ?it/s]

  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver=driver, sharing=sharing, **kwargs)
  dataset = DatasetReader(path, driver

KeyboardInterrupt: 