### Step 1 : Import required libraries

In [1]:
import os
import traceback
from pathlib import Path

import pandas as pd
from osgeo import gdal
from helpers import *
from imputation import Imputation as imp
import numpy as np
from experiment import Imputation

### Step 2: Run the imputation experiments on each dataset

- For each file in the input folder:
      for each missing percentage (from 10% to 50%, in steps of 10):
         create the missing pixels
         call the Imputation class to predict the missing pixels
         collect the results and store the statistics in a DataFrame for further analysis
         

In [None]:


# Columns every result row is expected to carry; they always lead the final table.
resultColumns = ['fileName', 'missingPercent', 'algo', 'RSE', 'runTime']

# One DataFrame per (file, missing percent) run. They are combined once after the
# loops instead of re-concatenating the growing table on every iteration.
resultFrames = []

# NOTE(review): `path` is not assigned anywhere in the visible notebook; presumably
# it comes from `from helpers import *` or an earlier cell. Confirm before Run All.
# The listing is sorted so the row order of the output is reproducible.
for filename in sorted(os.listdir(path)):
    if not filename.endswith('.IMG'):
        continue
    print(filename)

    # The input path does not depend on the missing percentage, so build it once.
    filePath = os.path.join(path, filename)

    # Iterate over missing percentage values (10 to 50, step 10)
    for missingPercent in range(10, 60, 10):
        # A fresh Imputation instance is created for every run.
        # NOTE(review): `outputFile` receives the input folder, not a file name;
        # verify that this is what the Imputation class expects.
        impute = Imputation(inputFile=filePath, outputFile=path)

        # Create missing pixels in the image
        impute.createMissingPixels(percent=missingPercent)

        # Predict the missing pixels and keep the returned statistics
        results = impute.predictMissingPixels(outputFolder=path)
        resultFrames.append(pd.DataFrame(results))

if resultFrames:
    combinedResults = pd.concat(resultFrames, ignore_index=True)
    # Keep the declared columns first (absent ones become NaN) and append any
    # additional columns the runs produced, matching the original table layout.
    extraColumns = [c for c in combinedResults.columns if c not in resultColumns]
    finalDataframe = combinedResults.reindex(columns=resultColumns + extraColumns)
else:
    # No matching files: still produce an empty table with the expected header.
    finalDataframe = pd.DataFrame(columns=resultColumns)

# Save the final results DataFrame to a TSV file
finalDataframe.to_csv('imputation_ChandDataset2_Results.tsv', sep='\t')


### Step 4: Create a heat map of the results obtained above

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
from osgeo import gdal



# Root-mean-square error between two images (one scalar by default, or reduced
# along `axis` when given)
def calculate_rmse(image1, image2, axis=None):
    """Return the root-mean-square error between two images.

    Parameters
    ----------
    image1, image2 : array_like
        Arrays with identical (or broadcast-compatible) shapes.
    axis : int or tuple of int, optional
        Axis or axes to average over. The default ``None`` averages over every
        element and yields a single scalar. Passing the band axis (e.g. ``-1``
        for a bands-last array) yields one RMSE value per pixel.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar when ``axis`` is ``None``, otherwise an array with the
        averaged axes removed.
    """
    # Work in float64: subtracting unsigned integer images wraps around and
    # squaring narrow integer types overflows, both of which silently corrupt
    # the error value.
    difference = np.asarray(image1, dtype=np.float64) - np.asarray(image2, dtype=np.float64)
    return np.sqrt(np.mean(difference ** 2, axis=axis))


# File paths of the original raster and of the raster holding the predicted pixels.
# NOTE(review): these are absolute, machine-specific paths; point them at your own
# copy of the data before running this cell.
orig_image = '/Users/bunny/PycharmProjects/Imputation/data_/MI/dataset2/Area2_MI_MAP_03_N22E196N21E197SC.tif'
predicted_image_path = '/Users/bunny/PycharmProjects/Imputation/data_/MI/dataset2/CMSI/Area2_MI_MAP_03_N22E196N21E197SC.tif/10CMSI.tif'

# Factor applied to the original raster's values before comparing them with the
# prediction. NOTE(review): presumably the scale factor of the data product; confirm.
SCALE_FACTOR = 0.00002


def _read_bands_last(raster_path):
    """Read a multi-band raster with GDAL and return it as (rows, cols, bands)."""
    dataset = gdal.Open(raster_path)
    if dataset is None:
        # Without GDAL exceptions enabled, Open() returns None on failure; fail
        # here with a clear message instead of an AttributeError further down.
        raise RuntimeError(f'GDAL could not open raster: {raster_path}')
    # GDAL returns multi-band data as (bands, rows, cols). Move only the band axis
    # to the end so matplotlib shows the image in its real orientation; a plain
    # `.T` would also swap rows and columns and draw the image transposed.
    return np.moveaxis(dataset.ReadAsArray(), 0, -1)


# Read both rasters; only the original one is rescaled.
predicted_image = _read_bands_last(predicted_image_path)
original_image = _read_bands_last(orig_image) * SCALE_FACTOR

# Crop the original (top-left window) to the extent of the predicted raster so the
# two arrays can be compared element by element.
crop_rows, crop_cols = predicted_image.shape[:2]
original_image = original_image[:crop_rows, :crop_cols, :]

# Work on independent numpy copies of both images
original_array = np.array(original_image)
predicted_array = np.array(predicted_image)

# Per-pixel RMSE: average the squared error over the band axis
rmse_array = np.sqrt(np.mean((original_array - predicted_array) ** 2, axis=-1))

# Draw the original, the prediction and the error heat map side by side
fig, axes = plt.subplots(1, 3, figsize=(12, 8))

# Only the first three bands are shown as an RGB composite
axes[0].imshow(original_array[:, :, :3])
axes[0].set_title('Original Image')
axes[0].axis('off')

axes[1].imshow(predicted_array[:, :, :3])
axes[1].set_title('Predicted Image')
axes[1].axis('off')

heatmap = axes[2].imshow(rmse_array, cmap='hot', interpolation='nearest')
fig.colorbar(heatmap, ax=axes[2], label='RMSE Value')
axes[2].set_title('RMSE Heatmap')

fig.tight_layout()
plt.show()
