In [4]:
import rasterio
import pandas as pd
import numpy as np
import os
from datetime import datetime

# Specify the base output path
output_base_path = r'Z:\Thesis\Data\GEE\MERRA2_aer\MERRA2_num_data'

# Specify the file path to your TIFF image
file_path = r"C:\Users\Charl\Downloads\2016123023.tif"  # Replace with the actual path to your TIFF file


def parse_filename_date(name):
    """Return the YYYYMMDD date embedded in *name* as a ``datetime``.

    Two strategies are tried in order:

    1. The first run of 8 *contiguous* digits that forms a valid calendar
       date.  This keeps unrelated digits elsewhere in the name (product
       or version numbers, a trailing hour, ...) from corrupting the date.
    2. Fallback: every digit in the name joined together, first 8 taken.
       This covers separated formats such as ``2016-12-30.tif``.

    Args:
        name: File name (or path) to search.

    Returns:
        A ``datetime`` at midnight of the date that was found.

    Raises:
        ValueError: If neither strategy yields a valid date.
    """
    date_format = "%Y%m%d"
    width = 8

    # Strategy 1: slide an 8-character window over the name and accept the
    # first all-digit window that strptime recognises as a real date.
    for start in range(len(name) - width + 1):
        window = name[start:start + width]
        if not window.isdigit():
            continue
        try:
            return datetime.strptime(window, date_format)
        except ValueError:
            continue

    # Strategy 2: digits separated by punctuation (e.g. 2016-12-30).
    joined_digits = ''.join(filter(str.isdigit, name))
    return datetime.strptime(joined_digits[:width], date_format)


# Extract year and month from the filename
file_name = os.path.basename(file_path)

# Print the filename to help debug
print(f"Filename: {file_name}")

try:
    date_obj = parse_filename_date(file_name)
except ValueError:
    print("Could not extract date from filename. Please check the filename format.")
    year, month = "unknown", "unknown"  # Default values if date extraction fails
else:
    year = date_obj.strftime("%Y")
    month = date_obj.strftime("%m")

# Create directories for year and month if they don't exist
output_path = os.path.join(output_base_path, year, month)
os.makedirs(output_path, exist_ok=True)

# Column labels for the 50 MERRA-2 bands, in the order they are applied to
# the bands of the TIFF. Kept as whitespace-separated text and split into a
# list so each species group (BC, DMS, DU, OC, SO2/SO4, SS, SU, TOT) sits on
# its own line.
band_names = """
    BCANGSTR BCCMASS BCEXTTAU BCFLUXU BCFLUXV BCSCATAU BCSMASS
    DMSCMASS DMSSMASS
    DUANGSTR DUCMASS25 DUCMASS DUEXTT25 DUEXTTAU DUFLUXU DUFLUXV DUSCAT25 DUSCATAU DUSMASS25 DUSMASS
    OCANGSTR OCCMASS OCEXTTAU OCFLUXU OCFLUXV OCSCATAU OCSMASS
    SO2CMASS SO2SMASS SO4CMASS SO4SMASS
    SSANGSTR SSCMASS25 SSCMASS SSEXTT25 SSEXTTAU SSFLUXU SSFLUXV SSSCAT25 SSSCATAU SSSMASS25 SSSMASS
    SUANGSTR SUEXTTAU SUFLUXU SUFLUXV SUSCATAU
    TOTANGSTR TOTEXTTAU TOTSCATAU
""".split()

# Sanity check: the label list must hold exactly 50 entries
assert len(band_names) == 50, "The number of band names must match the number of bands in the imagery."

# Open the MERRA-2 TIFF file and load every band as one row of pixel values
with rasterio.open(file_path) as src:
    # Fail early, with a clear message, if the file does not hold exactly one
    # band per label; otherwise the mismatch only surfaces later as an opaque
    # pandas shape error.
    if src.count != len(band_names):
        raise ValueError(
            f"{file_path} contains {src.count} band(s), but {len(band_names)} "
            "band names are defined."
        )

    # read() without a band index returns all bands as (bands, rows, cols);
    # collapsing the two spatial axes gives one flattened row per band.
    band_data = src.read().reshape(src.count, -1)

# Transpose so that each band becomes a column and each pixel a row
df = pd.DataFrame(band_data.T, columns=band_names)

# Generate the output file path based on year and month
# NOTE(review): the name carries only year and month, so every input file from
# the same month is written to the same CSV and replaces the previous one —
# confirm this is intended for hourly inputs.
csv_file_path = os.path.join(output_path, f"merra2_numerical_data_{year}_{month}.csv")

# Save the DataFrame to a CSV file
df.to_csv(csv_file_path, index=False)

print(f"CSV file saved at: {csv_file_path}")


Filename: 2016123023.tif
CSV file saved at: Z:\Thesis\Data\GEE\MERRA2_aer\MERRA2_num_data\2016\12\merra2_numerical_data_2016_12.csv
