In [9]:
import zipfile
import pandas as pd
import hyperspy.api as hs
import os
import csv

from typing import Dict, Any

def recursive_search(d: Dict[str, Any], target_key: str) -> Any:
    for key, value in d.items():
        if key == target_key:
            return value
        if isinstance(value, dict):
            found = recursive_search(value, target_key)
            if found is not None:
                return found
    return None

def extract_and_write_metadata(conversion_csv, input_path, output_csv):
    df = pd.read_csv(conversion_csv)
    hyperspy_names = df['hyperspy'].dropna().tolist()
    
    with open(output_csv, 'w', newline='') as csvfile:
        fieldnames = ['filename'] + hyperspy_names
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()

    if input_path.endswith('.zip'):
        # Create a temporary directory to extract files
        with tempfile.TemporaryDirectory() as tmpdirname:
            with zipfile.ZipFile(input_path, 'r') as zip_ref:
                zip_ref.extractall(tmpdirname)
                
                for filename in os.listdir(tmpdirname):
                    if filename.endswith('.tif'):
                        file_path = os.path.join(tmpdirname, filename)
                        image = hs.load(file_path)
                        om = image.original_metadata.as_dictionary()
                        metadata_dict = {'filename': filename}
                            for hyperspy_name in hyperspy_names:
                                value = recursive_search(om, hyperspy_name)
                                if isinstance(value, tuple) and len(value) > 1:
                                    value = value[1]
                                metadata_dict[hyperspy_name] = value if value is not None else 'N/A'
                            writer.writerow(metadata_dict)
        else:
            for filename in os.listdir(input_path):
                if filename.endswith('.tif'):
                    image_path = os.path.join(input_path, filename)
                    image = hs.load(image_path)
                    om = image.original_metadata.as_dictionary()
                    metadata_dict = {'filename': filename}
                    for hyperspy_name in hyperspy_names:
                        value = recursive_search(om, hyperspy_name)
                        if isinstance(value, tuple) and len(value) > 1:
                            value = value[1]
                        metadata_dict[hyperspy_name] = value if value is not None else 'N/A'
                    writer.writerow(metadata_dict)
                    

In [10]:
conversion_csv = "/Users/elias/Downloads/hyperspy_metadata_variable_names - Sheet1.csv"
# input_path = "/Users/elias/Desktop/MatWerk_Projects/metadata_SEM_LM_Alexey/SEM_20230207to20230222"  # Replace with the path to your ZIP file containing SEM images
input_path = "/Users/elias/Desktop/MatWerk_Projects/metadata_SEM_LM_Alexey/SEM_20230207to20230222/csv_test_images.zip"
output_csv = "/Users/elias/Desktop/MatWerk_Projects/metadata_SEM_LM_Alexey/results/metadata_summary.csv"  # Replace with the path where you want to save the summary CSV
extract_and_write_metadata(conversion_csv, input_path, output_csv)

<zipfile.ZipExtFile name='20220404_Pbmrk_TS_250x_01.tif' mode='r' compress_type=deflate> 20220404_Pbmrk_TS_250x_01.tif


ValueError: The filenames parameter must be a list, tuple, string or None, not <class 'zipfile.ZipExtFile'>

In [66]:
imgPath = '/Users/elias/Desktop/MatWerk_Projects/metadata_SEM_LM_Alexey/SEM_20230207to20230222/20220404_Pbmrk_TS_SE_95x_02.tif'
f = hs.load(imgPath)
om = f.original_metadata.as_dictionary()

om

{'NewSubfileType': <FILETYPE.UNDEFINED: 0>,
 'ImageWidth': 1024,
 'ImageLength': 768,
 'BitsPerSample': 8,
 'Compression': <COMPRESSION.NONE: 1>,
 'PhotometricInterpretation': <PHOTOMETRIC.PALETTE: 3>,
 'StripOffsets': (101136,),
 'Orientation': <ORIENTATION.TOPLEFT: 1>,
 'SamplesPerPixel': 1,
 'RowsPerStrip': 4294967295,
 'StripByteCounts': (786432,),
 'XResolution': (1, 1),
 'YResolution': (1, 1),
 'ResolutionUnit': <RESUNIT.NONE: 1>,
 'ColorMap': array([[    0,   257,   514,   771,  1028,  1285,  1542,  1799,  2056,
          2313,  2570,  2827,  3084,  3341,  3598,  3855,  4112,  4369,
          4626,  4883,  5140,  5397,  5654,  5911,  6168,  6425,  6682,
          6939,  7196,  7453,  7710,  7967,  8224,  8481,  8738,  8995,
          9252,  9509,  9766, 10023, 10280, 10537, 10794, 11051, 11308,
         11565, 11822, 12079, 12336, 12593, 12850, 13107, 13364, 13621,
         13878, 14135, 14392, 14649, 14906, 15163, 15420, 15677, 15934,
         16191, 16448, 16705, 16962, 17219,