# Testing Various Extraction Methods

1. PIL Image tags
2. Using the [Zeiss Metadata TIFF Reader](https://github.com/cgohlke/tifffile)
3. Using Hyperspy

In [124]:
# Alternative test image (currently disabled):
# imgPath = '/Users/elias/Desktop/MatWerk_Projects/SEMtestImages/P10_ID01#4-PtPd_080.tif'
# Image file that the extraction cells in this notebook are run against.
imgPath = '/Users/elias/Desktop/MatWerk_Projects/SEMtestImages/Au-Gr_06.tif'

# PIL Image Tags


In [125]:
from PIL import Image
from PIL.ExifTags import TAGS

In [162]:
# Open the test image and keep its `tag_v2` tag mapping for manual inspection.
# NOTE(review): the image is never closed explicitly in this cell.
image = Image.open(imgPath)
tags  = image.tag_v2

# 34118, 34682

# for tag, value in tags.items():
#     print(f"Tag: {tag}, Value: {value}")
#     print('\n' + '-'*50 + '\n')

In [127]:
def _parse_metadata_line(line):
    """
    Split one metadata line into a ``(key, value)`` pair.

    The line is cut at its first ``=``; a line without ``=`` is cut at its
    first ``:`` instead. The text after the separator is broken into
    whitespace-separated tokens.

    Args:
        line (str): A single line taken from a metadata string.

    Returns:
        tuple | None: ``(key, value)``, where ``key`` is the stripped text
        before the separator and ``value`` is the token itself when there is
        exactly one, otherwise a tuple of all tokens (empty when nothing
        follows the separator). ``None`` when the line has neither separator.
    """
    # "=" is tried first so that a line containing both separators is split
    # on "=".
    for separator in ("=", ":"):
        if separator in line:
            name, _, remainder = line.partition(separator)
            tokens = tuple(remainder.split())
            return name.strip(), (tokens[0] if len(tokens) == 1 else tokens)
    return None


def pil_extract(file_path) -> dict:
    """
    Collect key/value metadata from TIFF tags 34118 and 34682 using PIL.

    Each tag value that is present and non-empty is split into lines, and
    every line containing ``=`` or ``:`` contributes one dictionary entry.
    When the same key occurs more than once, the last occurrence wins.

    Args:
        file_path: Image location, passed unchanged to ``Image.open``.

    Returns:
        dict: Parsed metadata. Values are a single string when exactly one
        whitespace-separated token follows the separator, otherwise a tuple
        of tokens. If any exception is raised while opening or parsing,
        ``{"error": <exception message>}`` is returned instead.
    """
    try:
        with Image.open(file_path) as img:
            metadata = {}
            # NOTE(review): presumably the Zeiss (34118) and FEI (34682)
            # private metadata tags - confirm against the instrument files.
            for tag in (34118, 34682):
                text = img.tag_v2.get(tag)
                if not text:
                    # Tag missing or empty: nothing to parse.
                    continue
                for line in text.split("\n"):
                    pair = _parse_metadata_line(line)
                    if pair is not None:
                        metadata[pair[0]] = pair[1]
            return metadata

    except Exception as e:
        return {"error": str(e)}

In [128]:
# Parse the test image with the PIL-based extractor.
PIL_md = pil_extract(imgPath)

# Number of keys the PIL parser produced.
len(PIL_md)

721

## Zeiss Metadata Reader

In [129]:
import zeisstiffmeta as ztm
def zeiss_extract(file_path: str) -> dict:
    """
    Read a TIFF file's metadata through the ``zeisstiffmeta`` module (``ztm``).

    Args:
        file_path (str): Location of the TIFF file to read.

    Returns:
        dict: The output of ``ztm.zeiss_meta`` converted by
        ``ztm.meta_to_dict_all``. If either call raises,
        ``{"error": <exception message>}`` is returned instead.
    """
    try:
        # Read the raw metadata, then convert it to a dictionary in one step.
        return ztm.meta_to_dict_all(ztm.zeiss_meta(file_path))
    except Exception as exc:
        return {"error": str(exc)}

In [130]:
# zeiss_md = zeiss_extract(imgPath)
# zeiss_md.pop('xpts')
# zeiss_md.pop('ypts')
# zeiss_md.pop('height')
# zeiss_md.pop('width')

## Hyperspy Method

In [154]:
import hyperspy.api as hs

# Load the test image through HyperSpy.
f = hs.load(imgPath)

# Alternative metadata section (currently disabled):
# hs_md_raw = f.original_metadata.as_dictionary().get('fei_metadata', {})
# Take the 'CZ_SEM' section of the original metadata; fall back to an empty dict when it is missing.
hs_md_raw = f.original_metadata.as_dictionary().get('CZ_SEM', {})
# Remove the entry stored under the empty-string key, if there is one.
# As the last expression of the cell, the popped value is what the notebook displays.
hs_md_raw.pop('', None)

(0,
 0,
 0,
 9.183126e-10,
 399850.2,
 6,
 5000.0,
 2.25,
 2e-07,
 0.004021607,
 1,
 9.183126e-10,
 399850.2,
 6,
 5000.0,
 2.25,
 2e-07,
 0.004021607,
 2,
 3.696236e-06,
 99.3409,
 6,
 0.0,
 2.25,
 2e-07,
 0.00246,
 3,
 3.696236e-06,
 99.3409,
 6,
 0.0,
 2.25,
 2e-07,
 0.00246,
 724)

In [132]:
def flatten_dict(d):
    """
    Collapse a nested dictionary into a single level.

    Every key whose value is itself a dictionary is dropped and replaced by
    that dictionary's (recursively flattened) contents; all other entries are
    kept as they are. When the same key occurs more than once, the value met
    last during the traversal wins.

    Args:
        d (dict): The dictionary to flatten; it is not modified.

    Returns:
        dict: A new dictionary holding only the non-dictionary values.
    """
    flat = {}
    for key, value in d.items():
        # A nested dict contributes its own flattened entries; anything else
        # contributes itself under its own key.
        flat.update(flatten_dict(value) if isinstance(value, dict) else {key: value})
    return flat

In [165]:
# Flatten any nested sections of the HyperSpy metadata, then compare its size with the PIL result.
hs_md = flatten_dict(hs_md_raw)
print(f'Length of HS metadata: {len(hs_md)}, Length of PIL metadata: {len(PIL_md)}')

Length of HS metadata: 722, Length of PIL metadata: 721


In [160]:
# Use the first element of every flattened HyperSpy entry as its key
# (in the samples shown in this notebook that element is the display name).
hyperspy_keys = [entry[0] for entry in hs_md.values()]

hyperspy_keys

['Vent inhibit',
 'OptiBeam Mode',
 'VP Aperture',
 'Input LUT Mode',
 'BSD Autolevel Mode',
 'Recipe',
 'Signal B',
 'BSD Fast',
 'Opt.Aperture',
 'Track Z',
 'EPSE Electrode',
 'C3 Rot Corrn.',
 'Fisheye Mode',
 'Stage Tilt',
 'Argon Gun',
 'Column Isolated',
 'Mixing',
 'Flood Gun Mode Target',
 'STEM Gain',
 'High Current',
 'Dwell Time',
 'SmartImage Noise Reduction',
 'Right Frozen',
 'Flood Gun Blanked',
 'Fast Mode',
 'Compuc. Mode',
 'Anode Aperture Changer',
 'GIS Shutdown State',
 'FIB Column',
 'Stage Tilted',
 'FIB Suppressor is',
 'VPSE detector',
 'Crosshairs',
 'Double Deflection State',
 'WDX Gate Valve posn',
 'HE-SE2 Fitted',
 'EP Gas',
 'Condenser Normal',
 'Invert B',
 'BSD Gain',
 'FIB Gun Valve',
 'Aperture',
 'Electron Counting',
 'Saturate at',
 'Stage Angle corrn.',
 'Store resolution',
 'Field Mode',
 'CoolStage Type',
 'Best Aperture',
 'Flood Gun Actual Mode',
 'STEM Auto',
 'Stage Initialised',
 'GIS Channel with CC',
 'Display',
 'Flood Gun Fitted',
 'BSD

In [161]:
# Keys of the PIL metadata dictionary, in insertion order.
PIL_keys = list(PIL_md)

PIL_keys

['Vent inhibit',
 'OptiBeam Mode',
 'VP Aperture',
 'Input LUT Mode',
 'BSD Autolevel Mode',
 'Recipe',
 'Signal B',
 'BSD Fast',
 'Opt.Aperture',
 'Track Z',
 'EPSE Electrode',
 'C3 Rot Corrn.',
 'Fisheye Mode',
 'Stage Tilt',
 'Argon Gun',
 'Column Isolated',
 'Mixing',
 'Flood Gun Mode Target',
 'STEM Gain',
 'High Current',
 'Dwell Time',
 'SmartImage Noise Reduction',
 'Right Frozen',
 'Flood Gun Blanked',
 'Fast Mode',
 'Compuc. Mode',
 'Anode Aperture Changer',
 'GIS Shutdown State',
 'FIB Column',
 'Stage Tilted',
 'FIB Suppressor is',
 'VPSE detector',
 'Crosshairs',
 'Double Deflection State',
 'WDX Gate Valve posn',
 'HE-SE2 Fitted',
 'EP Gas',
 'Condenser Normal',
 'Invert B',
 'BSD Gain',
 'FIB Gun Valve',
 'Aperture',
 'Electron Counting',
 'Saturate at',
 'Stage Angle corrn.',
 'Store resolution',
 'Field Mode',
 'CoolStage Type',
 'Best Aperture',
 'Flood Gun Actual Mode',
 'STEM Auto',
 'Stage Initialised',
 'GIS Channel with CC',
 'Display',
 'Flood Gun Fitted',
 'BSD

In [141]:
# Report every key produced by the PIL parser that is missing from the HyperSpy key list.
for k in PIL_keys:
    if k in hyperspy_keys:
        continue
    print(f"key {k} in PIL metadata with value {PIL_md[k]}")

key N in PIL metadata with value 1


In [149]:
# Report every HyperSpy key that is missing from the PIL key list.
for k in hyperspy_keys:
    if k in PIL_keys:
        continue
    print(k)

In [138]:
# Display the full PIL-derived metadata dictionary for manual inspection.
PIL_md

{'Vent inhibit': ('Beam', 'Present'),
 'OptiBeam Mode': 'Resolution',
 'VP Aperture': 'No',
 'Input LUT Mode': 'Transparent',
 'BSD Autolevel Mode': 'Normal',
 'Recipe': 'Idle',
 'Signal B': (),
 'BSD Fast': 'No',
 'Opt.Aperture': '2',
 'Track Z': 'Off',
 'EPSE Electrode': 'Ring',
 'C3 Rot Corrn.': 'On',
 'Fisheye Mode': 'Off',
 'Stage Tilt': ('in', 'X'),
 'Argon Gun': 'Absent',
 'Column Isolated': 'No',
 'Mixing': 'Off',
 'Flood Gun Mode Target': ('Low', 'Energy', 'Mode'),
 'STEM Gain': 'Low',
 'High Current': 'Off',
 'Dwell Time': ('100', 'ns'),
 'SmartImage Noise Reduction': 'Off',
 'Right Frozen': 'Yes',
 'Flood Gun Blanked': 'No',
 'Fast Mode': 'Off',
 'Compuc. Mode': 'Off',
 'Anode Aperture Changer': 'Absent',
 'GIS Shutdown State': 'Normal',
 'FIB Column': 'None',
 'Stage Tilted': 'No',
 'FIB Suppressor is': 'Absent',
 'VPSE detector': 'Absent',
 'Crosshairs': 'Off',
 'Double Deflection State': 'Off',
 'WDX Gate Valve posn': 'OK',
 'HE-SE2 Fitted': 'No',
 'EP Gas': 'Air',
 'Cond

### Duplicates

How should this be implemented? Should the script detect duplicates and remove them dynamically, or should this function only be used to find out which keys are duplicated in the HyperSpy (HS) output, so that those keys can then always be removed statically?

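These are not even true duplicates: the entries share a display name but come from different parameters and hold different values, so we need a way to differentiate them.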

HS Metadata:
* `'ap_dc_shift_y': ('Aperture Align Y', 0.0, '%')`
* `'ap_aperture_align_y': ('Aperture Align Y', 1.3, '%')`

and

*  `'ap_dc_shift_x': ('Aperture Align X', 0.0, '%')`
* `'ap_aperture_align_x': ('Aperture Align X', -10.4, '%')`

PIL Metadata:
* `AP_DC_SHIFT_Y: Aperture Align Y =   0.0 %`
* `AP_APERTURE_ALIGN_Y: Aperture Align Y =   1.3 %`

and

* `AP_DC_SHIFT_X: Aperture Align X =   0.0 %`
* `AP_APERTURE_ALIGN_X: Aperture Align X = -10.4 %`

In [152]:
def ident_duplicates(key_list):
    """
    List the items of ``key_list`` that repeat an earlier item.

    Args:
        key_list: Iterable of hashable items.

    Returns:
        list: One entry for every occurrence after the first, in the order
        encountered; an item that appears three times is listed twice.
    """
    seen = set()
    repeats = []

    for entry in key_list:
        if entry not in seen:
            # First sighting: remember it and move on.
            seen.add(entry)
            continue
        repeats.append(entry)
    return repeats

# Print the repeated entries of each key list: HyperSpy first, then PIL.
print(ident_duplicates(hyperspy_keys))
print(ident_duplicates(PIL_keys))

['Aperture Align X', 'Aperture Align Y']
[]
