# Preprocess the simulated 4D-STEM data

Chia-Hao Lee

cl2696@cornell.edu

Created 2025.02.18

# 01. Imports

In [1]:
%reload_ext autoreload
%autoreload 2

import os
import re

import h5py
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import gaussian_filter

In [2]:
import os
# Raw string: '\w' and '\p' are not valid escape sequences, so the plain literal only
# worked by accident (and triggers a SyntaxWarning on recent Python versions).
# NOTE: this is a machine-specific absolute path; adjust it to the local checkout.
work_dir = r"H:\workspace\ptyrad"
os.chdir(work_dir) # All relative paths used below are resolved against this directory
print("Current working dir: ", os.getcwd())

Current working dir:  H:\workspace\ptyrad


In [3]:
def meas_add_poisson_noise(meas, unit, value, scan_step_size, rng=None):
    ''' Add Poisson noise to meas

    Every diffraction pattern (the last two axes of meas) is normalized to sum to 1,
    scaled to the requested number of electrons per pattern, and replaced by a
    Poisson sample with that expectation.

    Args:
        meas: ndarray of non-negative intensities with shape (..., ky, kx), e.g. (N, ky, kx)
        unit: 'total_e_per_pattern' (value is the electron count per pattern) or
            'e_per_Ang2' (value is the dose in e-/Ang^2)
        value: scalar float, interpreted according to unit
        scan_step_size: scalar float, unit: Ang
        rng: optional random generator exposing .poisson (e.g. np.random.default_rng(seed))
            for reproducible noise. Defaults to NumPy's global random state.

    Returns:
        Integer ndarray of Poisson-sampled electron counts with the same shape as meas.

    Raises:
        ValueError: if unit is not one of the supported strings.
    '''
    if unit == 'total_e_per_pattern':
        total_electron = value
        dose = total_electron / scan_step_size **2
    elif unit == 'e_per_Ang2':
        dose = value
        total_electron =  dose * scan_step_size **2 # Number of electron per diffraction pattern
    else:
        raise ValueError(f"Unsupported unit: '{unit}' for Poisson noise. Expected 'total_e_per_pattern' or 'e_per_Ang2'.")

    print(f"total electron per measurement = dose x scan_step_size^2 = {dose:.3f}(e-/Ang^2) x {scan_step_size:.3f}(Ang)^2 = {total_electron:.3f}")

    # keepdims lets the per-pattern sum broadcast for any number of leading axes
    pattern_sums = meas.sum(axis=(-2, -1), keepdims=True)
    # An all-zero pattern would give 0/0 = NaN, which the Poisson sampler rejects.
    # Dividing by 1 instead keeps such a pattern all-zero (i.e. no electrons detected).
    pattern_sums = np.where(pattern_sums == 0, 1, pattern_sums)
    meas = meas / pattern_sums # Make each slice of the meas to sum to 1

    sampler = np.random if rng is None else rng
    meas = sampler.poisson(meas * total_electron)
    print(f"Adding Poisson noise with a total electron per diffraction pattern of {int(total_electron)}")

    return meas

## Combine the HDF5 chunk files into a single file

In [4]:
# Raw string: '\p' and '\s' are not valid escape sequences, so the plain literal only
# worked by accident (and triggers a SyntaxWarning on recent Python versions).
hdf5_dir = r'data\paper\simu_tBL_WSe2/'

def _natural_sort_key(name):
    ''' Split name into text / integer tokens so that embedded numbers compare numerically '''
    tokens = re.split(r'(\d+)', name)
    # re.split with a capturing group puts the digit runs at the odd positions
    tokens[1::2] = [int(token) for token in tokens[1::2]]
    return tokens

# Collect the chunk files of the simulation. The next cell places each chunk by its
# position in this list, so the order must follow the numeric start index; a plain
# lexicographic sort would misorder names whose indices are not zero-padded
# (e.g. 'start_10240' before 'start_2048').
hdf5s = sorted(
    (file for file in os.listdir(hdf5_dir)
     if file.startswith('phonon_temporal_N16384_dp128_start_')),
    key=_natural_sort_key,
)

In [5]:
# Retrieve cbeds
# Chunk i is written to rows [i*1024, (i+1)*1024), so every file is assumed to hold
# 1024 patterns of 128 x 128 pixels and hdf5s must already be in chunk order.
cbeds_resample = np.zeros([16384,128,128], dtype=np.float32)
for i,file in enumerate(hdf5s):
    # Open read-only: these files are inputs. Mode 'a' would allow accidental
    # modification and would silently create an empty file if the path were wrong.
    with h5py.File(os.path.join(hdf5_dir, file), 'r') as hf:
        dp = hf['/dp'][:]
        cbeds_resample[i*1024:(i+1)*1024,:,:] = dp

In [6]:
# Retrieve metadata
# Read from the last chunk file explicitly instead of relying on a loop variable
# leaked from an earlier cell.
# NOTE(review): presumably the metadata is identical in every chunk file - confirm.
with h5py.File(os.path.join(hdf5_dir, hdf5s[-1]), 'r') as hf: # read-only, nothing is written
    potential_resample = hf['/full_volume'][()]
    potential_crop = hf['/volume'][()]
    gt_phase = hf['/gt_phase'][()]
    # Copy every entry of the 'abtem_params' group into a plain dict of in-memory values
    abtem_params = {key: dataset[()] for key, dataset in hf['abtem_params'].items()}

In [7]:
# Apply partial spatial coherence
# NOTE: this cell overwrites cbeds_resample, so running it twice blurs the data twice.
N_scan_slow, N_scan_fast = 128, 128
source_size_std_ang = 0.34 # Ang
scan_step_size = 0.429 # Ang

# The source size blur std expressed in units of scan steps
source_size_std_px = source_size_std_ang / scan_step_size

# View the stack as (slow, fast, ky, kx) and blur along the two scan axes only:
# partial spatial coherence is approximated by mixing DPs at nearby probe positions
cbeds_resample = gaussian_filter(
    cbeds_resample.reshape((N_scan_slow, N_scan_fast) + cbeds_resample.shape[-2:]),
    sigma=source_size_std_px,
    axes=(0,1),
)
print(f"Adding source size (partial spatial coherence) of Gaussian blur std = {source_size_std_px:.4f} scan_step sizes or {source_size_std_ang:.4f} Ang to measurements along the scan directions")

# Flatten the two scan axes back into a single leading axis
cbeds_resample = cbeds_resample.reshape((-1,) + cbeds_resample.shape[-2:])
print(f"Reshape measurements back to (N, ky, kx) = {cbeds_resample.shape}")

# Record in the metadata that the blur has been applied
abtem_params['use_partial_spatial_source'] = True

Adding source size (partial spatial coherence) of Gaussian blur std = 0.7925 scan_step sizes or 0.3400 Ang to measurements along the scan directions
Reshape measurements back to (N, ky, kx) = (16384, 128, 128)


In [8]:
# Write the combined (and blurred) data set plus its metadata into a single HDF5 file
output_path = os.path.join(hdf5_dir, 'phonon_temporal_spatial_N16384_dp128.hdf5')
with h5py.File(output_path, 'a') as hf:
    datasets = (
        ('/full_volume', potential_resample),
        ('/volume',      potential_crop),
        ('/gt_phase',    gt_phase),
        ('/dp',          cbeds_resample),
    )
    for name, data in datasets:
        # create_dataset raises if the name already exists, so remove a previous copy
        # first; this makes the cell safe to re-run while leaving any other datasets
        # already stored in the file untouched.
        if name in hf:
            del hf[name]
        hf.create_dataset(name, data = data)

    # Same delete-then-create treatment for the parameter group
    if 'abtem_params' in hf:
        del hf['abtem_params']
    param_group = hf.create_group('abtem_params')
    for key,value in abtem_params.items():
        param_group.create_dataset(key, data=value)
print(f"Saved hdf5 as {output_path}")

Saved hdf5 as data\paper\simu_tBL_WSe2/phonon_temporal_spatial_N16384_dp128.hdf5


## Add noise for a single file

In [9]:
# Raw string: '\p' and '\s' are not valid escape sequences, so the plain literal only
# worked by accident (and triggers a SyntaxWarning on recent Python versions).
output_path = r'data\paper\simu_tBL_WSe2/phonon_temporal_spatial_N16384_dp128.hdf5'
scan_step_size = 0.429 # Ang

# Seed NumPy's global random state (used by meas_add_poisson_noise) so that the noisy
# data sets are reproducible from run to run
np.random.seed(42)

# Open the existing HDF5 file for reading and writing. 'r+' requires the file to exist,
# so a wrong path fails immediately instead of creating an empty file.
with h5py.File(output_path, 'r+') as hf:
    # Load the '/dp' dataset
    dp = hf['/dp'][:]

    for dose in [1e4, 1e5, 1e6, 1e7]:
        print(f"Preprocessing with dose = {dose}")
        dp_noise = meas_add_poisson_noise(dp, unit='e_per_Ang2', value=dose, scan_step_size=scan_step_size)

        # Normalize so that the averaged DP has a max of 1; each individual DP then has its max somewhere around 1
        normalization_const = (np.mean(dp_noise, 0).max())
        dp_noise = dp_noise / normalization_const
        dp_noise = dp_noise.astype('float32')
        print(f"Normalizing measurements by {normalization_const:.8g} so the averaged measurement has max intensity at 1 for ease of display/comparison")

        # Save the processed data back to the same HDF5 as a new dataset named after the dose, e.g. '/dp_1e+04'
        # If the dataset already exists, delete it first so it gets overwritten
        dataset_name = f'/dp_{dose:.0e}'
        if dataset_name in hf:
            del hf[dataset_name]
        hf.create_dataset(dataset_name, data=dp_noise)

Preprocessing with dose = 10000.0
total electron per measurement = dose x scan_step_size^2 = 10000.000(e-/Ang^2) x 0.429(Ang)^2 = 1840.410
Adding Poisson noise with a total electron per diffraction pattern of 1840
Normalizing measurements by 4.4510498 so the averaged measurement has max intensity at 1 for ease of display/comparison
Preprocessing with dose = 100000.0
total electron per measurement = dose x scan_step_size^2 = 100000.000(e-/Ang^2) x 0.429(Ang)^2 = 18404.100
Adding Poisson noise with a total electron per diffraction pattern of 18404
Normalizing measurements by 44.168945 so the averaged measurement has max intensity at 1 for ease of display/comparison
Preprocessing with dose = 1000000.0
total electron per measurement = dose x scan_step_size^2 = 1000000.000(e-/Ang^2) x 0.429(Ang)^2 = 184041.000
Adding Poisson noise with a total electron per diffraction pattern of 184040
Normalizing measurements by 440.85614 so the averaged measurement has max intensity at 1 for ease of displ