# Automated Gaussian Decomposition (`gausspy`) of TIGRESS-NCR spectra
Trey V. Wenger - October 2024

Run in `gausspy` environment:
```bash
conda activate gausspy
```

In [1]:
import pickle
import gausspy.gp as gp
import numpy as np

outdir = "/media/drive1/tigress_ncr_results/R8_4pc"

# trained parameters from Murray et al. (2018)
a_emission = 3.75
snr_emission = 5.0
a1_absorption = 1.12
a2_absorption = 2.75
snr_absorption = 3.0

# restrict TB to realistic values from Murray et al. (2018)
max_tb = "max"

# allow widths to vary by 10% between from Murray et al. (2018)
p_width = 0.1

# allow centroids to vary by 10% from Murray et al. (2018)
# gausspy parameterizes this in terms of number of channels
# not clear how a 10% variation applies to mean velocity since velocity can be zero
# so let's use the gausspy default of 2 channels
d_mean = 2

# drop emission components within 1 native channel of absorption components from Murray et al. (2018)
drop_width = 4

In [2]:
# Joint decomposition
g_joint = gp.GaussianDecomposer()
g_joint.set("phase", "two")
g_joint.set("alpha1", a1_absorption)
g_joint.set("alpha2", a2_absorption)
g_joint.set("SNR_thresh", snr_absorption)
g_joint.set("SNR2_thresh", snr_absorption)
g_joint.set("alpha_em", a_emission)
g_joint.set("SNR_em", snr_emission)
g_joint.set("max_tb", max_tb)
g_joint.set("p_width", p_width)
g_joint.set("d_mean", d_mean)
g_joint.set("drop_width", drop_width)

## Prepare spectra
Interpolate to 0.1 km/s resolution and convert absorption spectra to optical depth.

In [3]:
import pickle
import numpy as np
from tqdm.notebook import tqdm

datatypes = ["mismatched", "matched"]
fwhms = ["1pix", "3pix", "10pix"]

for datatype in datatypes:
    for fwhm in fwhms:
        # Interpolate spectra to 0.1 km/s resolution
        with open(f"{outdir}/HI_{datatype}_spectra_{fwhm}.pkl", "rb") as f:
            data = pickle.load(f)

        print(datatype, fwhm)
        pbar = tqdm(total=len(data['data_list']))
        
        for idx in range(len(data['data_list'])):
            for feature in ['', '_em']:
                x_values = data[f"x_values{feature}"][idx]
                data_list = data[f"data_list{feature}"][idx]
                errors = data[f"errors{feature}"][idx]
                if x_values[0] > x_values[1]:
                    x_values = x_values[::-1]
                    data_list = data_list[::-1]
                    errors = errors[::-1]
                start = x_values[0]
                end = x_values[-1]
                new_x_values = np.arange(start, end, 0.1) # km/s
                data[f"data_list{feature}"][idx] = np.interp(new_x_values, x_values, data_list)
                data[f"errors{feature}"][idx] = np.interp(new_x_values, x_values, errors)
                data[f"x_values{feature}"][idx] = new_x_values

            # And change absorption from 1-exp(-tau) to tau
            data[f"data_list"][idx] = -np.log(1.0-data[f"data_list"][idx])
            data[f"errors"][idx] *= 1.0 / np.abs(1.0 - data[f"data_list"][idx])

            # drop nans
            isnan = np.isnan(data[f"data_list"][idx])
            data[f"data_list"][idx][isnan] = 10.0
            data[f"errors"][idx][isnan] = 10.0
            
            pbar.update(1)
        
        with open(f"{outdir}/HI_{datatype}_spectra_{fwhm}_resampled.pkl", "wb") as f:
            pickle.dump(data, f)

mismatched 1pix


  0%|          | 0/1000 [00:00<?, ?it/s]

  data[f"data_list"][idx] = -np.log(1.0-data[f"data_list"][idx])


mismatched 3pix


  0%|          | 0/1000 [00:00<?, ?it/s]

mismatched 10pix


  0%|          | 0/1000 [00:00<?, ?it/s]

matched 1pix


  0%|          | 0/1000 [00:00<?, ?it/s]

matched 3pix


  0%|          | 0/1000 [00:00<?, ?it/s]

matched 10pix


  0%|          | 0/1000 [00:00<?, ?it/s]

## Gaussian decomposition

In [3]:
datatypes = ["mismatched", "matched"]
fwhms = ["1pix", "3pix", "10pix"]

for datatype in datatypes:
    for fwhm in fwhms:
        print(datatype, fwhm)
        decomposed_joint = g_joint.batch_decomposition(f"{outdir}/HI_{datatype}_spectra_{fwhm}_resampled.pkl")
        with open(f"{outdir}/HI_{datatype}_spectra_{fwhm}_resampled_agd.pkl", "wb") as f:
            pickle.dump(decomposed_joint, f)

mismatched 1pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [03:34<00:00, 4.65it/s]
999it [00:00, 826125.73it/s]


SUCCESS
100 finished.%
mismatched 3pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [03:35<00:00, 4.64it/s]
999it [00:00, 1105861.62it/s]


SUCCESS
100 finished.%
mismatched 10pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [18:42<00:00, 1.12s/it]
999it [00:00, 1305330.12it/s]


SUCCESS
100 finished.%
matched 1pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [17:54<00:00, 1.08s/it]
999it [00:00, 1150181.09it/s]


SUCCESS
100 finished.%
matched 3pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [17:30<00:00, 1.05s/it]
999it [00:00, 1311458.43it/s]


SUCCESS
100 finished.%
matched 10pix
using 18 out of 24 cpus


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 999/999 [17:25<00:00, 1.05s/it]
999it [00:00, 1281379.11it/s]

SUCCESS
100 finished.%



