# Training and Saving a Model

In [1]:
import numpy as np
from astropy.io import fits

import sys
sys.path.insert(0, "/home/vmehta/honours-project/pyght/src")

from AnniesLasso.thecannon.model import CannonModel
from AnniesLasso.thecannon.vectorizer.polynomial import PolynomialVectorizer

In [2]:
filepath = '/data/mustard/vmehta/sfh_1000_10_20250925_111440/sfh_1000_10_20250925_111440'
labels = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]

data = fits.getdata(f"{filepath}_weights.fits")
data = np.log10(data)

flux = np.load(f"{filepath}_snr_spectra.npy")
ivar = np.load(f"{filepath}_snr_invvar.npy")
wav = np.load(f"{filepath}_wavelength.npy")

vectorizer = PolynomialVectorizer(labels, 2)

In [3]:
model = CannonModel(data, flux, ivar, vectorizer=vectorizer, dispersion=wav)
model.train()

2025-10-21 16:51:17,321 [INFO] Training 10-label CannonModel with 1000 stars and 2222 pixels/star
stty: 'standard input': Inappropriate ioctl for device
2025-10-21 16:51:17,328 [DEBUG] Couldn't get screen size. Progressbar may look odd.
stty: 'standard input': Inappropriate ioctl for device
2025-10-21 16:51:17,328 [DEBUG] Couldn't get screen size. Progressbar may look odd.




(array([[ 9.97571600e-01,  3.30624059e-02,  3.69350379e-03, ...,
          2.29602977e-03,  6.46944665e-04,  1.39950041e-03],
        [ 1.00070612e+00,  2.63573822e-02,  1.64462937e-03, ...,
          2.06978742e-03,  5.81971395e-04,  9.30864693e-04],
        [ 9.99398997e-01,  2.78081510e-02,  2.18464158e-03, ...,
          2.13250326e-03,  6.10230660e-04,  1.02970555e-03],
        ...,
        [ 9.95940771e-01, -5.07346911e-04, -7.81276878e-04, ...,
          2.72925397e-04,  1.38158299e-04, -6.99514944e-05],
        [ 9.96649448e-01, -1.10896837e-03, -1.11763722e-03, ...,
          2.53345439e-04,  1.07741962e-04, -1.11888090e-04],
        [ 9.95888465e-01, -3.20186208e-04, -7.97687032e-04, ...,
          1.85905965e-04,  9.01702331e-05, -5.54061067e-05]],
       shape=(2222, 66)),
 array([0., 0., 0., ..., 0., 0., 0.], shape=(2222,)),
 [{'grad': array([-1.04609654e-11, -2.40543097e-09,  1.22089427e-09,  5.48250334e-10,
           9.30647004e-10,  3.89242527e-10,  7.32152339e-10,  1.

In [4]:
# Inspect model parameters after training, before writing
print('theta:', model._theta if hasattr(model, '_theta') else None)
print('s2:', model._s2 if hasattr(model, '_s2') else None)

theta: [[ 9.97571600e-01  3.30624059e-02  3.69350379e-03 ...  2.29602977e-03
   6.46944665e-04  1.39950041e-03]
 [ 1.00070612e+00  2.63573822e-02  1.64462937e-03 ...  2.06978742e-03
   5.81971395e-04  9.30864693e-04]
 [ 9.99398997e-01  2.78081510e-02  2.18464158e-03 ...  2.13250326e-03
   6.10230660e-04  1.02970555e-03]
 ...
 [ 9.95940771e-01 -5.07346911e-04 -7.81276878e-04 ...  2.72925397e-04
   1.38158299e-04 -6.99514944e-05]
 [ 9.96649448e-01 -1.10896837e-03 -1.11763722e-03 ...  2.53345439e-04
   1.07741962e-04 -1.11888090e-04]
 [ 9.95888465e-01 -3.20186208e-04 -7.97687032e-04 ...  1.85905965e-04
   9.01702331e-05 -5.54061067e-05]]
s2: [0. 0. 0. ... 0. 0. 0.]


In [5]:
# Check if attributes exist using hasattr AND their actual values
print('hasattr _theta:', hasattr(model, '_theta'))
print('hasattr _s2:', hasattr(model, '_s2'))
print('hasattr _training_set_labels:', hasattr(model, '_training_set_labels'))
print('hasattr _training_set_flux:', hasattr(model, '_training_set_flux'))
print('hasattr _training_set_ivar:', hasattr(model, '_training_set_ivar'))
print()
print('getattr _theta type:', type(getattr(model, '_theta', 'NOT FOUND')))
print('getattr _s2 type:', type(getattr(model, '_s2', 'NOT FOUND')))
print('getattr _training_set_labels type:', type(getattr(model, '_training_set_labels', 'NOT FOUND')))
print('getattr _training_set_flux type:', type(getattr(model, '_training_set_flux', 'NOT FOUND')))
print('getattr _training_set_ivar type:', type(getattr(model, '_training_set_ivar', 'NOT FOUND')))
print()
# Check if they are None
print('_theta is None:', getattr(model, '_theta', 'NOT FOUND') is None)
print('_s2 is None:', getattr(model, '_s2', 'NOT FOUND') is None)
print('_training_set_labels is None:', getattr(model, '_training_set_labels', 'NOT FOUND') is None)
print('_training_set_flux is None:', getattr(model, '_training_set_flux', 'NOT FOUND') is None)
print('_training_set_ivar is None:', getattr(model, '_training_set_ivar', 'NOT FOUND') is None)

hasattr _theta: True
hasattr _s2: True
hasattr _training_set_labels: True
hasattr _training_set_flux: True
hasattr _training_set_ivar: True

getattr _theta type: <class 'numpy.ndarray'>
getattr _s2 type: <class 'numpy.ndarray'>
getattr _training_set_labels type: <class 'numpy.ndarray'>
getattr _training_set_flux type: <class 'numpy.ndarray'>
getattr _training_set_ivar type: <class 'numpy.ndarray'>

_theta is None: False
_s2 is None: False
_training_set_labels is None: False
_training_set_flux is None: False
_training_set_ivar is None: False


In [6]:
path = 'model_test.pkl'
model.write(path)

Serializing attribute: vectorizer
  Checking for private attribute: _vectorizer
  hasattr result: True
  Using private attribute _vectorizer
  Got value type: <class 'AnniesLasso.thecannon.vectorizer.polynomial.PolynomialVectorizer'>, is None: False
  Value: ('PolynomialVectorizer', {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'terms': [[(0, 1)], [(1, 1)], [(2, 1)], [(3, 1)], [(4, 1)], [(5, 1)], [(6, 1)], [(7, 1)], [(8, 1)], [(9, 1
Serializing attribute: censors
  Checking for private attribute: _censors
  hasattr result: True
  Using private attribute _censors
  Got value type: <class 'AnniesLasso.thecannon.censoring.Censors'>, is None: False
  Value: {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'num_pixels': 2222, 'items': {}}
Serializing attribute: regularization
  Checking for private attribute: _regularization
  hasattr result: True
  Using private attribute _regularization
  Got value type: <class 'NoneType'>, is None: True
  Value:

In [7]:
import pickle

with open('model_test.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

loaded_model

{'vectorizer': ('PolynomialVectorizer',
  {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'),
   'terms': [[(0, 1)],
    [(1, 1)],
    [(2, 1)],
    [(3, 1)],
    [(4, 1)],
    [(5, 1)],
    [(6, 1)],
    [(7, 1)],
    [(8, 1)],
    [(9, 1)],
    [(0, 2.0)],
    [(0, 1), (1, 1)],
    [(0, 1), (2, 1)],
    [(0, 1), (3, 1)],
    [(0, 1), (4, 1)],
    [(0, 1), (5, 1)],
    [(0, 1), (6, 1)],
    [(0, 1), (7, 1)],
    [(0, 1), (8, 1)],
    [(0, 1), (9, 1)],
    [(1, 2.0)],
    [(1, 1), (2, 1)],
    [(1, 1), (3, 1)],
    [(1, 1), (4, 1)],
    [(1, 1), (5, 1)],
    [(1, 1), (6, 1)],
    [(1, 1), (7, 1)],
    [(1, 1), (8, 1)],
    [(9, 1), (1, 1)],
    [(2, 2.0)],
    [(2, 1), (3, 1)],
    [(2, 1), (4, 1)],
    [(2, 1), (5, 1)],
    [(2, 1), (6, 1)],
    [(2, 1), (7, 1)],
    [(2, 1), (8, 1)],
    [(9, 1), (2, 1)],
    [(3, 2.0)],
    [(3, 1), (4, 1)],
    [(3, 1), (5, 1)],
    [(3, 1), (6, 1)],
    [(3, 1), (7, 1)],
    [(3, 1), (8, 1)],
    [(9, 1), (3, 1)],
    [(4, 2.0)],


In [8]:
# Inspect theta and s2 after loading with pickle
print('theta (pickle):', loaded_model.get('_theta', None) if isinstance(loaded_model, dict) else getattr(loaded_model, '_theta', None))
print('s2 (pickle):', loaded_model.get('_s2', None) if isinstance(loaded_model, dict) else getattr(loaded_model, '_s2', None))

theta (pickle): None
s2 (pickle): None


In [9]:
read_model = CannonModel.read('model_test.pkl')
read_model.test(flux, ivar)

stty: 'standard input': Inappropriate ioctl for device
2025-10-21 16:51:24,120 [DEBUG] Couldn't get screen size. Progressbar may look odd.
2025-10-21 16:51:24,121 [INFO] Running test step on 1000 spectra
2025-10-21 16:51:24,121 [INFO] Running test step on 1000 spectra




2025-10-21 16:52:22,943 [ERROR] Exception within wrapped function
Traceback (most recent call last):
  File "/home/vmehta/honours-project/pyght/src/AnniesLasso/thecannon/utils.py", line 148, in __call__
    result = self.f(*(list(x) + self.args), **self.kwds)
  File "/home/vmehta/honours-project/pyght/src/AnniesLasso/thecannon/fitting.py", line 132, in fit_spectrum
    op_labels, cov, meta, mesg, ier = op.leastsq(
                                      ~~~~~~~~~~^
        x0=(x0 - fiducials)/scales, full_output=True, **kwds)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/vmehta/.pyenv/versions/sfh-venv/lib/python3.13/site-packages/scipy/optimize/_minpack_py.py", line 448, in leastsq
    retval = _minpack._lmder(func, Dfun, x0, args, full_output,
                             col_deriv, ftol, xtol, gtol, maxfev,
                             factor, diag)
  File "/home/vmehta/honours-project/pyght/src/AnniesLasso/thecannon/fitting.py", line 98, in <lambda>
    

KeyboardInterrupt: 

In [10]:
# Test what __getstate__ returns for numpy arrays
test_array = np.array([1, 2, 3])
print("Array:", test_array)
print("Has __getstate__:", hasattr(test_array, "__getstate__"))
if hasattr(test_array, "__getstate__"):
    state = test_array.__getstate__()
    print("__getstate__ returns:", state)
    print("Type:", type(state))

Array: [1 2 3]
Has __getstate__: True
__getstate__ returns: None
Type: <class 'NoneType'>


In [11]:
## Test the fix - retrain and save the model

In [12]:
# Retrain the model with the fixed code
model2 = CannonModel(data, flux, ivar, vectorizer=vectorizer, dispersion=wav)
model2.train()
print("Model trained successfully")

2025-10-21 16:52:26,917 [INFO] Training 10-label CannonModel with 1000 stars and 2222 pixels/star
stty: 'standard input': Inappropriate ioctl for device
2025-10-21 16:52:26,925 [DEBUG] Couldn't get screen size. Progressbar may look odd.
stty: 'standard input': Inappropriate ioctl for device
2025-10-21 16:52:26,925 [DEBUG] Couldn't get screen size. Progressbar may look odd.


Model trained successfully
Model trained successfully


In [13]:
# Save the model with the fix
path2 = 'model_test_fixed.pkl'
model2.write(path2, overwrite=True)
print("Model written successfully")

Serializing attribute: vectorizer
  Checking for private attribute: _vectorizer
  hasattr result: True
  Using private attribute _vectorizer
  Got value type: <class 'AnniesLasso.thecannon.vectorizer.polynomial.PolynomialVectorizer'>, is None: False
  Value: ('PolynomialVectorizer', {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'terms': [[(0, 1)], [(1, 1)], [(2, 1)], [(3, 1)], [(4, 1)], [(5, 1)], [(6, 1)], [(7, 1)], [(8, 1)], [(9, 1
Serializing attribute: censors
  Checking for private attribute: _censors
  hasattr result: True
  Using private attribute _censors
  Got value type: <class 'AnniesLasso.thecannon.censoring.Censors'>, is None: False
  Value: {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'num_pixels': 2222, 'items': {}}
Serializing attribute: regularization
  Checking for private attribute: _regularization
  hasattr result: True
  Using private attribute _regularization
  Got value type: <class 'NoneType'>, is None: True
  Value:

In [14]:
# Load and verify the fixed model
with open('model_test_fixed.pkl', 'rb') as f:
    loaded_model_fixed = pickle.load(f)

print("Loaded model keys:", list(loaded_model_fixed.keys()))
print("\ntheta in saved model:", 'theta' in loaded_model_fixed)
print("s2 in saved model:", 's2' in loaded_model_fixed)
print("\ntheta is None:", loaded_model_fixed.get('theta') is None)
print("s2 is None:", loaded_model_fixed.get('s2') is None)
print("\ntheta shape:", loaded_model_fixed.get('theta').shape if loaded_model_fixed.get('theta') is not None else None)
print("s2 shape:", loaded_model_fixed.get('s2').shape if loaded_model_fixed.get('s2') is not None else None)

Loaded model keys: ['vectorizer', 'censors', 'regularization', 'dispersion', 'theta', 's2', 'training_set_labels', 'metadata']

theta in saved model: True
s2 in saved model: True

theta is None: False
s2 is None: False

theta shape: (2222, 66)
s2 shape: (2222,)
