# Training and Saving a Model

In [2]:
import numpy as np
from astropy.io import fits
from AnniesLasso.thecannon.model import CannonModel
from AnniesLasso.thecannon.vectorizer.polynomial import PolynomialVectorizer

In [24]:
filepath = '/data/mustard/vmehta/sfh_1000_10_20250925_111440/sfh_1000_10_20250925_111440'
labels = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10"]

data = fits.getdata(f"{filepath}_weights.fits")
data = np.log10(data)

flux = np.load(f"{filepath}_snr_spectra.npy")
ivar = np.load(f"{filepath}_snr_invvar.npy")
wav = np.load(f"{filepath}_wavelength.npy")

vectorizer = PolynomialVectorizer(labels, 2)

In [29]:
model = CannonModel(data, flux, ivar, vectorizer=vectorizer, dispersion=wav)
model.train()

2025-10-21 10:52:08,593 [INFO] Training 10-label CannonModel with 1000 stars and 2222 pixels/star
stty: 'standard input'2025-10-21 10:52:08,624 [DEBUG] Couldn't get screen size. Progressbar may look odd.


[==                                                                                                  ]   2% (50/2222)                          

: Inappropriate ioctl for device




(array([[ 9.97571600e-01,  3.30624059e-02,  3.69350379e-03, ...,
          2.29602977e-03,  6.46944665e-04,  1.39950041e-03],
        [ 1.00070612e+00,  2.63573822e-02,  1.64462937e-03, ...,
          2.06978742e-03,  5.81971395e-04,  9.30864693e-04],
        [ 9.99398997e-01,  2.78081510e-02,  2.18464158e-03, ...,
          2.13250326e-03,  6.10230660e-04,  1.02970555e-03],
        ...,
        [ 9.95940771e-01, -5.07346910e-04, -7.81276878e-04, ...,
          2.72925397e-04,  1.38158299e-04, -6.99514944e-05],
        [ 9.96649448e-01, -1.10896837e-03, -1.11763722e-03, ...,
          2.53345439e-04,  1.07741962e-04, -1.11888090e-04],
        [ 9.95888465e-01, -3.20186207e-04, -7.97687032e-04, ...,
          1.85905965e-04,  9.01702331e-05, -5.54061067e-05]]),
 array([0., 0., 0., ..., 0., 0., 0.]),
 [{'grad': array([-6.15439699e-10,  5.19280086e-09, -5.65964497e-09, -8.18401791e-10,
          -1.93385402e-09, -1.28906688e-09, -1.60234204e-09, -1.23400856e-09,
           1.98441263e-11,

In [31]:
path = 'model_test.pkl'
model.write(path)

Serializing attribute: vectorizer
  Value: ('PolynomialVectorizer', {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'terms': [[(0, 1)], [(1, 1)], [(2, 1)], [(3, 1)], [(4, 1)], [(5, 1)], [(6, 1)], [(7, 1)], [(8, 1)], [(9, 1
Serializing attribute: censors
  Value: {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'), 'num_pixels': 2222, 'items': {}}
Serializing attribute: regularization
  Value: None
Serializing attribute: dispersion
  Value type: np.ndarray, shape: (2222,), dtype: float64, first 3: [3699.9526 3700.853  3701.7531]
Serializing attribute: theta
  Value type: np.ndarray, shape: (2222, 66), dtype: float64, first 3: [0.9975716  0.03306241 0.0036935 ]
Serializing attribute: s2
  Value type: np.ndarray, shape: (2222,), dtype: float64, first 3: [0. 0. 0.]
Serializing attribute: training_set_labels
  Value type: np.ndarray, shape: (1000, 10), dtype: float64, first 3: [-1.03153555 -1.08590728 -0.60691449]
Serializing attribute: training_set_flux

In [32]:
import pickle

with open('model_test.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

loaded_model

{'vectorizer': ('PolynomialVectorizer',
  {'label_names': ('1', '2', '3', '4', '5', '6', '7', '8', '9', '10'),
   'terms': [[(0, 1)],
    [(1, 1)],
    [(2, 1)],
    [(3, 1)],
    [(4, 1)],
    [(5, 1)],
    [(6, 1)],
    [(7, 1)],
    [(8, 1)],
    [(9, 1)],
    [(0, 2.0)],
    [(0, 1), (1, 1)],
    [(0, 1), (2, 1)],
    [(0, 1), (3, 1)],
    [(0, 1), (4, 1)],
    [(0, 1), (5, 1)],
    [(0, 1), (6, 1)],
    [(0, 1), (7, 1)],
    [(0, 1), (8, 1)],
    [(0, 1), (9, 1)],
    [(1, 2.0)],
    [(1, 1), (2, 1)],
    [(1, 1), (3, 1)],
    [(1, 1), (4, 1)],
    [(1, 1), (5, 1)],
    [(1, 1), (6, 1)],
    [(1, 1), (7, 1)],
    [(1, 1), (8, 1)],
    [(9, 1), (1, 1)],
    [(2, 2.0)],
    [(2, 1), (3, 1)],
    [(2, 1), (4, 1)],
    [(2, 1), (5, 1)],
    [(2, 1), (6, 1)],
    [(2, 1), (7, 1)],
    [(2, 1), (8, 1)],
    [(9, 1), (2, 1)],
    [(3, 2.0)],
    [(3, 1), (4, 1)],
    [(3, 1), (5, 1)],
    [(3, 1), (6, 1)],
    [(3, 1), (7, 1)],
    [(3, 1), (8, 1)],
    [(9, 1), (3, 1)],
    [(4, 2.0)],


In [34]:
read_model = CannonModel.read('model_test.pkl')
read_model.test(flux, ivar)

stty: 'standard input': Inappropriate ioctl for device
2025-10-21 10:56:40,841 [DEBUG] Couldn't get screen size. Progressbar may look odd.
2025-10-21 10:56:40,842 [INFO] Running test step on 1000 spectra




(array([[-1.11355235, -1.17065918, -0.44733871, ..., -1.62503606,
         -1.13313955, -0.78836786],
        [-0.68191999, -1.72824856, -0.97437605, ..., -3.62413474,
         -0.94371927, -0.59855896],
        [-1.03111431, -1.84615278, -1.64567721, ..., -0.19624579,
         -0.94138115, -1.52800322],
        ...,
        [-0.81445045, -1.11767685, -0.67738049, ..., -0.4851465 ,
         -1.12724933, -1.38122692],
        [-1.02391041, -1.10183752, -1.17270659, ...,  0.30810715,
         -2.88779912, -1.02761302],
        [-1.13379019, -1.19808246, -1.39671987, ..., -0.79743319,
         -1.90375404, -0.55582029]]),
 array([[[ 7.02529219e+01,  7.80373565e+01,  1.42427274e+02, ...,
           2.51683955e+02, -1.79334069e+02, -6.09549213e+02],
         [ 7.80373565e+01,  1.96081005e+02,  1.97734296e+02, ...,
           6.31146921e+02, -3.20638203e+02, -6.68172998e+02],
         [ 1.42427274e+02,  1.97734296e+02,  3.38889265e+02, ...,
           8.00687453e+02, -5.21896857e+02, -1.1378