In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Lasso,LassoCV
from sklearn.metrics import mean_absolute_error
import matplotlib.pylab as plt
%matplotlib inline 

# load data

In [2]:
# load normalized spectra
flux_end_train = np.load('./Preprocessed_spectra_data.npy')
# load label(APOGEE_payne catalog:Teff,Logg,FeH,LASP:combined_teff,combined_logg,combined_feh,combined_obsid,combined_snrg)
train_label = np.load('./Label_APOGEE_payne_LASP.npy')

In [3]:
# divide training set and test set
X_train, X_val, y_train, y_val = train_test_split(flux_end_train, train_label, test_size=0.2, random_state=42)

In [4]:
# log teff
y_train[:,0] = np.log10(y_train[:,0])

In [5]:
# data normalization
sc=StandardScaler()
sc.fit(X_train)
X_train_std=sc.transform(X_train)
X_val_std=sc.transform(X_val)

# the model of teff

## feature extraction by LASSO

In [6]:
alphas = np.logspace(-4, 1, 50)
clf = LassoCV(alphas=alphas)
clf.fit(X_train_std,y_train[:,0])
clf.alpha_

  tol, rng, random, positive)
  positive)


0.00020235896477251576

In [7]:
mask = clf.coef_ != 0
X_1 = X_train_std[:,mask]
print(X_1.shape)
X_2 = X_val_std[:,mask] # AP_LA_IN
print(X_2.shape)

(7898, 643)
(1975, 643)


## model training and test

In [8]:
clf0 = MLPRegressor(hidden_layer_sizes=(400,50), learning_rate_init=0.0009,activation='logistic', max_iter=2000, early_stopping=True, random_state=1)
clf0.fit(X_1, y_train[:,0])
y_pred0 = clf0.predict(X_2)
print('Teff MAE:', mean_absolute_error(10 ** y_pred0, y_val[:, 0]))
diff_Teff = 10 ** y_pred0 - y_val[:,0] 
mu = np.mean(diff_Teff)
sigma = np.std(diff_Teff)
print('Teff mu:',mu)
print('Teff sigam:',sigma)

Teff MAE: 90.28841248240154
Teff mu: -2.8915472729017586
Teff sigam: 152.64986653320466


# the model of log g

## feature extraction by LASSO

In [9]:
alphas = np.logspace(-4, 1, 50)
clf = LassoCV(alphas=alphas, cv=10)
clf.fit(X_train_std,y_train[:,1])
clf.alpha_

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  positive)


0.00339322177189533

In [10]:
mask = clf.coef_ != 0
X_1 = X_train_std[:,mask]
print(X_1.shape)
X_2 = X_val_std[:,mask]
print(X_2.shape)

(7898, 775)
(1975, 775)


## model training and test

In [11]:
clf0 = MLPRegressor(hidden_layer_sizes=(600,50), learning_rate_init=0.00002,activation='logistic', max_iter=2000, early_stopping=True, random_state=1)
clf0.fit(X_1, y_train[:,1])
y_pred0 = clf0.predict(X_2)
print('logg MAE:', mean_absolute_error(y_pred0, y_val[:, 1]))
diff_logg = y_pred0 - y_val[:,1]
mu = np.mean(diff_logg)
sigma = np.std(diff_logg)
print('logg mu:',mu)
print('logg sigma:',sigma)

logg MAE: 0.1523727101531931
logg mu: 0.01713725398574798
logg sigma: 0.2580123436661139


# the model of [Fe/H]

## feature extraction by LASSO

In [12]:
alphas = np.logspace(-4, 1, 50)
clf = LassoCV(alphas=alphas, cv=10)
clf.fit(X_train_std,y_train[:,2])
clf.alpha_

  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)


  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  tol, rng, random, positive)
  positive)


0.0013257113655901094

In [13]:
mask = clf.coef_ != 0
X_1 = X_train_std[:,mask]
print(X_1.shape)
X_2 = X_val_std[:,mask]
print(X_2.shape)

(7898, 930)
(1975, 930)


In [14]:
from sklearn.neural_network import MLPRegressor
clf0 = MLPRegressor(hidden_layer_sizes=(600,100), learning_rate_init=0.0003
                    ,activation='logistic', max_iter=2000, early_stopping=True, random_state=1)
clf0.fit(X_1, y_train[:,2])
y_pred0 = clf0.predict(X_2)
print('[Fe/H] MAE:', mean_absolute_error(y_pred0, y_val[:, 2]))
diff_FeH = y_pred0 - y_val[:,2]
mu = np.mean(diff_FeH)
sigma = np.std(diff_FeH)
print('[Fe/H] mu:',mu)
print('[Fe/H] sigma:',sigma)

[Fe/H] MAE: 0.0639444518927337
[Fe/H] mu: 0.004396977944478042
[Fe/H] sigma: 0.0961628970223735
