In [24]:
import SNIDsn
import SNIDdataset as snid
import numpy as np
import SNePCA

import plotly.plotly as ply
import plotly.graph_objs as go
import plotly.tools as tls

import matplotlib.pyplot as plt
from BinSpectra import lowres_dataset
from BinSpectra import GetTypes
import warnings
warnings.filterwarnings('ignore')

from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

# 1 SVM

## 1.1 Load preprocessed SNID datasets 

datasetX contains the SNID spectra for the phase range X +/- 5 days, where each SNe has only 1 spectrum in this phase range.  The spectrum with phase closest to X is chosen. All of the preprocessing has been applied (wavelength cut, smoothing, phase type, etc)

In [25]:
dataset0 = snid.loadPickle('../Data/DataProducts/dataset0.pickle')
dataset5 = snid.loadPickle('../Data/DataProducts/dataset5.pickle')
dataset10 = snid.loadPickle('../Data/DataProducts/dataset10.pickle')
dataset15 = snid.loadPickle('../Data/DataProducts/dataset15.pickle')

## 1.2 Run PCA

In [26]:
#k = 0.0027069686088658784       # Original resolution
k = 0.11098571296350102         # Lowest resolution

dataset_lowres0 = lowres_dataset(dataset0, k)
dataset_lowres5 = lowres_dataset(dataset5, k)
dataset_lowres10 = lowres_dataset(dataset10, k)
dataset_lowres15 = lowres_dataset(dataset15, k)


snidPCA0 = SNePCA.SNePCA(dataset_lowres0, -5, 5)
snidPCA5 = SNePCA.SNePCA(dataset_lowres5, 0, 10)
snidPCA10 = SNePCA.SNePCA(dataset_lowres10, 5, 15)
snidPCA15 = SNePCA.SNePCA(dataset_lowres15, 10, 20)

snidPCA0.snidPCA()
snidPCA5.snidPCA()
snidPCA10.snidPCA()
snidPCA15.snidPCA()

snidPCA0.calcPCACoeffs()
snidPCA5.calcPCACoeffs()
snidPCA10.calcPCACoeffs()
snidPCA15.calcPCACoeffs()

In [4]:
print(snidPCA0.pcaCoeffMatrix.shape)
print(snidPCA5.pcaCoeffMatrix.shape)
print(snidPCA10.pcaCoeffMatrix.shape)
print(snidPCA15.pcaCoeffMatrix.shape)

(55, 5)
(52, 5)
(56, 5)
(54, 5)


## 1.3 Linear SVM 

In [4]:
IIbMask, IbMask, IcMask, IcBLMask = snidPCA0.getSNeTypeMasks()
labels = 1*IIbMask + 2*IbMask + 3*IcMask + 4*IcBLMask

In [23]:
labels

array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

In [27]:
GetTypes(dataset0)

{'sn1998dt': 'Ib-norm',
 'sn1999dn': 'Ib-norm',
 'sn1999ex': 'Ib-norm',
 'sn2004gq': 'Ib-norm',
 'sn2005bf': 'Ib-norm',
 'sn2005hg': 'Ib-norm',
 'sn2007ag': 'Ib-norm',
 'sn2007C': 'Ib-norm',
 'sn2007kj': 'Ib-norm',
 'sn2007uy': 'Ib-pec',
 'sn2007Y': 'Ib-norm',
 'sn2008D': 'Ib-norm',
 'sn2009er': 'Ib-pec',
 'sn2009jf': 'Ib-norm',
 'sn2012P': 'IIb',
 'sn2013df': 'IIb',
 'sn2016gkg': 'IIb',
 '11hs': 'IIb',
 'sn1993J': 'IIb',
 'sn1996cb': 'IIb',
 'sn1998fa': 'IIb',
 'sn2000H': 'IIb',
 'sn2004ff': 'IIb',
 'sn2006el': 'IIb',
 'sn2008bo': 'IIb',
 'sn2009mg': 'IIb',
 'sn2011dh': 'IIb',
 'sn2011ei': 'IIb',
 'sn2011fu': 'IIb',
 'LSQ14efd': 'Ic',
 '13ge': 'Ic',
 '15dtg': 'Ic',
 'sn1994I': 'Ic-norm',
 'sn2004aw': 'Ic-norm',
 'sn2004fe': 'Ic-norm',
 'sn2005az': 'Ic-norm',
 'sn2005ek': 'Ic-pec',
 'sn2005kl': 'Ic-norm',
 'sn2005mf': 'Ic-norm',
 'sn2007gr': 'Ic-norm',
 'sn2011bm': 'Ic-norm',
 'sn2013dk': 'Ic-norm',
 '16coi': 'Ic-broad',
 'sn1997ef': 'Ic-broad',
 'sn1998bw': 'Ic-broad',
 'sn2002ap': 'I

In [143]:
x = snidPCA0.pcaCoeffMatrix[:,3]
y = snidPCA0.pcaCoeffMatrix[:,4]

In [144]:
data = np.column_stack((x,y))

In [145]:
linsvm = LinearSVC()
test_score = []

In [154]:
for i in range(50):
    trainX, testX, trainY, testY = train_test_split(data, labels, test_size=0.30)
    linsvm.fit(trainX, trainY)
    trainscore = linsvm.score(trainX, trainY)
    testscore = linsvm.score(testX, testY)    
    test_score.append(testscore)

In [155]:
np.array(test_score).mean(), np.array(test_score).std()

(0.3392941176470588, 0.09648091766794324)