# In [1]:
from sklearn.cross_decomposition import PLSRegression
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import ShuffleSplit,train_test_split,GridSearchCV,KFold
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import cross_val_score
from sklearn.utils import shuffle
from numpy.linalg import matrix_rank as rank
import numpy as np
class UVE:
    """Rank features by PLS-coefficient stability and keep the best-scoring prefix.

    The class name presumably stands for "uninformative variable elimination".
    Intended call order: ``calcCriteria()`` -> ``evalCriteria()`` -> ``cutFeature()``.

    Attributes set by the methods:
        criteria     : per-feature mean/std of the PLS coefficients (calcCriteria).
        featureIndex : feature indices sorted by decreasing |criteria| (evalCriteria).
        featureR2    : mean cross-validation score of the first i+1 ranked features.
        selFeature   : indices of the retained features (cutFeature).
    """

    def __init__(self, X, y, ncomp=25, nrep=100, testSize=0.2):
        """
        X        : predictor matrix, array-like of shape (n_samples, n_features)
        y        : response values, array-like with one entry per sample
        ncomp    : upper bound on the number of PLS latent components
                   (capped at the rank of X)
        nrep     : number of random resamplings used by calcCriteria
        testSize : fraction of samples left out in each resampling
        """
        # Coerce to ndarrays so that the positional indexing used below
        # (X[rows, :], y[rows]) also works for lists and pandas objects.
        self.X = np.asarray(X)
        self.y = np.asarray(y)
        # The number of latent components should not be larger than any
        # dimension size of the independent matrix.
        self.ncomp = int(min(ncomp, rank(self.X)))
        self.nrep = nrep
        self.testSize = testSize
        self.criteria = None

        self.featureIndex = None
        self.featureR2 = np.full(self.X.shape[1], np.nan)
        self.selFeature = None

    def calcCriteria(self):
        """Fit PLS on ``nrep`` random subsamples and store mean/std of each coefficient.

        The result is written to ``self.criteria`` (one value per feature).
        Features whose coefficient never varies get a zero standard deviation,
        so the ratio can be inf or nan for them.
        """
        nFeatures = self.X.shape[1]
        coefs = np.zeros((self.nrep, nFeatures))
        splitter = ShuffleSplit(n_splits=self.nrep, test_size=self.testSize,
                                random_state=15)
        for step, (train, _) in enumerate(splitter.split(self.X, self.y)):
            Xtrain = self.X[train, :]
            ytrain = self.y[train]
            plsModel = PLSRegression(
                n_components=int(min(self.ncomp, rank(Xtrain))))
            plsModel.fit(Xtrain, ytrain)
            # coef_ is laid out as (n_targets, n_features) or as
            # (n_features, n_targets) depending on the scikit-learn version.
            # Pick the first target's coefficient vector in either layout
            # instead of assuming the row-major one.
            coef = np.atleast_2d(np.asarray(plsModel.coef_))
            coefs[step, :] = coef[0] if coef.shape[-1] == nFeatures else coef[:, 0]
        meanCoef = np.mean(coefs, axis=0)
        stdCoef = np.std(coefs, axis=0)
        self.criteria = meanCoef / stdCoef

    def evalCriteria(self, cv=5):
        """Cross-validate nested subsets of the ranked features.

        Features are ordered by decreasing ``|criteria|``; for every prefix of
        that ordering the mean ``cross_val_score`` is stored in ``featureR2``.

        cv : cross-validation setting forwarded to ``cross_val_score``.

        Raises RuntimeError if ``calcCriteria`` has not been run yet.
        """
        # Imported here because the file's import block does not provide it.
        from sklearn.linear_model import LinearRegression

        if self.criteria is None:
            raise RuntimeError("calcCriteria() must be called before evalCriteria()")

        self.featureIndex = np.argsort(-np.abs(self.criteria))
        for i in range(self.X.shape[1]):
            Xi = self.X[:, self.featureIndex[:i + 1]]
            if i < self.ncomp:
                # At most ncomp columns so far: plain least squares is used.
                regModel = LinearRegression()
            else:
                regModel = PLSRegression(
                    n_components=int(min(self.ncomp, rank(Xi))))

            cvScore = cross_val_score(regModel, Xi, self.y, cv=cv)
            self.featureR2[i] = np.mean(cvScore)

    def cutFeature(self, *args):
        """Select the ranked-feature prefix with the best cross-validation score.

        *args : accepted for backward compatibility; the arguments do not
                influence the result.

        Returns the index array of the selected features (also stored in
        ``self.selFeature``).

        Raises RuntimeError if ``evalCriteria`` produced no usable score.
        """
        if self.featureIndex is None or np.all(np.isnan(self.featureR2)):
            raise RuntimeError("evalCriteria() must be called before cutFeature()")
        # nanargmax: a NaN score (failed fit) must not win the comparison.
        cuti = int(np.nanargmax(self.featureR2))
        self.selFeature = self.featureIndex[:cuti + 1]
        return self.selFeature


# Load the data set `data` here. NOTE: X and y are not defined anywhere in the
# visible code, so they must be created before the lines below can run.
# X is the spectral data taken from `data`; y is the substance content taken
# from `data`.
Uve = UVE(X, y)
# Compute the per-feature selection criterion.
Uve.calcCriteria()
# Score the ranked feature subsets with 5-fold cross-validation.
Uve.evalCriteria(cv=5)
# Indices of the selected features (cutFeature returns the index array).
Featuresecletidx = Uve.cutFeature(X)
# Bare expression: presumably left so the notebook displays the value.
Featuresecletidx

# Output: NameError: name 'X' is not defined