In [1]:
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.cross_decomposition import PLSRegression
def plscv1(x, y):
    """Choose the number of PLS components by 5-fold cross-validation.

    For every candidate component count a ``PLSRegression`` model is fitted on
    each training fold and scored by RMSE on the matching held-out fold. The
    count with the lowest mean RMSE over the folds is returned.

    Parameters
    ----------
    x : numpy.ndarray, 2-D
        Predictor matrix; its rows are selected with the fold index arrays.
    y : numpy.ndarray
        Responses, indexed along the first axis with the same fold indices.

    Returns
    -------
    int
        1-based number of components with the smallest mean RMSECV.
    """
    n_features = x.shape[1]
    kf = KFold(n_splits=5)
    # KFold is used without shuffling, so the folds are deterministic and can
    # be built once and reused for every candidate component count.
    folds = list(kf.split(x))
    # scikit-learn bounds PLSRegression's n_components by
    # min(n_samples, n_features) of the data passed to fit(), so the search is
    # capped by the smallest training fold as well as by the feature count and
    # the hard limit of 25. Without this, small data sets fail inside fit().
    smallest_train = min(len(train_index) for train_index, _ in folds)
    pc = min(25, n_features, smallest_train)

    RMSECV = []
    for n_components in range(1, pc + 1):
        fold_rmse = []
        for train_index, test_index in folds:
            pls = PLSRegression(n_components=n_components)
            pls.fit(x[train_index], y[train_index])
            y_predict = pls.predict(x[test_index])
            fold_rmse.append(
                np.sqrt(mean_squared_error(y[test_index], y_predict))
            )
        RMSECV.append(np.mean(fold_rmse))
    # argmin is 0-based while component counts start at 1.
    return np.argmin(RMSECV) + 1

def plscv(x, y):
    """Return the best mean 5-fold RMSECV of a PLS model on ``x`` and ``y``.

    Every candidate component count is cross-validated with ``PLSRegression``
    and the lowest mean fold RMSE found is returned.

    Parameters
    ----------
    x : numpy.ndarray, 2-D
        Predictor matrix; its rows are selected with the fold index arrays.
    y : numpy.ndarray
        Responses, indexed along the first axis with the same fold indices.

    Returns
    -------
    float
        Mean RMSE over the 5 folds at the component count that minimises it.
    """
    n_features = x.shape[1]
    kf = KFold(n_splits=5)
    # KFold is used without shuffling, so the folds are deterministic and can
    # be built once and reused for every candidate component count.
    folds = list(kf.split(x))
    # scikit-learn bounds PLSRegression's n_components by
    # min(n_samples, n_features) of the data passed to fit(), so the search is
    # capped by the smallest training fold as well as by the feature count and
    # the hard limit of 25.
    smallest_train = min(len(train_index) for train_index, _ in folds)
    pc = min(25, n_features, smallest_train)

    RMSECV = []
    for n_components in range(1, pc + 1):
        fold_rmse = []
        for train_index, test_index in folds:
            pls = PLSRegression(n_components=n_components)
            pls.fit(x[train_index], y[train_index])
            y_predict = pls.predict(x[test_index])
            fold_rmse.append(
                np.sqrt(mean_squared_error(y[test_index], y_predict))
            )
        RMSECV.append(np.mean(fold_rmse))
    # Folds and fits are deterministic, so the curve entry at the winning
    # component count is exactly what a second cross-validation at that count
    # would produce; re-running it would only repeat the same work.
    return RMSECV[np.argmin(RMSECV)]

def generateNewModel(V0, nV1, B, varIndex, X, Y):
    """Propose a variable subset of size ``nV1`` derived from ``V0``.

    Parameters
    ----------
    V0 : numpy.ndarray
        Column indices of the current subset.
    nV1 : int or float
        Requested size of the proposed subset (truncated with ``int``).
    B : numpy.ndarray
        Importance score per entry of ``V0``; only used when shrinking.
    varIndex : numpy.ndarray
        All candidate column indices.
    X, Y : numpy.ndarray
        Data passed to ``MCS_ratio`` when the subset has to grow.

    Returns
    -------
    numpy.ndarray
        ``V0`` itself when the size is unchanged, otherwise the ``nV1``
        highest-scoring indices.
    """
    size_gap = nV1 - len(V0)

    # Same size requested: keep the current subset untouched.
    if not (size_gap > 0 or size_gap < 0):
        return V0

    # Shrink: keep the entries of V0 with the largest scores in B.
    # argsort sorts ascending, so the scores are negated for a descending rank.
    if size_gap < 0:
        descending = np.argsort(-B)
        return V0[descending[:int(nV1)]]

    # Grow: draw up to 3x the missing count from the variables not yet in V0,
    # re-score the enlarged pool and keep its best nV1 members.
    unused = np.setdiff1d(varIndex, V0)
    n_unused = len(unused)
    shuffled = np.random.permutation(n_unused)
    n_extra = min(3 * int(size_gap), n_unused)
    pool = np.concatenate((V0, unused[shuffled[:n_extra]]))
    pool_scores = MCS_ratio(X[:, pool], Y)
    descending = np.argsort(-pool_scores)
    return pool[descending[:int(nV1)]]

def MCS_ratio(X, Y):
    """Score every column of ``X`` from Monte Carlo sampled PLS coefficients.

    In each of 100 runs a random 80% of the rows is drawn, the number of PLS
    components is chosen with ``plscv1`` and a ``PLSRegression`` model is
    fitted; its flattened coefficients are stored. Two per-column quantities
    are then derived from the absolute coefficients and min-max scaled:
    their sum over the runs, and ``|mean / std|`` over the runs.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Predictor matrix.
    Y : numpy.ndarray
        Responses, indexed along the first axis like the rows of ``X``.

    Returns
    -------
    numpy.ndarray
        One score per column of ``X``: the product of the two scaled terms.
    """
    sample_fraction = 0.8
    n_runs = 100
    n_rows, n_cols = X.shape
    n_calibration = int(n_rows * sample_fraction)

    coefficients = np.zeros((n_runs, n_cols))
    for run in range(n_runs):
        chosen_rows = np.random.permutation(n_rows)[:n_calibration]
        X_sub = X[chosen_rows, :]
        Y_sub = Y[chosen_rows]

        model = PLSRegression(n_components=plscv1(X_sub, Y_sub))
        model.fit(X_sub, Y_sub)
        # One row of coefficients per run, one entry per column of X.
        coefficients[run, :] = model.coef_.flatten()

    magnitude = np.abs(coefficients)

    # Term 1: total absolute coefficient per column, scaled to [0, 1].
    # NOTE(review): the scaling divides by (max - min), which is zero when all
    # columns tie (e.g. a single column) and then yields NaN -- confirm callers
    # never hit that case.
    total = np.sum(magnitude, axis=0)
    c = (total - np.min(total)) / (np.max(total) - np.min(total))

    # Term 2: |mean / std| of the absolute coefficients, left at 0 where the
    # mean is exactly 0, then scaled the same way.
    mean_mag = np.mean(magnitude, axis=0)
    std_mag = np.std(magnitude, axis=0)
    has_signal = mean_mag != 0
    stability = np.zeros(n_cols)
    stability[has_signal] = np.abs(mean_mag[has_signal] / std_mag[has_signal])
    s = (stability - np.min(stability)) / (np.max(stability) - np.min(stability))

    return c * s

def randomfrog(X, Y, N, Q=875):
    """Run the Random Frog variable-selection sampler scored by PLS RMSECV.

    Starting from a random subset of columns, each iteration proposes a new
    subset with ``generateNewModel``, scores the current and proposed subsets
    with ``plscv`` and accepts the proposal with probability 1 when it scores
    lower, otherwise with probability
    ``0.1 * (current + 0.001) / (proposal + 0.001)``. Every iteration adds one
    count to each column of the subset kept at its end.

    Parameters
    ----------
    X : numpy.ndarray, 2-D
        Predictor matrix; columns are the variables being selected.
    Y : numpy.ndarray
        Responses passed on to the scoring helpers.
    N : int
        Number of sampling iterations.
    Q : int, optional
        Initial subset size (default 875). Clamped to the number of columns
        of ``X`` so the first proposal and the reported ``'Q'`` match the
        subset actually drawn.

    Returns
    -------
    F : dict
        ``'N'`` iterations, ``'Q'`` initial subset size, ``'Vrank'`` column
        indices sorted by decreasing selection frequency, ``'probability'``
        selection frequency per column, ``'nVar'`` per-iteration subset size
        and ``'RMSEP'`` per-iteration RMSECV of the subset that was kept.
    Erro : list of numpy.ndarray
        The current subset at the start of every iteration.
    """
    _, p = X.shape
    # A subset cannot hold more columns than X has; without the clamp the
    # first proposal size would be drawn around a size V0 never had.
    Q = min(Q, p)
    Q0 = Q
    varIndex = np.arange(p)
    V0 = np.random.permutation(p)[:Q]

    probability = np.zeros(p)
    RMSEP = []
    nVar = []
    Erro = []
    # RMSECV of the current subset. plscv uses unshuffled KFold and is
    # deterministic, so the value is carried between iterations instead of
    # being recomputed for an unchanged (or just-accepted) subset.
    ARMSEP0 = None
    for i in range(N):
        # Proposal size: normal around the current size, at least 5, at most p.
        nVstar = int(min(p, max(np.round(np.random.randn() * 0.3 * Q + Q), 5)))
        Erro.append(V0)
        B = MCS_ratio(X[:, V0], Y)

        Vstar = generateNewModel(V0, nVstar, B, varIndex, X, Y)
        if ARMSEP0 is None:
            ARMSEP0 = plscv(X[:, V0], Y)
        ARMSEPstar = plscv(X[:, Vstar], Y)

        if ARMSEPstar < ARMSEP0:
            probAccept = 1
        else:
            probAccept = 0.1 * (ARMSEP0 + 0.001) / (ARMSEPstar + 0.001)

        randJudge = np.random.rand()
        if probAccept > randJudge:
            V0 = Vstar
            ARMSEP0 = ARMSEPstar
            RMSEP.append(ARMSEPstar)
            nVar.append(nVstar)
        else:
            RMSEP.append(ARMSEP0)
            nVar.append(Q)
        probability[V0] += 1
        Q = len(V0)
        if (i+1) % 100 == 0:
            print(f"The {i+1}th sampling for random frog finished.")
    # With no iterations every frequency is already 0; skip the 0/0 division.
    if N > 0:
        probability /= N
    Vrank = np.argsort(-probability)

    F = {
        'N': N,
        'Q': Q0,
        'Vrank': Vrank,
        'probability': probability,
        'nVar': nVar,
        'RMSEP': RMSEP
        }
    return F, Erro

# Load the data here.
# NOTE(review): X, Y and n are not defined anywhere in the visible code; they
# must be created before this point, otherwise the call below raises NameError.
F = randomfrog(X, Y, n) # n sets the number of iterations; X is the spectral data in `data`, Y is the substance content in `data`
# randomfrog returns a (result dict, subset history) tuple, so F[0] below is
# the result dict.
k = 0
RMSECV = np.zeros((300))# one slot per candidate variable count tried below
vsel_temp = dict()
# Evaluate the top-j ranked variables for j = 10 .. 309 and record the
# cross-validated RMSE of each candidate subset.
for j in range(10, 310):
    Utemp = F[0]["Vrank"][:j]
    vsel_temp[k] = Utemp
    Xtrain_temp = X[:, Utemp]
    CV = plscv(Xtrain_temp, Y)      
    RMSECV[k] = CV
    k += 1
# Keep the subset with the lowest RMSECV; the bare name displays it as the
# notebook cell output.
index = np.argmin(RMSECV)
vsel = vsel_temp[index]
vsel

NameError: name 'X' is not defined