In [1]:
import numpy as np
import pandas as pd
import joblib
import warnings
import os
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the three artifacts that are passed to write_files() in the cells below:
# the list of feature columns, the scaler and the PCA model.
# NOTE(review): presumably these were saved by an earlier step — confirm.
# joblib.load unpickles arbitrary objects, so only load files from a trusted source.
use_cols = joblib.load('./use_cols')
scaler = joblib.load('./scaler')
pca = joblib.load('./model/pca_1XX')

In [3]:
def load_train(train_no=1, use_cols=None):
    '''
    Load one preprocessed training set and split it into features, label
    and cycle columns (used to build the train_HA tables).

    The file ``concat_0<train_no>.lz4`` is read with joblib from
    ``../../2.1preprocess_train_data/concats_more/``.

    NOTE(review): the original docstring said the data must be taken from
    "concat_cut", but the path used here is "concats_more" — confirm which
    directory is intended.

    Parameters
    ----------
    train_no : int
        Number substituted into the file name (``concat_0%d.lz4``).
    use_cols : list-like of column labels, optional
        Columns returned as features. ``None`` selects no columns.

    Returns
    -------
    x_train : the ``use_cols`` columns of the loaded table
    y_train : the ``['RULR']`` column, kept two-dimensional
    CLI : the ``'CLI'`` column
    CL : the ``'CL'`` column
    '''
    # Identity test on purpose: `use_cols == None` is evaluated element-wise
    # when use_cols is a numpy array or pandas Index, and the `if` then
    # raises "truth value of an array is ambiguous".
    if use_cols is None:
        use_cols = []

    input_dir = '../../2.1preprocess_train_data/concats_more/'
    # presumably a pandas DataFrame — it is indexed by column labels below
    train = joblib.load(input_dir + 'concat_0%d.lz4' % train_no)
    y_train = train[['RULR']]
    x_train = train[use_cols]
    CLI = train['CLI']
    CL = train['CL']
    return x_train, y_train, CLI, CL

def standard_scale_new(data,scaler):
    '''
    Apply ``scaler.transform`` to the values of ``data`` and return the result
    as a new DataFrame carrying the same column labels and index as ``data``.
    '''
    return pd.DataFrame(
        data=scaler.transform(data.values),
        index=data.index,
        columns=data.columns,
    )

def cal_T2(data, pca, use_explained_variance=False):
    '''
    Compute a T2-style statistic for every row of ``data``.

    Each row is projected with ``pca.transform`` and the statistic is
    ``sum_i(t_i ** 2 / s_i)``, where ``t_i`` are the projected coordinates and
    ``s_i`` the per-component scale. This is the closed form of
    ``t * inv(diag(s)) * t.T``, evaluated for all rows at once instead of
    re-inverting the same diagonal matrix for every row.

    Parameters
    ----------
    data : array-like accepted by ``pca.transform``
    pca : object providing ``transform`` and ``singular_values_`` (and
        ``explained_variance_`` when ``use_explained_variance`` is true).
    use_explained_variance : bool, default False
        False keeps the historical scaling by ``pca.singular_values_``.
        True scales by ``pca.explained_variance_`` instead.
        NOTE(review): Hotelling's T2 is conventionally scaled by the
        covariance eigenvalues, which is what scikit-learn exposes as
        ``explained_variance_``; the default is left unchanged so values
        already written by earlier runs stay comparable — confirm which
        scaling is intended.

    Returns
    -------
    list of float, one value per row of ``data``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If any scale is exactly zero (the diagonal matrix is not invertible).
    '''
    projected = np.asarray(pca.transform(data))
    if use_explained_variance:
        scales = np.asarray(pca.explained_variance_, dtype=float)
    else:
        scales = np.asarray(pca.singular_values_, dtype=float)

    # Keep the failure mode of inverting a singular diagonal matrix instead of
    # silently producing inf/nan.
    if np.any(scales == 0):
        raise np.linalg.LinAlgError('Singular matrix')

    return list((projected ** 2 / scales).sum(axis=1))

def write_files(train_no, use_cols, scaler, pca, output_dir='./train_HA'):
    '''
    Build the PCA_T2 table for one training set and save it with joblib.

    Steps: load the set with ``load_train``, scale the features with
    ``standard_scale_new``, compute the statistic with ``cal_T2``, attach the
    'CLI', 'CL' and 'RULR' columns, and dump the table to
    ``<output_dir>/train_HA_0<train_no>.lz4`` (lz4-compressed).

    Parameters
    ----------
    train_no : int
        Training-set number; used in both the input and output file names.
    use_cols : list-like of column labels
        Feature columns passed on to ``load_train``.
    scaler : object with ``transform``, applied to the feature values.
    pca : PCA model passed on to ``cal_T2``.
    output_dir : str, default './train_HA'
        Directory the table is written to; created if it does not exist.

    Returns
    -------
    None. Prints the shape of the feature table as a progress check.
    '''
    x_train, y_train, CLI, CL = load_train(train_no, use_cols)
    print(x_train.shape)
    data_stand = standard_scale_new(x_train, scaler)

    T2 = pd.DataFrame(
        data=cal_T2(data_stand, pca),
        columns=['PCA_T2'],
        index=data_stand.index,
    )
    T2['CLI'] = CLI
    T2['CL'] = CL
    T2['RULR'] = y_train

    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(output_dir, exist_ok=True)
    joblib.dump(
        T2,
        os.path.join(output_dir, 'train_HA_0%d.lz4' % train_no),
        compress='lz4',
    )

In [4]:
# Training set 1: reads concat_01.lz4 and writes ./train_HA/train_HA_01.lz4.
write_files(1, use_cols, scaler, pca)

(110027, 17)


In [5]:
# Training set 2: reads concat_02.lz4 and writes ./train_HA/train_HA_02.lz4.
write_files(2, use_cols, scaler, pca)

(107839, 17)


In [6]:
# Training set 3: reads concat_03.lz4 and writes ./train_HA/train_HA_03.lz4.
write_files(3, use_cols, scaler, pca)

(85293, 17)


In [7]:
# Health indicator (PCA_T2): reload the table saved for training set 1 and
# preview its first rows as a sanity check.
train_HA_01 = joblib.load('./train_HA/train_HA_01.lz4')
train_HA_01.head()

Unnamed: 0,PCA_T2,CLI,CL,RULR
0,30.296621,5,4.0,0.983333
1,31.557455,5,4.000167,0.983333
2,31.806602,5,4.000333,0.983332
3,31.564906,5,4.0005,0.983331
4,31.426296,5,4.000667,0.983331
