In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, GradientBoostingRegressor, GradientBoostingClassifier
import sklearn
from sklearn.metrics import mean_squared_error, r2_score, confusion_matrix, accuracy_score, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, KFold, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
import random
import warnings
from sklearn.decomposition import PCA
import Processor
import importlib

In [3]:
index_names  = ['id', 'cycles']
setting_names  = ['setting_1', 'setting_2', 'setting_3']
sensor_names = [f"s_{x}" for x in range(1, 22)]
col_names = index_names + setting_names + sensor_names

def apply_names (path, column_names):
    df  = pd.read_csv(path, header=None, delimiter =r"\s+", names = column_names)
#     df = df.set_axis(column_names, axis=1)
    return df
df_train = apply_names('train_FD001.csv', col_names)
df_test= apply_names('test_FD001.csv', col_names)

In [None]:
df_train.head()
print(len(df_train['id'].unique()))
df_train.info()
fig =plt.figure()
ax = fig.add_subplot(projection='3d')

pca = PCA(n_components=3)
fit_data = pca.fit_transform(df_train)
ax.scatter(fit_data[:,0],fit_data[:,1], fit_data[:,2])

In [None]:
importlib.reload(Processor)
df_train = Processor.calculate_RUL(df_train)
df_test = Processor.calculate_RUL(df_test)

normalized_train = Processor.normalizer(df_train, sensor_names)
normalized_test = Processor.normalizer(df_test, sensor_names)

columns_to_drop = ['cycles', 'setting_1', 'setting_2', 'setting_3', 's_1', 's_5', 's_6', 's_10', 's_16', 's_18', 's_19']

normalized_train = Processor.drop_col(normalized_train, columns_to_drop)
normalized_test = Processor.drop_col(normalized_test, columns_to_drop)


def plot_sensor(sensor_name,X):
    plt.figure(figsize=(13,5))
    for i in X['id'].unique():
#         if i == 10:
#             continue
        if (i % 10 == 0):  # only plot every engine
            plt.plot('RUL', sensor_name, 
                     data=X[X['id']==i].rolling(5).mean(), alpha = 1, label = f'Engine {i}')
            plt.axvline(30, color='red', linestyle='dashed', linewidth=2)
    plt.rcParams['figure.facecolor'] = 'white'
    plt.xlim(250, 0)  # reverse the x-axis so RUL counts down to zero
    plt.xticks(np.arange(0, 275, 25))
    plt.ylabel(sensor_name)
    plt.xlabel('Remaining Use fulLife')
    plt.legend()
    plt.show()
    

for sensor in normalized_train.drop(['id', 'RUL'], axis=1).columns:
    plot_sensor(sensor, normalized_train)
    

In [41]:
normalized_train.head()

Unnamed: 0,id,RUL,s_2,s_3,s_4,s_7,s_8,s_9,s_11,s_12,s_13,s_14,s_15,s_17,s_20,s_21
0,1,191,0.183735,0.406802,0.309757,0.726248,0.242424,0.109755,0.369048,0.633262,0.205882,0.199608,0.363986,0.333333,0.713178,0.724662
1,1,190,0.283133,0.453019,0.352633,0.628019,0.212121,0.100242,0.380952,0.765458,0.279412,0.162813,0.411312,0.333333,0.666667,0.731014
2,1,189,0.343373,0.369523,0.370527,0.710145,0.272727,0.140043,0.25,0.795309,0.220588,0.171793,0.357445,0.166667,0.627907,0.621375
3,1,188,0.343373,0.256159,0.331195,0.740741,0.318182,0.124518,0.166667,0.889126,0.294118,0.174889,0.166603,0.333333,0.573643,0.662386
4,1,187,0.349398,0.257467,0.404625,0.668277,0.242424,0.14996,0.255952,0.746269,0.235294,0.174734,0.402078,0.416667,0.589147,0.704502


In [47]:
sensor_cols = [col for col in normalized_train.columns if 'sensor' in col or 'op_setting' in col]

# def make_features(df, window_size=30):
#     df_feat = df.copy()
#     grouped = df_feat.groupby('id')
#     features = []

#     for unit, group in grouped:
# #         group = group.sort_values('time', ascending=False).head(window_size)
#         print(group)
#         row = group[sensor_cols].agg(['mean', 'std', 'min', 'max']).T
#         row.columns = [f'{stat}' for stat in row.columns]
#         row = row.stack().to_frame().T
#         row['unit'] = unit
#         features.append(row)

#     return pd.concat(features).reset_index(drop=True)

X_train = normalized_train
y_train = normalized_train.groupby('id')['RUL'].min().reset_index(drop=True)

# X_train = normalized_train[sensor_names]
# y_train = normalized_train['RUL']
X = X_train.values
y = y_train.values

def fit_bayesian_linear_regression(X, y, alpha=1.0, beta=25.0):
    D = X.shape[1]
    I = np.eye(D)
    S_N = np.linalg.inv(alpha * I + beta * X.T @ X)
    m_N = beta * S_N @ X.T @ y
    return m_N, S_N

m_N, S_N = fit_bayesian_linear_regression(X, y)
def fit_bayesian_linear_regression(X, y, alpha=1.0, beta=25.0):
    D = X.shape[1]
    I = np.eye(D)
    S_N = np.linalg.inv(alpha * I + beta * X.T @ X)
    m_N = beta * S_N @ X.T @ y
    return m_N, S_N

m_N, S_N = fit_bayesian_linear_regression(X_train.values, y_train.values)
# X_test = make_features(normalized_test)
X_test = normalized_test
X_test.drop(columns='id', inplace=True)
y_pred_mean, y_pred_std = predict_bayesian(X_test.values, m_N, S_N)

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 100 is different from 20631)