In [None]:
import pandas as pd
from matplotlib import pyplot as plt
from tqdm import tqdm
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.svm import NuSVR
from sklearn.metrics import mean_absolute_error
from scipy.stats import kurtosis


# Load train_small.csv and plot the acoustic signal against the target column.
data = pd.read_csv("train_small.csv")

# Label both axes so the figure can be read without looking at the code,
# matching the labelled predicted-vs-actual scatter plot later in the notebook.
plt.plot(data['time_to_failure'], data['acoustic_data'])
plt.xlabel('time_to_failure', fontsize=12)
plt.ylabel('acoustic_data', fontsize=12)
plt.show()

# Last expression of the cell: displays (n_rows, n_columns) of the loaded frame.
data.shape


In [None]:
# Read the full training signal. The explicit dtypes store acoustic_data as
# int16 instead of pandas' default 64-bit integer inference.
train = pd.read_csv('train.csv',
                    dtype={'acoustic_data': np.int16, 'time_to_failure': np.float64})

rows = 150_000  # number of consecutive samples summarised into one training row
# Whole segments only: trailing rows that do not fill a segment are ignored.
segments = train.shape[0] // rows

# Column order of the feature matrix.
FEATURE_COLUMNS = ['ave', 'std', 'kurt', 'quantile', 'max', 'min']


def extract_features(x):
    """Summarise one segment of the signal as a dict of scalar statistics.

    Parameters
    ----------
    x : numpy.ndarray
        1-D array holding the samples of a single segment.

    Returns
    -------
    dict
        Maps each name in ``FEATURE_COLUMNS`` to its statistic: mean,
        standard deviation, kurtosis (``scipy.stats.kurtosis`` with its
        default arguments), 1% quantile, maximum and minimum.
    """
    return {
        'ave': x.mean(),
        'std': x.std(),
        'kurt': kurtosis(x),
        'quantile': np.quantile(x, 0.01),
        'max': x.max(),
        'min': x.min(),
    }


# Slice the raw NumPy columns instead of building a DataFrame slice per segment.
acoustic = train['acoustic_data'].values
time_to_failure = train['time_to_failure'].values

feature_rows = []
targets = []
for segment in tqdm(range(segments)):
    start = segment * rows
    stop = start + rows
    feature_rows.append(extract_features(acoustic[start:stop]))
    # The target of a segment is the time_to_failure at its last sample.
    targets.append(time_to_failure[stop - 1])

# Build both frames once, rather than writing scalars into pre-allocated
# NaN frames row by row. float64 is forced so the integer-valued max/min
# columns keep the same dtype as the other features.
X_train = pd.DataFrame(feature_rows, columns=FEATURE_COLUMNS, dtype=np.float64)
y_train = pd.DataFrame({'time_to_failure': targets}, dtype=np.float64)

X_train.head()

In [None]:
# Display (number of segments, number of features) of the feature matrix.
X_train.shape

In [None]:
# Standard deviation of the signal in each segment, plotted against the
# segment index (the row index of X_train).
plt.plot(X_train['std'])
# Label both axes so the figure can be read on its own, as the
# predicted-vs-actual scatter plot later in the notebook already does.
plt.xlabel('segment', fontsize=12)
plt.ylabel('std', fontsize=12)
plt.show()

In [None]:
# Standardise every feature column. fit_transform learns the per-column
# statistics from X_train and applies them in one call; the fitted scaler
# is kept so the same transform can be reused later.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

In [None]:
# Fit a NuSVR with default hyper-parameters on the scaled features.
# The target frame is flattened to the 1-D array the estimator expects.
svm = NuSVR().fit(X_train_scaled, y_train.values.ravel())

# Predictions are made on the same rows the model was fitted on.
y_pred = svm.predict(X_train_scaled)

In [None]:
# Predicted vs. actual target values; points on the diagonal are exact predictions.
plt.figure(figsize=(6, 6))
plt.scatter(y_train.values.flatten(), y_pred)
plt.xlim(0, 20)
plt.ylim(0, 20)
plt.xlabel('actual', fontsize=12)
plt.ylabel('predicted', fontsize=12)
# y = x reference line. plt.plot takes flat x and y coordinate lists; nested
# (x, y) pairs are read as 2-D arrays and plotted column by column, which
# draws the diagonal twice.
plt.plot([0, 20], [0, 20])
plt.show()

In [None]:
# Mean absolute error between the targets and y_pred. y_pred was computed
# on the training features, so this is a training-set score.
score = mean_absolute_error(y_train['time_to_failure'], y_pred)
print(f'Score: {score:0.3f}')