In [None]:
import os
import sys
# Make the repository root importable so that `src.*` resolves from this notebook.
project_path = '..'
# Guarded so that re-running the cell does not stack duplicate entries on sys.path.
if project_path not in sys.path:
    sys.path.append(project_path)
from src.data_preprocessing import data_processing

In [None]:
import numpy as np
import pandas as pd
import torch

In [None]:
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
# Emit inline figures as PNG, and use 300 dpi both for on-screen figures and
# for figures written with savefig.
backend_inline.set_matplotlib_formats('png')
plt.rcParams.update({'figure.dpi': 300, 'savefig.dpi': 300})
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.svm import SVR

In [None]:
# Load the source CSV into `df` and display its (rows, columns) shape.
csv_path = '../data/data_hn/hn_02-01/hn_02-01_2024-05-29.csv'
df = pd.read_csv(csv_path)
df.shape

In [None]:
# First filter_data pass over column 'y', with filter_step=100. The filtering
# rule itself is implemented in src.data_preprocessing and is not visible here.
# NOTE: `df` is overwritten with the result, so re-running this cell applies
# the pass again to already-filtered data.
df = data_processing.filter_data(df, 'y', filter_step=100)
# Display the shape after this pass.
df.shape

In [None]:
# Second filter_data pass over column 'y', this time with filter_step=1000.
# NOTE(review): the effect of filter_step is defined in src.data_preprocessing;
# confirm there why a larger step follows the 100-step pass.
# `df` is overwritten, so the cell is not idempotent on re-run.
df = data_processing.filter_data(df, 'y', filter_step=1000)
# Display the shape after this pass.
df.shape

In [None]:
# Third filter_data pass over column 'y', with filter_step=200.
# NOTE(review): the passes run in the order 100 -> 1000 -> 200; whether that
# order matters depends on filter_data's implementation — confirm before reordering.
# `df` is overwritten, so the cell is not idempotent on re-run.
df = data_processing.filter_data(df, 'y', filter_step=200)
# Display the shape after this pass.
df.shape

In [None]:
# Parse the timestamp column, index the frame by it, and force the target
# column 'y' to float.
# The guard keeps the cell re-runnable: once 'video_record_time' has become the
# index it is no longer a column, and touching it again would raise KeyError.
if df.index.name != 'video_record_time':
    df['video_record_time'] = pd.to_datetime(df['video_record_time'])
    # Rebind instead of inplace=True so the cell always works on a frame it owns.
    df = df.set_index('video_record_time')
df['y'] = df['y'].astype(float)

In [None]:
# Pull the target column out as a NumPy array and keep the timestamp index
# alongside it; display both shapes as a sanity check.
data = df['y'].to_numpy()
time = df.index
time.shape, data.shape

In [None]:
# Sliding-window configuration. Each window holds predict_step + predict_num
# consecutive samples of `data`.
predict_step = 15
predict_num = 1
sample_num = len(data)  # total number of samples in the series

In [None]:
# Build overlapping windows over `data`: every row of `data_set` holds
# predict_step + predict_num consecutive samples, shifted by one sample per row.
window_len = predict_step + predict_num
# Clamp at zero so a series shorter than one window gives an empty 2-D array
# instead of a 1-D one that breaks the column slicing done later.
n_windows = max(sample_num - window_len + 1, 0)
data_set = np.array(
    [data[start:start + window_len] for start in range(n_windows)],
    dtype=float,
).reshape(n_windows, window_len)
# Label each window with the timestamp of its LAST sample, because that sample
# is the value later used as the prediction target and plotted against `time`.
# Taking the first n_windows timestamps instead would shift every plotted
# point window_len - 1 samples into the past.
# Sliced from df.index (not from `time`) so re-running the cell does not shift again.
time = df.index[window_len - 1:window_len - 1 + n_windows]

In [None]:
# Split the windows in their existing order (no shuffling): the first 70% of
# rows form the training set, the rest the test set. Within each window, every
# column but the last is an input feature and the last column is the target.
split_at = int(data_set.shape[0] * 0.7)
train_set = data_set[:split_at]
test_set = data_set[split_at:]
x_train, x_test = train_set[:, :-1], test_set[:, :-1]
y_train = train_set[:, -1].reshape(-1, 1)
y_test = test_set[:, -1].reshape(-1, 1)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# Normalization
# Inputs and targets get separate scalers, and both are fitted on the training
# split ONLY. Refitting one shared scaler on every array scales the test data
# with its own min/max (information leak, inconsistent scaling between splits)
# and leaves the scaler fitted on whatever array came last, so a later
# inverse_transform would use the wrong range.
x_scaler = MinMaxScaler(feature_range=(0, 1))
# Target scaler. It keeps the name `scaler` because the later cells call
# scaler.inverse_transform to bring predictions and targets back to original units.
scaler = MinMaxScaler(feature_range=(0, 1))
x_train = x_scaler.fit_transform(x_train)
x_test = x_scaler.transform(x_test)
y_train = scaler.fit_transform(y_train)
# Test values may fall slightly outside [0, 1]; that is expected when the
# scaling is learned from the training split alone.
y_test = scaler.transform(y_test)
x_train.shape, y_train.shape, x_test.shape, y_test.shape

In [None]:
# Transform to tensors: convert all four arrays to float32 torch tensors,
# rebinding the same names.
x_train, y_train, x_test, y_test = (
    torch.tensor(array, dtype=torch.float32)
    for array in (x_train, y_train, x_test, y_test)
)

In [None]:
# RBF-kernel support vector regressor with fixed hyper-parameters.
svr = SVR(kernel='rbf', C=4.0, gamma=0.8, epsilon=0.01)
# SVR.fit expects a 1-D target of shape (n_samples,). y_train is an (n, 1)
# column, which makes scikit-learn emit a DataConversionWarning, so flatten it
# explicitly. np.asarray also accepts the torch tensor created earlier.
svr.fit(x_train, np.asarray(y_train).ravel())

In [None]:
# Predict on both splits with the fitted SVR; outputs are still in scaled units.
res_train, res_test = (svr.predict(features) for features in (x_train, x_test))

In [None]:
def _unscale(values):
    """Pass `values` as a single column through scaler.inverse_transform and return a flat 1-D array."""
    return scaler.inverse_transform(values.reshape(-1, 1)).flatten()


# Undo the scaling for the predictions and for the targets of both splits.
res_train, res_test = _unscale(res_train), _unscale(res_test)
y_train_original, y_test_original = _unscale(y_train), _unscale(y_test)
res_train.shape, res_test.shape, y_train_original.shape, y_test_original.shape

In [None]:
# Mean squared error of the unscaled predictions, one value per split.
train_error = mean_squared_error(y_true=y_train_original, y_pred=res_train)
test_error = mean_squared_error(y_true=y_test_original, y_pred=res_test)

In [None]:
# Sanity check: display the shapes of the arrays used by the plots below.
tuple(arr.shape for arr in (y_train_original, res_train, y_test_original, res_test, time))

In [None]:
import matplotlib.dates as mdates

# Training split: actual targets (red) against SVR predictions (blue), plotted
# over the leading len(y_train_original) entries of `time`.
train_time = time[:len(y_train_original)]
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(train_time, y_train_original, 'r-', label='Real Train')
ax.plot(train_time, res_train, 'b-', label='Predicted Train')
ax.legend()
ax.set_xlabel('Predict Train Data')
ax.set_ylabel('Predicted Value')
ax.set_title(f'Train set prediction (MSE: {train_error})')
# Show only hour:minute on the time axis.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.grid()
plt.show()

In [None]:
# Test split: actual targets (red) against SVR predictions (blue), plotted over
# the entries of `time` that follow the training portion.
test_time = time[len(y_train_original):]
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(test_time, y_test_original, 'r-', label='Real Test')
ax.plot(test_time, res_test, 'b-', label='Predicted Test')
ax.legend()
ax.set_xlabel('Predict Test Data')
ax.set_ylabel('Predicted Value')
ax.set_title(f'Test set prediction (MSE: {test_error})')
# Show only hour:minute on the time axis.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.grid(True)
plt.show()

In [None]:
# Remaining quality metrics on the unscaled values, computed per split:
# R2, mean absolute error, and mean bias error (mean of actual - predicted).
train_r2 = r2_score(y_true=y_train_original, y_pred=res_train)
train_mae = mean_absolute_error(y_true=y_train_original, y_pred=res_train)
train_mbe = (y_train_original - res_train).mean()

test_r2 = r2_score(y_true=y_test_original, y_pred=res_test)
test_mae = mean_absolute_error(y_true=y_test_original, y_pred=res_test)
test_mbe = (y_test_original - res_test).mean()

print(f'Train R2: {train_r2}, Test R2: {test_r2}')
print(f'Train MAE: {train_mae}, Test MAE: {test_mae}')
print(f'Train MBE: {train_mbe}, Test MBE: {test_mbe}')

In [None]:
# Training split: predicted vs. actual scatter. The dashed diagonal marks
# perfect agreement across the range of the actual values.
actual_min, actual_max = y_train_original.min(), y_train_original.max()
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_train_original, res_train, c='b', s=25)
ax.plot([actual_min, actual_max], [actual_min, actual_max], 'k--', lw=2)
ax.set_xlabel('Real Train')
ax.set_ylabel('Predicted Train')
ax.set_title(f'Train set prediction vs Real')
ax.grid()
plt.show()

In [None]:
# Test split: predicted vs. actual scatter. The dashed diagonal marks perfect
# agreement across the range of the actual values.
actual_min, actual_max = y_test_original.min(), y_test_original.max()
fig, ax = plt.subplots(figsize=(10, 5))
ax.scatter(y_test_original, res_test, c='b', s=25)
ax.plot([actual_min, actual_max], [actual_min, actual_max], 'k--', lw=2)
ax.set_xlabel('Real Test')
ax.set_ylabel('Predicted Test')
ax.set_title(f'Test set prediction vs Real')
ax.grid()
plt.show()