In [1]:
import hopsworks
import pandas as pd
import tensorflow as tf
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
import os
import numpy as np
import joblib
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler



In [2]:
# Log in to Hopsworks. The environment variable 'HOPSWORKS_API_KEY' must be set
# for login to succeed.
project = hopsworks.login()
# Handle to the project's feature store; later cells read the gold feature group from it.
fs = project.get_feature_store()

Connected. Call `.close()` to terminate connection gracefully.

Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/194723
Connected. Call `.close()` to terminate connection gracefully.


In [3]:
# Load the gold price data from the feature store.
gold_fg = fs.get_feature_group(name="gold", version=1)
print(gold_fg)

# Get (or create) a feature view over every column of the feature group,
# declaring "close" as the label column.
query = gold_fg.select_all()
feature_view = fs.get_or_create_feature_view(
    name="gold",
    version=1,
    description="Read from gold dataset",
    labels=["close"],
    query=query,
)
print(feature_view)

# Read the feature group, order the rows by date and renumber the index 0..n-1.
df_gold = gold_fg.read().sort_values(by="date").reset_index(drop=True)

print(df_gold)
# Closing prices as a single-column 2-D array.
y_gold = df_gold['close'].values.reshape(-1, 1)

<hsfs.feature_group.FeatureGroup object at 0x10491c220>
<hsfs.feature_view.FeatureView object at 0x2807b97b0>
Finished: Reading data from Hopsworks, using ArrowFlight (1.09s) 
                          date        close
0    2000-08-30 00:00:00+00:00   273.899994
1    2000-08-31 00:00:00+00:00   278.299988
2    2000-09-01 00:00:00+00:00   277.000000
3    2000-09-05 00:00:00+00:00   275.799988
4    2000-09-06 00:00:00+00:00   274.200012
...                        ...          ...
5854 2023-12-29 00:00:00+00:00  2062.399902
5855 2024-01-02 00:00:00+00:00  2064.399902
5856 2024-01-03 00:00:00+00:00  2034.199951
5857 2024-01-04 00:00:00+00:00  2042.300049
5858 2024-01-05 00:00:00+00:00  2042.400024

[5859 rows x 2 columns]


In [4]:
# Forecast configuration
n_forecast = 90  # number of steps in each output sequence (forecast period)
# Number of steps in each input sequence (lookback period); kept at three times
# the forecast period.
n_lookback = n_forecast * 3

In [None]:
# Model Training for Gold
#
# Scales the closing prices to [0, 1], slices them into
# (n_lookback inputs -> n_forecast targets) windows and fits a stacked LSTM
# that emits n_forecast values per input window.

# Always derive the series from df_gold instead of re-scaling y_gold in place.
# Re-running this cell would otherwise fit the scaler on already-scaled values,
# after which it would no longer map between prices and the [0, 1] range.
close_gold = df_gold['close'].values.reshape(-1, 1)
scaler_gold = MinMaxScaler(feature_range=(0, 1))
scaler_gold = scaler_gold.fit(close_gold)
y_gold = scaler_gold.transform(close_gold)  # y_gold holds the scaled series from here on

# Each sample needs n_lookback past values plus n_forecast future values.
n_windows = len(y_gold) - n_lookback - n_forecast + 1
if n_windows <= 0:
    raise ValueError(
        f"Need at least {n_lookback + n_forecast} rows to build one training "
        f"window, got {len(y_gold)}"
    )

X_gold = []
Y_gold = []

# Slide over the series: inputs are the n_lookback values before position i,
# targets are the n_forecast values starting at position i.
for i in range(n_lookback, len(y_gold) - n_forecast + 1):
    X_gold.append(y_gold[i - n_lookback: i])
    Y_gold.append(y_gold[i: i + n_forecast])

X_gold = np.array(X_gold)
Y_gold = np.array(Y_gold)
print(X_gold.shape, Y_gold.shape)

# fit / train the model
model_gold = Sequential()
model_gold.add(LSTM(units=128, return_sequences=True, input_shape=(n_lookback, 1)))
model_gold.add(LSTM(units=64, return_sequences=True))
model_gold.add(LSTM(units=64, return_sequences=False))
model_gold.add(Dense(n_forecast))
model_gold.compile(loss='mean_squared_error', optimizer='adam')
# Dense(n_forecast) emits (batch, n_forecast), while Y_gold carries a trailing
# length-1 feature axis; drop that axis so targets and predictions line up
# explicitly instead of relying on implicit squeezing inside the loss.
model_gold.fit(X_gold, Y_gold[..., 0], epochs=20, batch_size=32, verbose=1)

(5500, 270, 1) (5500, 90, 1)
Epoch 1/20


2024-01-06 15:24:51.164094: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
 17/172 [=>............................] - ETA: 48s - loss: 0.0019

In [None]:
# Upload the trained model to the Hopsworks Model Registry.
# First get an object for the model registry.
mr = project.get_model_registry()

# Local staging directory; its contents are uploaded by `gold_model.save` below.
model_dir = "gold_model"
os.makedirs(model_dir, exist_ok=True)  # no-op when the directory already exists

# Serialize the trained model into the staging directory.
# NOTE(review): this pickles the Keras model via joblib. The file name and format
# are kept unchanged because code outside this notebook may load this file —
# confirm before switching to the model's native save format.
joblib.dump(model_gold, os.path.join(model_dir, "gold_model.pkl"))

# Create an entry in the model registry with the model's name and description
# (no metrics or model schema are attached).
gold_model = mr.python.create_model(
    name="gold_model",
    description="Gold Predictor"
)

# Upload the model to the model registry, including all files in 'model_dir'
gold_model.save(model_dir)
