In [102]:
# -------------------------
# Step 1: Import Libraries
# -------------------------
import yfinance as yf
import pandas as pd
import numpy as np
import ta
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from transformers import pipeline
import datetime
import time

In [103]:
# -------------------------
# Step 2: Fetch Historical Data
# -------------------------
ticker = 'RELIANCE.BO'  # NSE Reliance on Yahoo Finance
start_date = '2015-01-01'
end_date = datetime.date.today().strftime('%Y-%m-%d')

data = yf.download(ticker, start=start_date, end=end_date, interval='1d')
data = data[['Open','High','Low','Close','Volume']]


  data = yf.download(ticker, start=start_date, end=end_date, interval='1d')
[*********************100%***********************]  1 of 1 completed


In [104]:
# -------------------------
# Step 3: Feature Engineering
# -------------------------
close_series = data['Close'].squeeze()

# Technical indicators
data['SMA_10'] = close_series.rolling(10).mean()
data['SMA_20'] = close_series.rolling(20).mean()
data['EMA_10'] = close_series.ewm(span=10, adjust=False).mean()
data['EMA_20'] = close_series.ewm(span=20, adjust=False).mean()
data['RSI'] = ta.momentum.RSIIndicator(close=close_series, window=14).rsi()
macd = ta.trend.MACD(close=close_series)
data['MACD'] = macd.macd()
data['MACD_signal'] = macd.macd_signal()

# Volume features
data['Volume_SMA_10'] = data['Volume'].rolling(10).mean()
data['Volume_SMA_20'] = data['Volume'].rolling(20).mean()
data['Volume_Change'] = data['Volume'].pct_change()

# Trend features
data['Price_Change'] = data['Close'].pct_change()
data['Volatility_10'] = data['Price_Change'].rolling(10).std()
data['Volatility_20'] = data['Price_Change'].rolling(20).std()
data['Momentum_5'] = data['Close'] - data['Close'].shift(5)
data['Momentum_10'] = data['Close'] - data['Close'].shift(10)

# Lag features
lags = 3
for lag in range(1, lags+1):
    data[f'Close_lag{lag}'] = data['Close'].shift(lag)

# Target: next-day return
data['Target'] = data['Close'].pct_change().shift(-1)

# Drop NaNs
data = data.dropna().reset_index(drop=True)

In [105]:

# -------------------------
# Step 4: Scale Features
# -------------------------
feature_cols = [
    'Open','High','Low','Close','Volume',
    'SMA_10','SMA_20','EMA_10','EMA_20',
    'RSI','MACD','MACD_signal',
    'Volume_SMA_10','Volume_SMA_20','Volume_Change',
    'Price_Change','Volatility_10','Volatility_20',
    'Momentum_5','Momentum_10'
] + [f'Close_lag{i}' for i in range(1,lags+1)]

scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(data[feature_cols])

target_scaler = MinMaxScaler()
y_scaled = target_scaler.fit_transform(data['Target'].values.reshape(-1,1))


In [106]:
# -------------------------
# Step 5: Create Sequences
# -------------------------
window = 20
X_seq, y_seq = [], []
for i in range(window, len(scaled_features)):
    X_seq.append(scaled_features[i-window:i])
    y_seq.append(y_scaled[i])

X_seq = np.array(X_seq)
y_seq = np.array(y_seq)

In [107]:
# -------------------------
# Step 6: Train/Test Split
# -------------------------
split_ratio = 0.8
split_index = int(len(X_seq) * split_ratio)
X_train, X_test = X_seq[:split_index], X_seq[split_index:]
y_train, y_test = y_seq[:split_index], y_seq[split_index:]


In [108]:
# -------------------------
# Step 7: Build LSTM Model
# -------------------------
model = Sequential([
    LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.3),
    LSTM(50),
    Dropout(0.2),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')

  super().__init__(**kwargs)


In [109]:
# -------------------------
# Step 8: Train Model
# -------------------------
history = model.fit(
    X_train, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    verbose=1
)

Epoch 1/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 36ms/step - loss: 0.0233 - val_loss: 0.0051
Epoch 2/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 30ms/step - loss: 0.0079 - val_loss: 0.0039
Epoch 3/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0066 - val_loss: 0.0027
Epoch 4/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0071 - val_loss: 0.0025
Epoch 5/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 34ms/step - loss: 0.0069 - val_loss: 0.0038
Epoch 6/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 82ms/step - loss: 0.0061 - val_loss: 0.0026
Epoch 7/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 50ms/step - loss: 0.0064 - val_loss: 0.0025
Epoch 8/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - loss: 0.0059 - val_loss: 0.0026
Epoch 9/50
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━

In [110]:
# -------------------------
# Step 9: Live Prediction (5-min updates)
# -------------------------
# Sentiment Analysis Setup
sentiment_model = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone")

def fetch_live_data(ticker='RELIANCE.BO', interval='5m', window=20):
    # Fetch last N 5-min candles
    live_data = yf.download(ticker, period='7d', interval=interval)
    live_data = live_data[['Open','High','Low','Close','Volume']].dropna()

    # Compute technical/trend features dynamically
    close = live_data['Close'].squeeze()
    live_data['SMA_10'] = close.rolling(10).mean()
    live_data['SMA_20'] = close.rolling(20).mean()
    live_data['EMA_10'] = close.ewm(span=10, adjust=False).mean()
    live_data['EMA_20'] = close.ewm(span=20, adjust=False).mean()
    live_data['RSI'] = ta.momentum.RSIIndicator(close=close, window=14).rsi()
    macd = ta.trend.MACD(close=close)
    live_data['MACD'] = macd.macd()
    live_data['MACD_signal'] = macd.macd_signal()
    live_data['Volume_SMA_10'] = live_data['Volume'].rolling(10).mean()
    live_data['Volume_SMA_20'] = live_data['Volume'].rolling(20).mean()
    live_data['Volume_Change'] = live_data['Volume'].pct_change()
    live_data['Price_Change'] = live_data['Close'].pct_change()
    live_data['Volatility_10'] = live_data['Price_Change'].rolling(10).std()
    live_data['Volatility_20'] = live_data['Price_Change'].rolling(20).std()
    live_data['Momentum_5'] = live_data['Close'] - live_data['Close'].shift(5)
    live_data['Momentum_10'] = live_data['Close'] - live_data['Close'].shift(10)

    for lag in range(1,lags+1):
        live_data[f'Close_lag{lag}'] = live_data['Close'].shift(lag)

    live_data = live_data.dropna().reset_index(drop=True)

    # Take last 'window' rows for LSTM input
    last_features = live_data[feature_cols].values[-window:]
    scaled_last = scaler.transform(last_features)
    X_live = np.expand_dims(scaled_last, axis=0)

    # Predict next return
    y_pred_scaled = model.predict(X_live)
    y_pred = target_scaler.inverse_transform(y_pred_scaled)[0][0]

    # Fetch sentiment (last few headlines about Reliance)
    headlines = [
        "Reliance posts record quarterly profit",
        "Reliance faces regulatory scrutiny on Jio operations"
    ]
    sentiments = sentiment_model(headlines)
    sentiment_score = np.mean([1 if s['label']=='positive' else -1 if s['label']=='negative' else 0 for s in sentiments])

    return y_pred, sentiment_score

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/533 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cpu


In [111]:
# -------------------------
# Example: Run Live Prediction
# -------------------------
y_pred, sentiment = fetch_live_data()
print(f"Predicted next 5-min return: {y_pred:.5f}")
print(f"Sentiment score: {sentiment:.2f}")

  live_data = yf.download(ticker, period='7d', interval=interval)
[*********************100%***********************]  1 of 1 completed


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 685ms/step
Predicted next 5-min return: -0.00048
Sentiment score: 0.00


In [112]:
from tensorflow.keras.models import load_model

# Assuming your trained model is called 'model'
model.save("reliance_lstm.h5")  # Saves architecture + weights



In [114]:
import joblib
joblib.dump(scaler, "scaler.pkl")          # Feature scaler
joblib.dump(target_scaler, "target_scaler.pkl")  # Target scaler

['target_scaler.pkl']