# Neural Network Training for Financial Predictions

**Goal**: Train TFT and N-HITS models in Colab, export trained weights for Vercel deployment

**Process**:
1. Train models on real financial data in Colab (with GPU)
2. Save trained weights in TensorFlow.js format
3. Download weights to deploy on Vercel
4. Update Vercel endpoints to load these trained weights

In [1]:
# Install dependencies
!pip install tensorflow yfinance pandas numpy scikit-learn tensorflowjs

import tensorflow as tf
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import json
import os

print(f"TensorFlow: {tf.__version__}")
print(f"GPU: {tf.config.list_physical_devices('GPU')}")

TensorFlow: 2.19.0
GPU: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## 1. Data Collection and Preprocessing

In [2]:
# Fetch real market data
symbols = ['AAPL', 'MSFT', 'GOOGL', 'TSLA', 'NVDA']
print("Fetching market data...")

all_data = []
for symbol in symbols:
    ticker = yf.Ticker(symbol)
    data = ticker.history(period='2y', interval='1d')
    data['Symbol'] = symbol
    all_data.append(data)
    print(f"{symbol}: {len(data)} days")

combined_data = pd.concat(all_data)
print(f"Total: {len(combined_data)} data points")

Fetching market data...
AAPL: 501 days
MSFT: 501 days
GOOGL: 501 days
TSLA: 501 days
NVDA: 501 days
Total: 2505 data points


In [3]:
# Create sequences for training
def create_sequences(data, seq_length=30):
    X, y = [], []

    for symbol in data['Symbol'].unique():
        symbol_data = data[data['Symbol'] == symbol].sort_index()

        # Features: OHLCV + technical indicators
        features = symbol_data[['Open', 'High', 'Low', 'Close', 'Volume']].values

        # Add price changes
        price_changes = np.diff(symbol_data['Close'].values, prepend=symbol_data['Close'].values[0])
        price_changes = price_changes / symbol_data['Close'].values  # Percentage change

        # Combine features
        enhanced_features = np.column_stack([features, price_changes])

        # Normalize
        scaler = MinMaxScaler()
        scaled_features = scaler.fit_transform(enhanced_features)

        # Create sequences
        for i in range(seq_length, len(scaled_features)):
            X.append(scaled_features[i-seq_length:i])  # Past 30 days

            # Target: next day price change
            current_price = symbol_data.iloc[i-1]['Close']
            next_price = symbol_data.iloc[i]['Close']
            target = (next_price - current_price) / current_price
            y.append(target)

    return np.array(X), np.array(y)

# Create dataset
X, y = create_sequences(combined_data)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Train/test split
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

Dataset shape: X=(2355, 30, 6), y=(2355,)
Train: (1884, 30, 6), Test: (471, 30, 6)


## 2. Model Architectures

In [4]:
# TFT Model (simplified for training, with TF.js-compatible custom attention)
def create_tft_model(input_shape):
    inputs = tf.keras.layers.Input(shape=input_shape)

    # Variable selection
    context = tf.keras.layers.GlobalAveragePooling1D()(inputs)
    selection_weights = tf.keras.layers.Dense(input_shape[-1], activation='softmax')(context)
    selection_weights = tf.keras.layers.RepeatVector(input_shape[0])(selection_weights)
    selected_features = tf.keras.layers.Multiply()([inputs, selection_weights])

    # LSTM processing
    lstm_out = tf.keras.layers.LSTM(64, return_sequences=True, dropout=0.2)(selected_features)

    # Custom Multi-Head Attention (using supported operations)
    embed_dim = lstm_out.shape[-1]
    num_heads = 4
    key_dim = 16  # Per-head dimension; total projection_dim = key_dim * num_heads = 64
    projection_dim = key_dim * num_heads

    # Linear projections for Query, Key, Value
    query = tf.keras.layers.Dense(projection_dim)(lstm_out)
    key = tf.keras.layers.Dense(projection_dim)(lstm_out)
    value = tf.keras.layers.Dense(projection_dim)(lstm_out)

    # Reshape for multi-head: (batch_size, seq_length, num_heads, key_dim)
    batch_size = tf.shape(lstm_out)[0]
    seq_len = tf.shape(lstm_out)[1]
    query = tf.reshape(query, (batch_size, seq_len, num_heads, key_dim))
    key = tf.reshape(key, (batch_size, seq_len, num_heads, key_dim))
    value = tf.reshape(value, (batch_size, seq_len, num_heads, key_dim))
    query = tf.transpose(query, perm=[0, 2, 1, 3])  # (batch_size, num_heads, seq_length, key_dim)
    key = tf.transpose(key, perm=[0, 2, 1, 3])
    value = tf.transpose(value, perm=[0, 2, 1, 3])

    # Scaled dot-product attention
    scores = tf.matmul(query, key, transpose_b=True) / tf.sqrt(tf.cast(key_dim, tf.float32))
    weights = tf.nn.softmax(scores, axis=-1)
    attention_output = tf.matmul(weights, value)

    # Reshape back: (batch_size, seq_length, projection_dim)
    attention_output = tf.transpose(attention_output, perm=[0, 2, 1, 3])
    attention_output = tf.reshape(attention_output, (batch_size, seq_len, projection_dim))

    # Final projection to match embed_dim
    attention = tf.keras.layers.Dense(embed_dim)(attention_output)

    # Residual connection and normalization
    attention = tf.keras.layers.LayerNormalization()(attention + lstm_out)

    # Output
    pooled = tf.keras.layers.GlobalAveragePooling1D()(attention)
    dense = tf.keras.layers.Dense(32, activation='relu')(pooled)
    output = tf.keras.layers.Dense(1)(dense)

    model = tf.keras.Model(inputs, output, name='TFT')
    return model

tft_model = create_tft_model((X.shape[1], X.shape[2]))
print(f"TFT Model: {tft_model.count_params():,} parameters")

TFT Model: 37,099 parameters


In [5]:
# N-HITS Model (hierarchical blocks)
def create_nhits_model(input_shape):
    inputs = tf.keras.layers.Input(shape=input_shape)

    # Input projection
    x = tf.keras.layers.Dense(128)(inputs)

    # Hierarchical stacks
    stack_outputs = []

    for pool_size in [2, 4, 8]:  # Different time scales
        # Downsample
        downsampled = tf.keras.layers.AveragePooling1D(pool_size, padding='same')(x)

        # MLP blocks
        mlp = tf.keras.layers.Dense(128, activation='relu')(downsampled)
        mlp = tf.keras.layers.Dense(128, activation='relu')(mlp)

        # Upsample back
        upsampled = tf.keras.layers.UpSampling1D(pool_size)(mlp)

        # Ensure same length as original
        if upsampled.shape[1] != input_shape[0]:
            upsampled = upsampled[:, :input_shape[0], :]

        stack_outputs.append(upsampled)

    # Combine hierarchical outputs
    combined = tf.keras.layers.Add()(stack_outputs)

    # Final prediction
    pooled = tf.keras.layers.GlobalAveragePooling1D()(combined)
    dense = tf.keras.layers.Dense(64, activation='relu')(pooled)
    output = tf.keras.layers.Dense(1)(dense)

    model = tf.keras.Model(inputs, output, name='NHITS')
    return model

nhits_model = create_nhits_model((X.shape[1], X.shape[2]))
print(f"N-HITS Model: {nhits_model.count_params():,} parameters")

N-HITS Model: 108,289 parameters


## 3. Training

In [6]:
# Train TFT Model
print("Training TFT...")

tft_model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

tft_history = tft_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=32,
    verbose=1,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(patience=3)
    ]
)

print("TFT Training Complete")

Training TFT...
Epoch 1/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 16ms/step - loss: 0.0558 - mae: 0.1557 - val_loss: 0.0012 - val_mae: 0.0256 - learning_rate: 0.0010
Epoch 2/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 7.7275e-04 - mae: 0.0195 - val_loss: 0.0011 - val_mae: 0.0241 - learning_rate: 0.0010
Epoch 3/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 6.1718e-04 - mae: 0.0177 - val_loss: 0.0011 - val_mae: 0.0243 - learning_rate: 0.0010
Epoch 4/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 7.8145e-04 - mae: 0.0201 - val_loss: 0.0011 - val_mae: 0.0237 - learning_rate: 0.0010
Epoch 5/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 7.4257e-04 - mae: 0.0192 - val_loss: 0.0017 - val_mae: 0.0320 - learning_rate: 0.0010
Epoch 6/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 

In [7]:
# Train N-HITS Model
print("Training N-HITS...")

nhits_model.compile(
    optimizer='adam',
    loss='mse',
    metrics=['mae']
)

nhits_history = nhits_model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=30,
    batch_size=32,
    verbose=1,
    callbacks=[
        tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True),
        tf.keras.callbacks.ReduceLROnPlateau(patience=3)
    ]
)

print("N-HITS Training Complete")

Training N-HITS...
Epoch 1/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 59ms/step - loss: 0.0092 - mae: 0.0651 - val_loss: 0.0011 - val_mae: 0.0236 - learning_rate: 0.0010
Epoch 2/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 7.0707e-04 - mae: 0.0182 - val_loss: 0.0011 - val_mae: 0.0235 - learning_rate: 0.0010
Epoch 3/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 6.3137e-04 - mae: 0.0167 - val_loss: 0.0013 - val_mae: 0.0275 - learning_rate: 0.0010
Epoch 4/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 7.1727e-04 - mae: 0.0182 - val_loss: 0.0011 - val_mae: 0.0241 - learning_rate: 0.0010
Epoch 5/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 6.4599e-04 - mae: 0.0172 - val_loss: 0.0010 - val_mae: 0.0231 - learning_rate: 1.0000e-04
Epoch 6/30
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss

## 4. Evaluation

In [8]:
# Evaluate models
tft_loss, tft_mae = tft_model.evaluate(X_test, y_test, verbose=0)
nhits_loss, nhits_mae = nhits_model.evaluate(X_test, y_test, verbose=0)

print(f"TFT - Loss: {tft_loss:.6f}, MAE: {tft_mae:.6f}")
print(f"N-HITS - Loss: {nhits_loss:.6f}, MAE: {nhits_mae:.6f}")

# Direction accuracy
tft_pred = tft_model.predict(X_test[:100], verbose=0)
nhits_pred = nhits_model.predict(X_test[:100], verbose=0)
actual = y_test[:100]

tft_dir_acc = np.mean(np.sign(tft_pred.flatten()) == np.sign(actual))
nhits_dir_acc = np.mean(np.sign(nhits_pred.flatten()) == np.sign(actual))

print(f"\nDirection Accuracy:")
print(f"TFT: {tft_dir_acc:.1%}")
print(f"N-HITS: {nhits_dir_acc:.1%}")

TFT - Loss: 0.001043, MAE: 0.023067
N-HITS - Loss: 0.001046, MAE: 0.023077

Direction Accuracy:
TFT: 64.0%
N-HITS: 59.0%


## 5. Export for Vercel Deployment

In [9]:
import tensorflowjs as tfjs
import zipfile
import os
import json

# Create export directory
os.makedirs('/content/models', exist_ok=True)

print("Exporting trained models...")

# Save models in native Keras format (.keras) to avoid HDF5 warnings
tft_model.save('/content/models/tft-trained.keras')
nhits_model.save('/content/models/nhits-trained.keras')

# Convert to TensorFlow.js format using CLI for better compatibility
!tensorflowjs_converter --input_format=keras /content/models/tft-trained.keras /content/models/tft-trained
!tensorflowjs_converter --input_format=keras /content/models/nhits-trained.keras /content/models/nhits-trained

# Optional: Quantize for smaller size (add --quantize_float16 if needed for Cloudflare limits)
# !tensorflowjs_converter --input_format=keras --quantize_float16 /content/models/tft-trained.keras /content/models/tft-trained

print("Models exported to TensorFlow.js format")

# Save model metadata (unchanged)
metadata = {
    'tft': {
        'loss': float(tft_loss),
        'mae': float(tft_mae),
        'direction_accuracy': float(tft_dir_acc),
        'parameters': int(tft_model.count_params())
    },
    'nhits': {
        'loss': float(nhits_loss),
        'mae': float(nhits_mae),
        'direction_accuracy': float(nhits_dir_acc),
        'parameters': int(nhits_model.count_params())
    },
    'training_info': {
        'sequence_length': int(X.shape[1]),
        'num_features': int(X.shape[2]),
        'training_samples': int(len(X_train)),
        'test_samples': int(len(X_test)),
        'symbols': symbols
    }
}

with open('/content/models/metadata.json', 'w') as f:
    json.dump(metadata, f, indent=2)

print("Metadata saved")

# The rest (create_zip, etc.) remains unchanged



Exporting trained models...
failed to lookup keras version from the file,
    this is likely a weight only file
failed to lookup keras version from the file,
    this is likely a weight only file
Models exported to TensorFlow.js format
Metadata saved


In [10]:
# Create deployment package
def create_zip():
    with zipfile.ZipFile('/content/trained_models.zip', 'w') as zipf:
        for root, dirs, files in os.walk('/content/models'):
            for file in files:
                file_path = os.path.join(root, file)
                arcname = os.path.relpath(file_path, '/content')
                zipf.write(file_path, arcname)

create_zip()
print("Created trained_models.zip")

# Show what's in the package
print("\nPackage contents:")
with zipfile.ZipFile('/content/trained_models.zip', 'r') as zipf:
    for name in zipf.namelist():
        print(f"  {name}")

print(f"\nPackage size: {os.path.getsize('/content/trained_models.zip') / 1024 / 1024:.1f} MB")

Created trained_models.zip

Package contents:
  models/metadata.json
  models/tft-trained/group1-shard1of1.bin
  models/tft-trained/model.json
  models/nhits-trained/group1-shard1of1.bin
  models/nhits-trained/model.json

Package size: 0.6 MB


In [11]:
# Download the trained models
from google.colab import files

print("Downloading trained models...")
files.download('/content/trained_models.zip')

print("\n🎉 Training Complete!")
print("\nNext steps:")
print("1. Extract trained_models.zip")
print("2. Upload models/ folder to your Vercel project")
print("3. Update predict-tft.js and predict-nhits.js to load these models:")
print("   await tf.loadLayersModel('./models/tft-trained/model.json')")
print("   await tf.loadLayersModel('./models/nhits-trained/model.json')")
print("4. Deploy updated Vercel project")

print(f"\nModel Performance:")
print(f"TFT: {tft_dir_acc:.1%} direction accuracy, {tft_mae:.6f} MAE")
print(f"N-HITS: {nhits_dir_acc:.1%} direction accuracy, {nhits_mae:.6f} MAE")

Downloading trained models...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>


🎉 Training Complete!

Next steps:
1. Extract trained_models.zip
2. Upload models/ folder to your Vercel project
3. Update predict-tft.js and predict-nhits.js to load these models:
   await tf.loadLayersModel('./models/tft-trained/model.json')
   await tf.loadLayersModel('./models/nhits-trained/model.json')
4. Deploy updated Vercel project

Model Performance:
TFT: 64.0% direction accuracy, 0.023067 MAE
N-HITS: 59.0% direction accuracy, 0.023077 MAE
