# Neural Network Training for Financial Predictions

**Goal**: Train Temporal Fusion Transformer (TFT) and N-HITS models in Colab, then export the trained weights for Vercel deployment

**Process**:
1. Train models on real financial data in Colab (with GPU)
2. Save trained weights in TensorFlow.js format
3. Download weights to deploy on Vercel
4. Update Vercel endpoints to load these trained weights

In [None]:
# Install dependencies
!pip install tensorflow yfinance pandas numpy scikit-learn tensorflowjs

import tensorflow as tf
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.preprocessing import MinMaxScaler
import json
import os

# Report the TensorFlow build and any visible GPUs before training starts
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow: {tf.__version__}")
print(f"GPU: {gpu_devices}")

## 1. Data Collection and Preprocessing

In [None]:
# Fetch real market data: two years of daily bars for each ticker
symbols = ['AAPL', 'MSFT', 'GOOGL', 'TSLA', 'NVDA']
print("Fetching market data...")

all_data = []
for symbol in symbols:
    data = yf.Ticker(symbol).history(period='2y', interval='1d')
    if data.empty:
        # A ticker with no rows contributes nothing to training; report it
        # instead of silently carrying an empty frame forward.
        print(f"{symbol}: no data returned, skipping")
        continue
    # Tag every row so the per-ticker frames can be told apart after concat
    data['Symbol'] = symbol
    all_data.append(data)
    print(f"{symbol}: {len(data)} days")

if not all_data:
    # pd.concat([]) would raise an unhelpful "No objects to concatenate"
    raise RuntimeError("No market data was fetched for any symbol; cannot build a training set")

combined_data = pd.concat(all_data)
print(f"Total: {len(combined_data)} data points")

In [None]:
# Create sequences for training
def create_sequences(data, seq_length=30):
    """Turn per-ticker price history into sliding windows and next-day returns.

    For every distinct value in ``data['Symbol']`` the rows are sorted by
    index, six features are built (Open, High, Low, Close, Volume and the
    day-over-day fractional change of Close), min-max scaled per ticker, and
    cut into windows of ``seq_length`` consecutive rows. The target for a
    window is the fractional change in (unscaled) Close from the window's
    last row to the row that follows it.

    Args:
        data: DataFrame with columns 'Open', 'High', 'Low', 'Close',
            'Volume' and 'Symbol'.
        seq_length: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, y)`` of float arrays with shapes
        ``(samples, seq_length, 6)`` and ``(samples,)``. Tickers with
        ``seq_length`` rows or fewer contribute no samples; when no ticker
        contributes, the arrays are empty but keep those ranks.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be at least 1, got {seq_length}")

    feature_columns = ['Open', 'High', 'Low', 'Close', 'Volume']
    num_features = len(feature_columns) + 1  # OHLCV plus the price-change column
    X_parts, y_parts = [], []

    for symbol in data['Symbol'].unique():
        symbol_data = data[data['Symbol'] == symbol].sort_index()

        # At least seq_length + 1 rows are needed for one (window, target) pair
        if len(symbol_data) <= seq_length:
            continue

        features = symbol_data[feature_columns].to_numpy(dtype=float)
        closes = symbol_data['Close'].to_numpy(dtype=float)

        # Fractional change relative to the PREVIOUS close, matching how the
        # target below is defined; the first row has no predecessor and gets 0.
        previous_closes = np.concatenate([closes[:1], closes[:-1]])
        price_changes = (closes - previous_closes) / previous_closes

        # Combine features
        enhanced_features = np.column_stack([features, price_changes])

        # Normalize each column to [0, 1] per ticker.
        # NOTE(review): the scaler is fit on the ticker's full history, so
        # early windows are scaled with min/max values from later rows.
        scaled_features = MinMaxScaler().fit_transform(enhanced_features)

        # Window ending at row i-1 predicts the move from row i-1 to row i
        X_parts.append(np.stack([
            scaled_features[i - seq_length:i]
            for i in range(seq_length, len(scaled_features))
        ]))
        last_window_closes = closes[seq_length - 1:-1]
        y_parts.append((closes[seq_length:] - last_window_closes) / last_window_closes)

    if not X_parts:
        return np.empty((0, seq_length, num_features)), np.empty((0,))

    return np.concatenate(X_parts), np.concatenate(y_parts)

# Create dataset
# Sequences are built one ticker at a time: create_sequences returns each
# ticker's windows in chronological order, which lets the split below hold
# out the most recent windows of every ticker instead of whole tickers.
X_by_symbol, y_by_symbol = [], []
for symbol in combined_data['Symbol'].unique():
    X_symbol, y_symbol = create_sequences(combined_data[combined_data['Symbol'] == symbol])
    if len(X_symbol) == 0:
        print(f"{symbol}: not enough rows for one window, skipping")
        continue
    X_by_symbol.append(X_symbol)
    y_by_symbol.append(y_symbol)

if not X_by_symbol:
    raise ValueError("No training sequences could be built from the fetched data")

X = np.concatenate(X_by_symbol)
y = np.concatenate(y_by_symbol)
print(f"Dataset shape: X={X.shape}, y={y.shape}")

# Train/test split: first 80% of each ticker's windows train, last 20% test
cut_points = [int(0.8 * len(block)) for block in X_by_symbol]
X_train = np.concatenate([block[:cut] for block, cut in zip(X_by_symbol, cut_points)])
X_test = np.concatenate([block[cut:] for block, cut in zip(X_by_symbol, cut_points)])
y_train = np.concatenate([block[:cut] for block, cut in zip(y_by_symbol, cut_points)])
y_test = np.concatenate([block[cut:] for block, cut in zip(y_by_symbol, cut_points)])

print(f"Train: {X_train.shape}, Test: {X_test.shape}")

## 2. Model Architectures

In [None]:
# TFT Model (simplified for training)
def create_tft_model(input_shape):
    """Build a simplified Temporal Fusion Transformer-style regressor.

    The network re-weights the input features with a learned softmax, runs
    an LSTM over the window, applies self-attention with a residual
    connection and layer normalization, then pools over time into a single
    linear output.

    Args:
        input_shape: ``(time_steps, num_features)`` of one input window.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``'TFT'`` with one scalar
        output per window.
    """
    layers = tf.keras.layers
    time_steps, num_features = input_shape[0], input_shape[-1]

    inputs = layers.Input(shape=input_shape)

    # Variable selection: one softmax weight per feature, derived from the
    # time-averaged window and broadcast back over every time step
    context = layers.GlobalAveragePooling1D()(inputs)
    selection_weights = layers.Dense(num_features, activation='softmax')(context)
    selection_weights = layers.RepeatVector(time_steps)(selection_weights)
    selected_features = layers.Multiply()([inputs, selection_weights])

    # LSTM processing
    lstm_out = layers.LSTM(64, return_sequences=True, dropout=0.2)(selected_features)

    # Multi-head self-attention over the LSTM outputs
    attention = layers.MultiHeadAttention(num_heads=4, key_dim=16)(lstm_out, lstm_out)

    # Residual connection as an explicit Add layer rather than a raw `+`:
    # an operator on symbolic tensors is recorded as an anonymous op wrapper,
    # which layer-based loaders (such as TensorFlow.js) may not be able to
    # deserialize; a named Keras layer keeps the graph made of standard layers.
    residual = layers.Add()([attention, lstm_out])
    attention = layers.LayerNormalization()(residual)

    # Output head: average over time, then a small MLP to one value
    pooled = layers.GlobalAveragePooling1D()(attention)
    dense = layers.Dense(32, activation='relu')(pooled)
    output = layers.Dense(1)(dense)

    return tf.keras.Model(inputs, output, name='TFT')

# Instantiate the TFT for (time steps, features) windows and report its size
tft_input_shape = (X.shape[1], X.shape[2])
tft_model = create_tft_model(tft_input_shape)
print(f"TFT Model: {tft_model.count_params():,} parameters")

In [None]:
# N-HITS Model (hierarchical blocks)
def create_nhits_model(input_shape):
    """Build a simplified N-HITS-style regressor with three pooling scales.

    The projected input is processed by three parallel branches that average
    pool by 2, 4 and 8 steps, apply a two-layer MLP, and upsample back to the
    window length. The branch outputs are summed, pooled over time and mapped
    to a single linear output.

    Args:
        input_shape: ``(time_steps, num_features)`` of one input window.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``'NHITS'``.
    """
    keras_layers = tf.keras.layers
    window_length = input_shape[0]

    inputs = keras_layers.Input(shape=input_shape)

    # Shared projection feeding every branch
    projected = keras_layers.Dense(128)(inputs)

    branches = []
    for pool_size in (2, 4, 8):  # one branch per time scale
        coarse = keras_layers.AveragePooling1D(pool_size, padding='same')(projected)

        hidden = keras_layers.Dense(128, activation='relu')(coarse)
        hidden = keras_layers.Dense(128, activation='relu')(hidden)

        restored = keras_layers.UpSampling1D(pool_size)(hidden)

        # 'same' padding rounds the pooled length up, so upsampling can
        # overshoot the window; trim the surplus steps at the end.
        # NOTE(review): confirm the deployment runtime supports tensor
        # slicing and UpSampling1D inside a loaded model.
        if restored.shape[1] != window_length:
            restored = restored[:, :window_length, :]

        branches.append(restored)

    # Sum the branches, then reduce to one prediction per window
    merged = keras_layers.Add()(branches)
    time_pooled = keras_layers.GlobalAveragePooling1D()(merged)
    head = keras_layers.Dense(64, activation='relu')(time_pooled)
    output = keras_layers.Dense(1)(head)

    return tf.keras.Model(inputs, output, name='NHITS')

# Instantiate N-HITS for (time steps, features) windows and report its size
nhits_input_shape = (X.shape[1], X.shape[2])
nhits_model = create_nhits_model(nhits_input_shape)
print(f"N-HITS Model: {nhits_model.count_params():,} parameters")

## 3. Training

In [None]:
# Train TFT Model
print("Training TFT...")

tft_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop after 5 epochs without improvement (keeping the best weights) and
# lower the learning rate after 3; both callbacks use their default monitor.
# NOTE(review): X_test/y_test are also the final evaluation set further down,
# so early stopping is tuned on the data that is later reported — consider a
# separate validation split.
tft_early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
tft_lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(patience=3)

tft_history = tft_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    verbose=1,
    callbacks=[tft_early_stopping, tft_lr_reducer],
    validation_data=(X_test, y_test),
)

print("TFT Training Complete")

In [None]:
# Train N-HITS Model
print("Training N-HITS...")

nhits_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop after 5 epochs without improvement (keeping the best weights) and
# lower the learning rate after 3; both callbacks use their default monitor.
# NOTE(review): X_test/y_test are also the final evaluation set further down,
# so early stopping is tuned on the data that is later reported — consider a
# separate validation split.
nhits_early_stopping = tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)
nhits_lr_reducer = tf.keras.callbacks.ReduceLROnPlateau(patience=3)

nhits_history = nhits_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    verbose=1,
    callbacks=[nhits_early_stopping, nhits_lr_reducer],
    validation_data=(X_test, y_test),
)

print("N-HITS Training Complete")

## 4. Evaluation

In [None]:
# Evaluate models on the held-out windows (returns [loss, mae] per compile())
tft_loss, tft_mae = tft_model.evaluate(X_test, y_test, verbose=0)
nhits_loss, nhits_mae = nhits_model.evaluate(X_test, y_test, verbose=0)

print(f"TFT - Loss: {tft_loss:.6f}, MAE: {tft_mae:.6f}")
print(f"N-HITS - Loss: {nhits_loss:.6f}, MAE: {nhits_mae:.6f}")

# Direction accuracy: share of windows where the predicted and actual
# returns have the same sign. Computed over the whole test set so it covers
# the same samples as the loss/MAE above rather than only the first 100.
tft_pred = tft_model.predict(X_test, verbose=0)
nhits_pred = nhits_model.predict(X_test, verbose=0)
actual = y_test

# predict() returns shape (samples, 1); flatten to compare against y_test
tft_dir_acc = np.mean(np.sign(tft_pred.flatten()) == np.sign(actual))
nhits_dir_acc = np.mean(np.sign(nhits_pred.flatten()) == np.sign(actual))

print("\nDirection Accuracy:")
print(f"TFT: {tft_dir_acc:.1%}")
print(f"N-HITS: {nhits_dir_acc:.1%}")

## 5. Export for Vercel Deployment

In [None]:
import tensorflowjs as tfjs
import zipfile

# Create export directory
os.makedirs('/content/models', exist_ok=True)

print("Exporting trained models...")

# Convert each trained Keras model to TensorFlow.js format in its own folder
for trained_model, export_dir in (
    (tft_model, '/content/models/tft-trained'),
    (nhits_model, '/content/models/nhits-trained'),
):
    tfjs.converters.save_keras_model(trained_model, export_dir)

print("Models exported to TensorFlow.js format")


def _model_report(loss, mae, direction_accuracy, model):
    """Collect one model's evaluation numbers as plain JSON-serializable types."""
    return {
        'loss': float(loss),
        'mae': float(mae),
        'direction_accuracy': float(direction_accuracy),
        'parameters': int(model.count_params()),
    }


# Save model metadata alongside the exported weights
metadata = {
    'tft': _model_report(tft_loss, tft_mae, tft_dir_acc, tft_model),
    'nhits': _model_report(nhits_loss, nhits_mae, nhits_dir_acc, nhits_model),
    'training_info': {
        'sequence_length': int(X.shape[1]),
        'num_features': int(X.shape[2]),
        'training_samples': int(len(X_train)),
        'test_samples': int(len(X_test)),
        'symbols': symbols,
    },
}

with open('/content/models/metadata.json', 'w') as metadata_file:
    json.dump(metadata, metadata_file, indent=2)

print("Metadata saved")

In [None]:
# Create deployment package
def create_zip(source_dir='/content/models', zip_path='/content/trained_models.zip'):
    """Pack every file under ``source_dir`` into a compressed zip archive.

    Entries are stored relative to the parent of ``source_dir``, so the
    archive unpacks into a single top-level folder named after it (with the
    defaults: ``models/...``).

    Args:
        source_dir: Directory tree to archive.
        zip_path: Destination archive; overwritten if it already exists.

    Raises:
        FileNotFoundError: If ``source_dir`` is not an existing directory
            (os.walk would otherwise silently produce an empty archive).
    """
    if not os.path.isdir(source_dir):
        raise FileNotFoundError(f"Model directory not found: {source_dir}")

    archive_root = os.path.dirname(os.path.abspath(source_dir))

    # ZIP_DEFLATED compresses entries; the ZipFile default only stores them
    with zipfile.ZipFile(zip_path, 'w', compression=zipfile.ZIP_DEFLATED) as zipf:
        for root, dirs, filenames in os.walk(source_dir):
            dirs.sort()  # in-place sort fixes the traversal order
            for filename in sorted(filenames):
                file_path = os.path.join(root, filename)
                arcname = os.path.relpath(file_path, archive_root)
                zipf.write(file_path, arcname)

create_zip()
print("Created trained_models.zip")

archive_path = '/content/trained_models.zip'

# List every entry stored in the archive
print("\nPackage contents:")
with zipfile.ZipFile(archive_path, 'r') as archive:
    for entry in archive.namelist():
        print(f"  {entry}")

# Size on disk, converted from bytes to megabytes
size_mb = os.path.getsize(archive_path) / 1024 / 1024
print(f"\nPackage size: {size_mb:.1f} MB")

In [None]:
# Download the trained models
from google.colab import files

print("Downloading trained models...")
files.download('/content/trained_models.zip')

print("\n🎉 Training Complete!")

# Deployment checklist, printed one line at a time
print("\nNext steps:")
for step in (
    "1. Extract trained_models.zip",
    "2. Upload models/ folder to your Vercel project",
    "3. Update predict-tft.js and predict-nhits.js to load these models:",
    "   await tf.loadLayersModel('./models/tft-trained/model.json')",
    "   await tf.loadLayersModel('./models/nhits-trained/model.json')",
    "4. Deploy updated Vercel project",
):
    print(step)

# Recap of the evaluation metrics for both models
print("\nModel Performance:")
for label, dir_acc, mae in (
    ("TFT", tft_dir_acc, tft_mae),
    ("N-HITS", nhits_dir_acc, nhits_mae),
):
    print(f"{label}: {dir_acc:.1%} direction accuracy, {mae:.6f} MAE")