In [None]:
pip install torch

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import math
import random

# -----------------------------
# Step 1: Collect Data
# -----------------------------
# Toy training corpus: one finance-themed sentence per list entry. Each entry
# is used as a single training example, and the vocabulary is built from these
# sentences by whitespace splitting (so punctuation stays attached to words).
texts = [
    "Balance sheets provide a snapshot of a company's financial condition.",
    "The Federal Reserve announced a new policy to adjust interest rates.",
    "Quarterly earnings reports are critical for assessing company performance.",
    "Investment strategies often rely on technical and fundamental analysis.",
    "Credit ratings are essential for evaluating a company's debt obligations.",
    "Risk management is a cornerstone of sound financial planning.",
    "Portfolio diversification helps mitigate market volatility.",
    "Market trends indicate a potential bull run in the tech sector.",
    "Mergers and acquisitions can reshape industry dynamics.",
    "Economic indicators such as GDP growth and unemployment rates provide insight into market health.",
    "Fiscal policy can influence inflation and consumer spending.",
    "Sovereign bonds are seen as a safe-haven asset during economic downturns.",
    "Equity markets react swiftly to changes in investor sentiment.",
    "Commodities like oil and gold are often used as hedges against inflation.",
    "Algorithmic trading has transformed modern financial markets.",
    "Derivatives are used to hedge against risks in financial portfolios.",
    "Long-term investments can yield substantial returns over time.",
    "Short-term market fluctuations are often driven by investor sentiment.",
    "Asset management firms focus on maximizing returns while managing risk.",
    "Corporate governance is crucial for maintaining investor trust.",
    "Financial modeling helps in forecasting future market trends.",
    "Budget constraints force companies to prioritize their expenditures.",
    "Mergers and acquisitions often lead to significant industry consolidation.",
    "The yield curve is a valuable indicator of economic expectations.",
    "Market liquidity can be a critical factor during financial crises.",
    "Inflation erodes purchasing power over time.",
    "Capital allocation decisions are key to a company's long-term success.",
    "The price-to-earnings ratio is a popular metric for valuing stocks.",
    "Debt-to-equity ratios help investors assess financial leverage.",
    "Regulatory changes can have a profound impact on market behavior.",
    "Exchange rates influence the profitability of multinational companies.",
    "Investor sentiment often swings with major geopolitical events.",
    "Sustainable investing is gaining popularity among socially conscious investors.",
    "The balance between risk and return is central to portfolio management.",
    "Financial derivatives can be complex and require specialized knowledge.",
    "Economic cycles affect asset prices in predictable patterns.",
    "Private equity investments typically have longer time horizons.",
    "Venture capital fuels innovation in emerging industries.",
    "Corporate bonds offer fixed income with varying levels of risk.",
    "Credit default swaps are used to manage exposure to credit risk.",
    "Financial regulations aim to maintain market stability.",
    "Market capitalization provides a measure of a company's size.",
    "The liquidity of an asset can determine its market price.",
    "Quantitative easing is a tool used by central banks to stimulate the economy.",
    "Investor behavior is influenced by both rational analysis and emotion.",
    "Diversification of assets can reduce overall portfolio risk.",
    "Macroeconomic trends play a significant role in investment decisions.",
    "Equity analysts monitor price targets and earnings forecasts.",
    "Financial reports detail revenue, expenses, and net income.",
    "Budget forecasts help businesses plan for future growth.",
    "Interest rate adjustments can signal changes in economic policy.",
    "Asset allocation models evolve as market conditions change.",
    "Economic recovery is often accompanied by rising stock prices.",
    "Capital markets provide funding for corporate expansion.",
    "Global trade dynamics affect commodity prices worldwide."
]

# Shuffle the corpus in place. No random seed is set anywhere in this file, so
# the resulting order differs from run to run.
random.shuffle(texts)

# -----------------------------
# Step 2: Tokenization
# -----------------------------
class SimpleTokenizer:
    """Whitespace tokenizer backed by a fixed, ordered vocabulary.

    A word's id is its index in ``vocab``. Words that are not in the
    vocabulary are encoded as the unknown-token id, and ids that are not in
    the vocabulary are decoded as the literal string ``<UNK>``.
    """

    def __init__(self, vocab):
        """Build the word -> id and id -> word lookup tables from ``vocab``."""
        self.vocab = vocab
        self.vocab_size = len(vocab)
        self.id_to_token = dict(enumerate(vocab))
        self.token_to_id = {
            token: index for index, token in self.id_to_token.items()
        }
        # When the special tokens are missing from the vocabulary, ids 0 and 1
        # are used as the padding / unknown ids respectively.
        self.pad_token_id = self.token_to_id.get('<PAD>', 0)
        self.unk_token_id = self.token_to_id.get('<UNK>', 1)

    def encode(self, text):
        """Return the list of token ids for the whitespace-split ``text``."""
        lookup = self.token_to_id
        fallback = self.unk_token_id
        words = text.split()
        return [lookup.get(word, fallback) for word in words]

    def decode(self, ids):
        """Return the space-joined words corresponding to ``ids``."""
        words = [self.id_to_token.get(token_id, '<UNK>') for token_id in ids]
        return ' '.join(words)

# Collect every distinct whitespace-separated word in the corpus, together
# with the two special tokens, then freeze the result into a sorted list so
# that token ids are deterministic for a given corpus.
vocab = {'<PAD>', '<UNK>'}
for text in texts:
    vocab |= set(text.split())
vocab = sorted(vocab)
tokenizer = SimpleTokenizer(vocab)

# -----------------------------
# Step 3: Create Dataset with Shifted Targets for Next-Token Prediction
# -----------------------------
class FinancialTextDataset(Dataset):
    """Dataset of (input, target) id sequences for next-token prediction.

    Each text is encoded, cut or right-padded to ``seq_len + 1`` ids, and
    returned as two length-``seq_len`` tensors where the target is the input
    shifted one position to the left.
    """

    def __init__(self, texts, tokenizer, seq_len):
        """Store the raw texts, the tokenizer and the model sequence length."""
        self.texts = texts
        self.tokenizer = tokenizer
        self.seq_len = seq_len

    def __len__(self):
        """Return the number of texts (one example per text)."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the ``(input_ids, target_ids)`` tensor pair for text ``idx``."""
        # One extra id is needed so that input and target can both have
        # seq_len entries after the one-step shift.
        window = self.seq_len + 1
        ids = self.tokenizer.encode(self.texts[idx])[:window]
        shortfall = window - len(ids)
        if shortfall > 0:
            ids = ids + [self.tokenizer.pad_token_id] * shortfall
        return torch.tensor(ids[:-1]), torch.tensor(ids[1:])

# Hyperparameters
# -- model size
embedding_dim = 32  # token embedding width, also used as the transformer d_model
hidden_dim = 64     # passed to the encoder layers as dim_feedforward
num_heads = 4
num_layers = 2
# -- data and optimisation
seq_len = 12        # number of input positions per training example
batch_size = 4
num_epochs = 20

# One example per sentence, served in shuffled mini-batches.
dataset = FinancialTextDataset(texts=texts, tokenizer=tokenizer, seq_len=seq_len)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

# -----------------------------
# Positional Encoding Module
# -----------------------------
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position information to a batch of embeddings.

    Even feature columns hold ``sin(position * frequency)`` and odd columns
    hold ``cos(position * frequency)``. The table is precomputed for
    ``max_len`` positions and stored as a non-trainable buffer named ``pe``
    of shape ``(1, max_len, d_model)``.

    Args:
        d_model: Size of the feature dimension. Both even and odd sizes are
            supported.
        dropout: Dropout probability applied after adding the encoding.
        max_len: Number of positions to precompute; inputs must not be
            longer than this along dimension 1.
    """

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        angles = position * div_term
        pe = torch.zeros(max_len, d_model)
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even column,
        # so the cosine part only uses the first d_model // 2 frequencies.
        # (Assigning all of them raised a shape-mismatch error before.)
        pe[:, 1::2] = torch.cos(angles[:, :d_model // 2])
        # Leading batch dimension of 1 lets the table broadcast over a batch.
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encoding of its first ``x.size(1)`` positions.

        ``x`` is indexed as ``(batch, sequence, d_model)``; dropout is applied
        to the sum.
        """
        x = x + self.pe[:, :x.size(1)]
        return self.dropout(x)

# -----------------------------
# Step 4: Define the Transformer Model with Positional Encoding
# -----------------------------
class TinyFinancialTransformer(nn.Module):
    """Small transformer language model for next-token prediction.

    Pipeline: token embedding -> sinusoidal positional encoding -> stack of
    ``nn.TransformerEncoderLayer`` blocks with a causal attention mask ->
    linear projection to vocabulary logits.

    Args:
        vocab_size: Number of distinct token ids (size of the output logits).
        embedding_dim: Embedding width, used as the transformer ``d_model``.
        hidden_dim: Width of the feed-forward sublayer in each encoder layer.
        num_heads: Number of attention heads per layer.
        num_layers: Number of stacked encoder layers.
        seq_len: Longest input sequence the model is built for; the
            positional table is sized for ``seq_len + 1`` positions.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_heads, num_layers, seq_len):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.positional_encoding = PositionalEncoding(embedding_dim, dropout=0.1, max_len=seq_len+1)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=embedding_dim,
            nhead=num_heads,
            dim_feedforward=hidden_dim,
            batch_first=True
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
        self.fc = nn.Linear(embedding_dim, vocab_size)

    def forward(self, x):
        """Return next-token logits of shape ``(batch, sequence, vocab_size)``.

        ``x`` holds token ids indexed as ``(batch, sequence)``.
        """
        num_positions = x.size(1)
        x = self.embedding(x)
        x = self.positional_encoding(x)
        # Causal mask: -inf strictly above the diagonal, so position t can
        # only attend to positions <= t. Without it the encoder attends in
        # both directions, and because the target at position t is the input
        # at position t + 1, the model could simply copy the answer during
        # training and would learn nothing usable for generation.
        causal_mask = torch.triu(
            torch.full(
                (num_positions, num_positions),
                float('-inf'),
                dtype=x.dtype,
                device=x.device,
            ),
            diagonal=1,
        )
        x = self.transformer_encoder(x, mask=causal_mask)
        x = self.fc(x)
        return x

# Build the model; the embedding table and the output layer are both sized to
# the tokenizer's vocabulary.
model = TinyFinancialTransformer(
    vocab_size=tokenizer.vocab_size,
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    num_heads=num_heads,
    num_layers=num_layers,
    seq_len=seq_len,
)

# Padding positions in the targets are excluded from the loss.
criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# -----------------------------
# Step 5: Training Loop
# -----------------------------
def train(model, dataloader, optimizer, criterion, num_epochs=20):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Module called as ``model(inputs)`` and expected to return
            logits whose last dimension is the number of classes.
        dataloader: Re-iterable yielding ``(inputs, targets)`` batches. It
            does not need to support ``len()``.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss called as ``criterion(flat_logits, flat_targets)``.
        num_epochs: Number of passes over ``dataloader``.

    Returns:
        A list with the mean batch loss of each epoch. An epoch that yields
        no batches contributes ``nan`` instead of raising.
    """
    model.train()
    epoch_losses = []
    for epoch in range(num_epochs):
        running_loss = 0.0
        num_batches = 0
        for inputs, targets in dataloader:
            optimizer.zero_grad()
            outputs = model(inputs)
            # reshape (rather than view) also accepts non-contiguous tensors.
            loss = criterion(outputs.reshape(-1, outputs.size(-1)), targets.reshape(-1))
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # Batches are counted while iterating so that loaders without
        # __len__ work and an empty loader cannot cause a division by zero.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss:.4f}")
    return epoch_losses

# Run the training loop with the objects configured above.
train(
    model=model,
    dataloader=dataloader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=num_epochs,
)

# -----------------------------
# Step 6: Inference / Text Generation using Top-k Sampling
# -----------------------------
def generate_text(model, tokenizer, prompt, seq_len=12, max_gen=20, top_k=5):
    """Continue ``prompt`` by sampling ``max_gen`` tokens with top-k sampling.

    Args:
        model: Callable returning logits indexed as
            ``(batch, position, vocab)`` for a ``(1, seq_len)`` id tensor.
            It is switched to eval mode.
        tokenizer: Object providing ``encode``, ``decode`` and
            ``pad_token_id``.
        prompt: Text to continue. Only its last ``seq_len`` tokens are kept.
        seq_len: Number of positions fed to the model at every step.
        max_gen: Number of tokens to sample.
        top_k: Number of highest-scoring candidates to sample from; values
            larger than the vocabulary are clamped to the vocabulary size.

    Returns:
        The decoded kept prompt tokens followed by the sampled tokens.
    """
    model.eval()
    pad_id = tokenizer.pad_token_id
    generated = list(tokenizer.encode(prompt)[-seq_len:])
    with torch.no_grad():
        for _ in range(max_gen):
            # An empty prompt is seeded with a single padding token so there
            # is always one position to read a prediction from.
            window = generated[-seq_len:] or [pad_id]
            last = len(window) - 1
            # Training examples start at position 0 and are padded on the
            # right, so the context is laid out the same way here and the
            # prediction is read at the last real token, not at the final
            # (possibly padding) slot.
            padded = window + [pad_id] * (seq_len - len(window))
            output = model(torch.tensor(padded).unsqueeze(0))
            next_token_logits = output[0, last, :]
            # torch.topk raises if k exceeds the number of candidates.
            k = min(top_k, next_token_logits.size(-1))
            topk_logits, topk_indices = torch.topk(next_token_logits, k)
            probabilities = torch.softmax(topk_logits, dim=-1)
            choice = torch.multinomial(probabilities, 1).item()
            generated.append(topk_indices[choice].item())
    return tokenizer.decode(generated)


Epoch 1/20, Loss: 5.9207
Epoch 2/20, Loss: 5.6919
Epoch 3/20, Loss: 5.5454
Epoch 4/20, Loss: 5.4198
Epoch 5/20, Loss: 5.3082
Epoch 6/20, Loss: 5.2189
Epoch 7/20, Loss: 5.1091
Epoch 8/20, Loss: 4.9861
Epoch 9/20, Loss: 4.8618
Epoch 10/20, Loss: 4.7616
Epoch 11/20, Loss: 4.6176
Epoch 12/20, Loss: 4.4967
Epoch 13/20, Loss: 4.3722
Epoch 14/20, Loss: 4.2475
Epoch 15/20, Loss: 4.1244
Epoch 16/20, Loss: 4.0054
Epoch 17/20, Loss: 3.8859
Epoch 18/20, Loss: 3.7705
Epoch 19/20, Loss: 3.6284
Epoch 20/20, Loss: 3.4963


In [None]:
# Prompts used to sample from the trained model. The tokenizer splits on
# whitespace and maps every word that is missing from the training vocabulary
# to <UNK>; "What", for example, does not occur in the corpus and is therefore
# echoed back as <UNK> in the generated text.
prompt_examples = [
    "What is a balance sheet",
    "What is the Federal Reserve",
    "What are Investment strategies",
    "What are Credit ratings"
]

# Generate 20 tokens per prompt, sampling each one from the 5 highest-scoring
# candidates, and print the prompt next to the decoded result.
for prompt in prompt_examples:
    generated = generate_text(model, tokenizer, prompt, seq_len=seq_len, max_gen=20, top_k=5)
    print(f"Prompt: '{prompt}' -> Generated: '{generated}'")

Prompt: 'What is a balance sheet' -> Generated: '<UNK> is a balance <UNK> by for assessing emotion. is a valuable be complex insight used for analysis and a company's a company's for maintaining'
Prompt: 'What is the Federal Reserve' -> Generated: '<UNK> is the Federal Reserve for valuing time. investor in predictable interest complex provide insight used to hedge in emerging for evaluating an to hedge'
Prompt: 'What are Investment strategies' -> Generated: '<UNK> are Investment strategies businesses geopolitical hedge in emerging swaps of an sentiment valuing time. company's debt often driven of a popular substantial in'
Prompt: 'What are Credit ratings' -> Generated: '<UNK> are Credit ratings central a popular time. of often used as a new policy a company's investments analysis. for maintaining a a company's'


In [None]:
vocab_size = len(vocab)

# Count parameters by asking the instantiated modules rather than re-deriving
# the sizes by hand. A hand-written formula is easy to get wrong for
# nn.TransformerEncoderLayer: each layer holds full-width query/key/value and
# output projections with biases, a two-layer feed-forward sublayer, and two
# layer norms, and the final nn.Linear also has a bias vector.
embedding_params = sum(p.numel() for p in model.embedding.parameters())

# All encoder layers together.
transformer_params = sum(p.numel() for p in model.transformer_encoder.parameters())

# Final linear layer parameters (weights and bias).
linear_params = sum(p.numel() for p in model.fc.parameters())

# The positional encoding only registers a buffer, so it adds no parameters
# and these three parts cover the whole model.
total_params = embedding_params + transformer_params + linear_params

print(f"Total parameters in the TinyFinancialTransformer: {total_params}")


Total parameters in the TinyFinancialTransformer: 24320
