# Phase II: Simulation
## Package Import

In [45]:
import pandas as pd
import networkx as nx
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score, classification_report
from sklearn.neighbors import KNeighborsClassifier
import joblib

## Data Import

In [46]:
df = pd.read_csv('../../data/cleaned-yearly-ETFs.csv')

df = df[(df['price_year'] == 2018) | (df['price_year'] == 2019) | (df['price_year'] == 2020)]

# Prepare data
X = df.drop(columns=['fund_symbol','price_year', 'yearly_risk']).to_numpy(dtype=np.float32)
y = df['yearly_risk'].to_numpy(dtype=np.float32)

## Model Import

In [47]:
# Define NN class
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 2) 
        )

    def forward(self, x):
        logits = self.linear_relu_stack(x)
        return logits


model_pa = joblib.load('model_PA.joblib')
model_nn = joblib.load('model_nn.joblib')

## Simulation

In [48]:
n = 1000

# random average transaction generation
np.random.seed(42)
sim_open = df['avg_open'].sample(n, replace=True).values 
sim_high = df['avg_high'].sample(n, replace=True).values 
sim_low = df['avg_low'].sample(n, replace=True).values 
sim_close = df['avg_close'].sample(n, replace=True).values 
sim_adj_close = df['avg_adj_close'].sample(n, replace=True).values 
sim_price = df['avg_transaction_price'].sample(n, replace=True).values 
sim_vol = df['avg_transaction_volume'].sample(n, replace=True).values 
sim_value = df['avg_transaction_value'].sample(n, replace=True).values 
sim_loss = df['yearly_loss'].sample(n, replace=True).values 

# Simulation DataFrame
simulation = pd.DataFrame({
    'sim_open': sim_open,
    'sim_high': sim_high,
    'sim_low': sim_low,
    'sim_close': sim_close,
    'sim_adj_close': sim_adj_close,
    'sim_price': sim_price,
    'sim_vol': sim_vol,
    'sim_value': sim_value,
    'sim_loss': sim_loss
})

print(simulation.head())

     sim_open   sim_high    sim_low  sim_close  sim_adj_close  sim_price  \
0   47.298814  33.046853  86.463636  23.975929     143.891621  24.389717   
1   28.624071  37.937928  19.300556  26.691779      27.052063  96.660876   
2  113.173597  79.923434  46.271587  19.342500      31.705777  42.260558   
3   80.071507  31.383730  24.257895  99.273586      38.576151  54.774743   
4   29.232669  33.408458  57.582032  24.998214      63.686786  30.592351   

         sim_vol     sim_value  sim_loss  
0    2229.365079  4.021210e+06 -0.241921  
1    2890.000000  1.047132e+06 -0.075510  
2    9710.714286  4.582998e+05 -0.032150  
3  603821.115538  6.995109e+04 -0.109756  
4   48067.984190  1.469424e+06 -0.168219  


## Value-at-Risk Calculation

In [49]:
# get returns
simulation['returns'] = simulation['sim_open'] - simulation['sim_close']
conf = 0.95

# VaR for each row
simulation['var'] = simulation['returns'].apply(lambda x: np.percentile(simulation['returns'], (1 - conf) * 100))

# Calculate simulation VaR for each row
simulation['simulated_VaR'] = simulation['var'] * simulation['sim_value']

# Get the median VaR
median_var = simulation['var'].median()

# Classify as high or low risk based on median VaR
simulation['risk_sim'] = simulation['var'].apply(lambda x: 1 if x > median_var else 0)


## Preferential Attachment Model Risk Prediction

In [51]:
simulation = simulation.dropna()
simulation = simulation.drop(columns=['var', 'returns'])
predictions = model_pa.predict(simulation)



## Compare Predictions

In [58]:
comparison = simulation['risk_sim'].values == predictions

sum(comparison == True)/len(comparison)

1.0

The prefferential attachment model was completely concordand with Value at Risk (VaR) predictions.

## Neural Network Model Risk Prediction

In [64]:
model_nn.eval()
input_data = torch.tensor(simulation[['sim_open', 'sim_high', 'sim_low', 'sim_close', 'sim_adj_close', 'sim_price', 'sim_vol', 'sim_value', 'sim_loss']].values, dtype=torch.float32)

#forward pass
with torch.no_grad(): 
    outputs = model_nn(input_data)


predictions = torch.sigmoid(outputs)
pred = (predictions > 0.5).int()

simulation['prediction_nn'] = pred[:, 0].numpy()

## Compare Predictions

In [65]:
comparison = simulation['risk_sim'].values == simulation['prediction_nn'].values

sum(comparison == True)/len(comparison)

1.0

The prefferential attachment model was completely concordand with Value at Risk (VaR) predictions.

## Final Comments

In the simualtion, both the neural network model and the prefferential attachment model performed in accordance with the gold standard for risk calculation in finance, Value at Risk.