<h1>KOSPI Prediction using RNN (GRU)</h1>

<h2>Load data</h2>

In [7]:

# Data split function

def split_data(price, lookback, test_set_size=None):
    """Build sliding windows over ``price`` and split them into train/test sets.

    Each window holds ``lookback + 1`` consecutive rows: the first ``lookback``
    rows are the model input and the last row is the label. For example, with
    ``lookback=3`` every window contains 3 input rows and 1 label row.

    Args:
        price: Object exposing ``to_numpy()`` (e.g. a pandas DataFrame). A 1-D
            result is treated as a single feature column.
        lookback: Number of consecutive rows used as the input of one sample.
        test_set_size: Number of trailing windows reserved for the test set.
            When omitted, the module-level ``test_set_size`` variable is used,
            which is how the function obtained the value before this parameter
            existed.

    Returns:
        ``[x_train, y_train, x_test, y_test]`` where the ``x_*`` arrays have
        shape ``(n_samples, lookback, n_features)`` and the ``y_*`` arrays have
        shape ``(n_samples, n_features)``.

    Raises:
        NameError: If ``test_set_size`` is neither passed nor defined at module
            level.
        ValueError: If there are not enough rows to build a single window, or
            if ``test_set_size`` is outside ``[0, number of windows]``.
    """
    if test_set_size is None:
        # Backward compatibility: older callers rely on a global being set
        # before the call instead of passing the value explicitly.
        try:
            test_set_size = globals()["test_set_size"]
        except KeyError:
            raise NameError("name 'test_set_size' is not defined") from None

    data_raw = price.to_numpy()
    if data_raw.ndim == 1:
        # The slicing below needs an explicit feature axis.
        data_raw = data_raw.reshape(-1, 1)

    n_windows = len(data_raw) - lookback
    if n_windows <= 0:
        raise ValueError(
            f"need more than lookback={lookback} rows to build a window, "
            f"got {len(data_raw)}"
        )
    if not 0 <= test_set_size <= n_windows:
        # A negative train size would silently wrap around in the slices below.
        raise ValueError(
            f"test_set_size={test_set_size} must be between 0 and {n_windows}"
        )

    # Stack every window of length lookback + 1 along a new leading axis.
    data = np.array(
        [data_raw[start:start + lookback + 1] for start in range(n_windows)]
    )
    train_set_size = n_windows - test_set_size

    # Split data into train and test; the last row of each window is the label.
    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]

    x_test = data[train_set_size:, :-1, :]
    y_test = data[train_set_size:, -1, :]

    return [x_train, y_train, x_test, y_test]


# Execute split

test_set_start_date = "2020-01-01"
price = data[['close']].copy()

# Fit the scaler only on rows dated before the test start, so the scaling
# parameters are derived from the training period alone.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(data[data['date'] < test_set_start_date][['close']])

# Transform the full series with the training-period scaler. Passing a
# DataFrame keeps the input consistent with what the scaler was fitted on.
price['close'] = scaler.transform(price[['close']]).ravel()

# Number of rows on or after the test start date; split_data reads this value.
test_set_size = len(data[data['date'] >= test_set_start_date])

# Run the split so the arrays consumed by the data-loader cell exist.
# NOTE(review): `lookback` is not defined in this cell; it is expected to be
# set with the other hyperparameters before this point - confirm.
x_train, y_train, x_test, y_test = split_data(price, lookback)


<h2>Data loader</h2>

In [None]:
# Use the GPU when one is available, otherwise stay on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Convert each NumPy split to a tensor, cast it with `.type(torch.Tensor)`
# and move it to `device`.
x_train, x_test, y_train, y_test = (
    torch.from_numpy(split).type(torch.Tensor).to(device)
    for split in (x_train, x_test, y_train, y_test)
)

# Samples per batch, shared by every loader below.
batch_size = 10000

# TensorDataset pairs each input window with its target.
# Only the training loader shuffles its samples.
train_dataset = TensorDataset(x_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = TensorDataset(x_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Unified loader: train samples followed by test samples, in their original
# order (no shuffling).
x_combined = torch.cat([x_train, x_test], dim=0)
y_combined = torch.cat([y_train, y_test], dim=0)
combined_dataset = TensorDataset(x_combined, y_combined)
combined_loader = DataLoader(
    combined_dataset,
    batch_size=batch_size,
    shuffle=False,
)

<h2>RNN Model</h2>

In [None]:
class RNNModel(nn.Module):
    """Stacked GRU followed by a linear layer applied to the last time step."""

    def __init__(self, input_dim, hidden_size, num_layers, output_dim):
        """Create the GRU stack and the linear output layer.

        Args:
            input_dim: Number of features per time step.
            hidden_size: Width of each GRU layer.
            num_layers: Depth, i.e. how many GRU layers are stacked.
            output_dim: Number of values produced per sample.
        """
        super().__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.gru = nn.GRU(
            input_size=input_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
        )
        self.fc = nn.Linear(in_features=hidden_size, out_features=output_dim)

    def forward(self, x):
        """Return the linear projection of the GRU output at the final step.

        ``x`` is indexed batch-first: ``x.size(0)`` is used as the batch size
        of the zero initial hidden state.
        """
        initial_hidden = torch.zeros(
            self.num_layers,
            x.size(0),
            self.hidden_size,
            device=x.device,
        )
        sequence_out, _ = self.gru(x, initial_hidden)
        # Keep only the output of the last time step of every sequence.
        last_step = sequence_out[:, -1, :]
        return self.fc(last_step)

<h2>Model Train</h2>

In [None]:
# Loss and optimiser. MSELoss must be instantiated: calling the class itself
# with (prediction, target) would build a module instead of computing a loss.
# NOTE(review): `model` and `num_epochs` are not defined in the visible cells;
# they are expected to be created before this cell runs - confirm.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# hist[epoch] stores the loss of the last training batch of that epoch.
hist = np.zeros(num_epochs)
start_time = time.time()

for epoch in range(num_epochs):
    # Training pass: one optimiser step per batch.
    model.train()

    for x_batch, y_batch in train_loader:
        y_train_pred = model(x_batch)
        loss = criterion(y_train_pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation pass on the whole test set, without tracking gradients.
    model.eval()
    with torch.no_grad():
        y_test_pred = model(x_test)
        test_loss = criterion(y_test_pred, y_test)

    # Report progress every 10 epochs.
    if (epoch + 1) % 10 == 0:
        print("Epoch", epoch + 1, "Train loss:", loss.item(),
              "Test Loss:", test_loss.item())
    hist[epoch] = loss.item()

training_time = time.time() - start_time
print("Training time: {}".format(training_time))

<h2>Create Prediction Dataframe</h2>

In [None]:
# Run the trained model over the combined (train + test) loader and collect
# predictions alongside the true targets.

model.eval()
with torch.no_grad():  # no gradients are needed for inference
    predictions = []   # predicted values, one tensor per batch
    targets = []       # true values, one tensor per batch

    # Looping over batches; combined_loader does not shuffle, so the
    # concatenated results keep the order of the combined dataset.
    for x_batch, y_batch in combined_loader:
        predictions.append(model(x_batch))
        targets.append(y_batch)

y_pred = torch.cat(predictions)
y_original = torch.cat(targets)


# Convert predictions and targets to dataframes after inverse scaling
predict = pd.DataFrame(
    scaler.inverse_transform(y_pred.detach().cpu().numpy()),
    columns=["predicted"]
)
original = pd.DataFrame(
    scaler.inverse_transform(y_original.detach().cpu().numpy()),
    columns=["original"]
)

# Adding the date column from `data`: the last len(predict) dates are attached
# positionally. Assigning the Series directly would align on index labels, and
# the tail of `data` does not carry the 0..n-1 labels of these new frames, so
# dates would be misplaced or become NaN.
predict["date"] = data["date"].iloc[-len(predict):].to_numpy()
original["date"] = data["date"].iloc[-len(original):].to_numpy()


<h2>RNN Stock price prediction signal</h2>

In [None]:
# Function to decide 'decline', 'rise' or 'neutral'.
# Defined before the signal-generation code below, which calls it.
def calculate_signal(indicator, threshold=0.6):
    """Map a predicted percentage change to a market-direction label.

    Args:
        indicator: Predicted change, in percent.
        threshold: Magnitude the change must reach to count as a move.
            Defaults to 0.6, the value previously hard-coded.

    Returns:
        "rise" when ``indicator >= threshold``, "decline" when
        ``indicator <= -threshold``, otherwise "neutral". NaN fails both
        comparisons and therefore yields "neutral".
    """
    if indicator >= threshold:
        return "rise"
    if indicator <= -threshold:
        return "decline"
    return "neutral"


# Signal generation
monitoring_ticker_list = ['1001']
kospi_data = data_loader.load_index_data(ticker_list=monitoring_ticker_list, freq='d', delay=1)  # Bring KOSPI data

# NOTE(review): `testPredictPlot` is not defined in the visible cells; it
# presumably holds the model's predicted close prices - confirm its source and
# that its ordering matches `kospi_data`.
kospi_data["predicted_close"] = testPredictPlot[-len(kospi_data):]

# Day-over-day change of the predicted close, in percent. The first row has no
# previous value, so it is back-filled from the next row.
kospi_data["predicted_change_pct"] = kospi_data["predicted_close"].pct_change().bfill() * 100
kospi_data["signal"] = kospi_data["predicted_change_pct"].apply(calculate_signal)

<h2>ETF rebalancing</h2>

In [None]:
# Accepting 'monitoring_data' as an input variable containing signals,
# the portfolio is adjusted by fully investing in the KODEX ETF when the signal indicates an upward trend,
# partially investing in the KODEX ETF during a neutral trend, and fully investing in the KODEX Inverse ETF during a downward trend

def simulate_market_monitoring_etf(ohlcv_data: pd.Dataframe, monitoring_data: pd.Dataframe):

    # ohlcv_data is a dataFrame containing OHLCV (Open, High, Low, Close, Volume) data,
    # monitoring_data is a dataFrame containing signals (rise, neutral, decline) that indicate market direction

    # rebalance_date captures dates where the signal in monitoring_data changes by comparing each row with the previous row shift(1)
    # month_end holds the end-of-month dates (retrieved by get_month_end) for regular rebalancing
    account = Account(initial cash=100000000)
    broker = Broker()
    rebalance_date = monitoring_data[monitoring_data["signal"] != monitoring_data["signal"].shift(1)].index.tolist()
    month_end = get_month_end(kospi_data.index.min(), kospi_data.index.max())
    rebalance_date += month_end

    # daily trading
    for date, ohlcv in ohlcv_data.groupby(['date']):
        transactions = broker.process_order(dt=date, data=ohlcv, orders=account.orders)
        account.update_position(transactions=transactions)
        account.update_portfolio(dt=date, data=ohlcv)
        account.update_order()

        if date not in rebalance_date:
            continue
        print(date.date())
        
    # adjust portfolio based on signal 
        signal = monitoring_data.loc[date]['signal']
    # numbers are KOSPI ETF tickers
        if signal == "rise":
            weight = {'122630' :1, 
                      '069500' :0,
                      '252670' :0}
        if signal == "neutral":
            weight = {'122630' :0, 
                      '069500' :1,
                      '252670' :0}
        if signal == "decline":
            weight = {'122630' :0, 
                      '069500' :0,
                      '252670' :1}