# imports

In [None]:
import sys
import os
from pathlib import Path

# Get the project root - works in both scripts and notebooks
project_root = Path(os.getcwd())

# Add both project root and Pattern directory to path
sys.path.extend([
    str(project_root)
])

# Now import modules
from Data.Database.db import Database

# Rest of your imports
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
from sklearn.svm import SVC

db = Database(db_name="Data/data.db")

Connected to database: Data/data.db
Using database at: c:\Users\yoonus\Documents\GitHub\Stock_AI_Predictor\Data\data.db


In [87]:
# train the svm model with cluster classification , where the cluster lablel is the target
# get the clusters from the database
clusters = db.get_clusters(1)


clusters_svm = np.vstack(clusters['AVGPricePoints'])  
# create labels for the clusters
labels = np.array([i for i in range(len(clusters_svm))])
# create the SVM model
svm = SVC(kernel='rbf')
# fit the model
svm.fit(clusters_svm, labels)

In [88]:
clusters['AVGPricePoints'][0]

[0.6838937824384881,
 0.32318208969578505,
 0.010419505686378954,
 0.42394994411374126,
 0.9689635719071976]

# Prediction and Evaluation

In [90]:

# Function to predict stock prices based on patterns
def predict_stock_prices(data, lookback, hold_period):
    """
    Predicts stock prices based on the patterns identified by the pip_pattern_miner.
    
    Args:
        pip_miner (Pattern_Miner): The trained pattern miner object.
        data (pd.DataFrame): The stock data (must include 'Close' column).
        lookback (int): The lookback period for patterns.
        hold_period (int): The holding period after a pattern is identified.
    
    Returns:
        predictions (list): Predicted prices for each pattern.
        actual_prices (list): Actual prices corresponding to each prediction.
        pattern_ids (list): IDs of the patterns used for prediction.
        pattern_labels (list): Labels (Bullish, Bearish, Neutral) for each pattern.
    """
    windows = []
    predictions = []
    actual_prices = []
    pattern_ids = []
    pattern_labels = []
    current_prices = []
    returns = []
    
    # Parameters
    lookback = 24  # Your pattern window size
    hold_period = 6  # Your holding period
    n_samples = 1000  # Number of random windows to test
    data_length = len(data)

    # Generate random start indices ensuring enough data for lookback+hold
    valid_range = range(lookback - 1, data_length - hold_period)
    random_indices = np.random.choice(valid_range, size=min(n_samples, len(valid_range)), replace=False)

    # Sort the indices to maintain temporal order (optional)
    random_indices.sort()

    for i in random_indices:
        # Extract the window of data for pattern detection
        window = data['ClosePrice'].iloc[i - lookback + 1: i + 1].to_numpy()
        # get the start and end of the window indix like 2:4
        start_index = i - lookback + 1
        end_index = i + 1
        # store in text like 2:4
        windows.append(str(start_index) + ":" + str(end_index))
   
        # Find the pips (patterns) in the window
        pips_x, pips_y = db.pip_pattern_miner.find_pips(window, 5, 3)
        
        # reshape the pips
        pips_y = np.array(pips_y)
        
        
        # normalize the pips
        pips_y = scaler.fit_transform(pips_y.reshape(-1,1)).flatten()
        
        # get the current price
        current_price = window[-1]
        current_prices.append(current_price)
        
        # predict using the SVM model
        patter_prediction = svm.predict(pips_y.reshape(1, -1))
        
        # get the return of the pattern
        prediction_return = clusters.iloc[patter_prediction].Outcome
        returns.append(prediction_return)
        
        predicted_price = current_price + (prediction_return * current_price)
        
        predicted_price = predicted_price.iloc[-1]  # Get the last value
        
        
        # Append the predicted price (current price + predicted return)
        predictions.append(predicted_price)
        
        # Append the actual price after the hold period
        actual_price = data['ClosePrice'].iloc[i + hold_period]
        actual_prices.append(actual_price)
        
        # Append pattern ID and label
        pattern_ids.append(i)
        pattern_labels.append(clusters.iloc[patter_prediction].Label)
           
    return windows,current_prices,predictions, actual_prices, pattern_ids, pattern_labels , returns

# Function to evaluate the model
def evaluate_model(predictions, actual_prices , current_prices):
    """
    Evaluates the model using R-squared, MAE, RMSE, and Accuracy metrics.
    
    Args:
        predictions (list): Predicted prices.
        actual_prices (list): Actual prices.
    
    Returns:
        r2 (float): R-squared score.
        mae (float): Mean Absolute Error.
        rmse (float): Root Mean Squared Error.
        accuracy (float): Percentage of correct directional predictions.
    """
    # Calculate R-squared, MAE, and RMSE
    r2 = r2_score(actual_prices, predictions)
    mae = mean_absolute_error(actual_prices, predictions)
    rmse = np.sqrt(mean_squared_error(actual_prices, predictions))
    
    # Calculate accuracy (percentage of correct directional predictions)
    correct_direction = 0
    for pred, actual,current in zip(predictions, actual_prices , current_prices):
        if (pred > 0 and actual > current) or (pred < 0 and actual < current):
            correct_direction += 1
    accuracy = (correct_direction / len(predictions)) * 100
    
    return r2, mae, rmse, accuracy

# Main execution for prediction and evaluation
def main_prediction_evaluation(data, lookback, hold_period):
    """
    Runs the prediction and evaluation pipeline.
    
    Args:
        pip_miner (Pattern_Miner): The trained pattern miner object.
        data (pd.DataFrame): The stock data (must include 'Close' column).
        lookback (int): The lookback period for patterns.
        hold_period (int): The holding period after a pattern is identified.
    """
    # Predict stock prices
    windows,current_prices,predictions, actual_prices, pattern_ids, pattern_labels , returns = predict_stock_prices( data, lookback, hold_period)
    
    # Evaluate the model
    r2, mae, rmse, accuracy = evaluate_model(predictions, actual_prices,current_prices)
    
    # Print evaluation metrics
    print(f"R-squared: {r2:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Accuracy (Directional): {accuracy:.2f}%")
    
    # Before creating DataFrame, ensure pattern_labels is clean strings
    pattern_labels = [str(label).replace('\nName: Label, dtype: object', '').strip() 
                    for label in pattern_labels]
    # Display predictions and actual prices
    results = pd.DataFrame({
        'Window': windows,
         'Pattern ID': pattern_ids,
        'Pattern Label': pattern_labels,
        'Current Price': current_prices,
        #'Expected Return': returns,
        'Predicted Price': predictions,
        'Actual Price': actual_prices
    })
    
    print("\nPrediction Results:")
    print(results.head(100))  # Display the first 10 results

# Example usage
if __name__ == "__main__":
   # get the 1000 candles from the df
    data = db.get_stock_data(1)
    db.close()
    # Run prediction and evaluation
    main_prediction_evaluation( data, lookback=24, hold_period=6)

Closed connection to database: Data/data.db


ValueError: Input contains NaN.