In [1]:
from dotenv import load_dotenv
import os
import http.client
import json
import pandas as pd

class DataLoader:
    def __init__(self):
        load_dotenv()
        self.domain = os.getenv("DOMAIN")
        self.workspace_id = os.getenv("WORKSPACE_ID") 
        self.api_key = os.getenv("API_KEY")
        self.world_bank_dataset_id = os.getenv("WORLD_BANK_DATASET_ID")
        self.agro_gov_dataset_id = os.getenv("AGRO_GOV_DATASET_ID")
        self.headers = {"API_KEY": self.api_key, "Content-Type": "application/json"}

    def _make_request(self, dataset_id):
        conn = http.client.HTTPSConnection(self.domain)
        conn.request(
            "GET",
            f"/api/v1/workspaces/{self.workspace_id}/datasets/{dataset_id}/full",
            headers=self.headers
        )
        res = conn.getresponse()
        data = res.read()
        return json.loads(data.decode("utf-8"))

    def _flatten_data(self, rows):
        flattened_data = []
        for row in rows:
            cells = row["cells"]
            cells["row_id"] = row["row_id"]
            flattened_data.append(cells)
        return flattened_data

    def load_price_data(self):
        parsed_data = self._make_request(self.world_bank_dataset_id)
        flattened_data = self._flatten_data(parsed_data["data"])
        
        # Convert to DataFrame and clean
        price_df = pd.DataFrame(flattened_data)
        price_df = price_df.replace("…", pd.NA)
        price_df = price_df.apply(pd.to_numeric, errors="ignore")
        
        # Keep only Rice and Date columns
        price_df = price_df[["Rice, Viet Namese 5%", "Date"]]
        
        # Convert Date format from YYYYMM to YYYY-MM-DD
        price_df['Date'] = pd.to_datetime(price_df['Date'].astype(str).str.replace('M', '-'), format='%Y-%m') + pd.offsets.MonthBegin(0)
        
        # Sort by Date
        return price_df.sort_values('Date')

    def load_news_data(self):
        parsed_data = self._make_request(self.agro_gov_dataset_id)
        flattened_data = self._flatten_data(parsed_data["data"])
        
        # Convert to DataFrame and clean
        news_df = pd.DataFrame(flattened_data)
        news_df = news_df.replace("…", pd.NA)
        news_df = news_df.apply(pd.to_numeric, errors="ignore")
        
        # Convert DATE column to datetime
        news_df['DATE'] = pd.to_datetime(news_df['DATE'], format='%d | %m | %Y')
        
        # Sort by DATE
        return news_df.sort_values('DATE')

# Create loader instance and load data
loader = DataLoader()
price_df = loader.load_price_data()
news_df = loader.load_news_data()


  price_df = price_df.apply(pd.to_numeric, errors="ignore")
  news_df = news_df.apply(pd.to_numeric, errors="ignore")


# PRICE

In [2]:
price_df

Unnamed: 0,"Rice, Viet Namese 5%",Date
94,,1960-01-01
252,,1960-02-01
675,,1960-03-01
731,,1960-04-01
348,,1960-05-01
...,...,...
702,535.25,2024-08-01
557,540.46,2024-09-01
698,529.84,2024-10-01
160,509.69,2024-11-01


# NEWS

In [3]:
news_df

Unnamed: 0,URL,DATE,TITLE,CONTENT,row_id
3096,https://agro.gov.vn/vn/tID8329_Bung-no-vu-he-t...,2002-04-12,"""Bùng nổ"" vụ hè thu","Đổ xô làm lúa hè thuNgày 9/4, Bộ NN- PTNT tổ c...",3c9d0226-b06c-45f8-aacb-f60fa36d2527
3251,https://agro.gov.vn/vn/tID8332_Dieu-gi-giup-lu...,2002-04-13,"Điều gì giúp lúa ""cải tử hoàn sinh""?",Cứu được lúa... lùn Trên cánh đồng thôn Thanh ...,f6e00a8b-7327-4c4a-8a7e-73f8aa3032e2
2249,https://agro.gov.vn/vn/tID3465_Da-Nang-Khan-tr...,2007-06-07,"Đà Nẵng: Khẩn trương làm đất, kịp thời gieo sạ...",Do thời tiết diễn biến bất thường nên các đợt ...,ed81b512-9a6b-4545-9479-85458c95e2a8
1705,https://agro.gov.vn/vn/tID3482_Nguon-cung-giam...,2007-06-09,Nguồn cung giảm sút đẩy giá gạo châu Á tăng,"Tại Thái lan, nguồn cung thóc khan hiếm sau kh...",4878bced-b60f-4206-97ef-17fb8caa3bc5
3116,https://agro.gov.vn/vn/tID3492_Rice-exporters-...,2007-06-09,Rice exporters urged to end new contracts,"The official estimated that, by the year's hal...",e2a48974-63ce-4c58-b6b6-3e9e541a257d
...,...,...,...,...,...
367,https://agro.gov.vn/vn/tID33770_Viet-Nam-huong...,2024-11-12,Việt Nam hướng tới sản phẩm gạo đáp ứng đòi hỏ...,Nguồn: Vneconomy.vn\n\t\n\t\tThương vụ Việt Na...,f4d68233-7a6b-4d17-a1d9-58ae147bee36
310,https://agro.gov.vn/vn/tID33778_Xuat-khau-ky-l...,2024-11-21,"Xuất khẩu kỷ lục, Việt Nam vẫn nhập khẩu gạo n...",Nguồn: Dantri.com.vn\n\n\tTheo báo cáo mới nhấ...,40cff462-2a42-4fda-a1fc-defc4997ba8d
29,https://agro.gov.vn/vn/tID33791_Xuat-khau-gao-...,2024-12-03,"Xuất khẩu gạo lập kỷ lục mới, vượt mốc 5 tỷ USD",Nguồn: Vneconomy.vn\n\n\tTheo số liệu của Tổng...,dc72537b-2af5-415d-a269-80c3f1847d1e
92,https://agro.gov.vn/vn/tID33821_Viet-Nam-dung-...,2024-12-25,Việt Nam đứng thứ 3 thế giới về xuất khẩu gạo,Nguồn: Vtv.vn\n\n\tTheo báo cáo của Bộ Nông ng...,c75670fc-a094-474c-93b0-90d1bed52337


# STATISTICAL MODELS


In [4]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler, FunctionTransformer
from sklearn.metrics import mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt
import ray
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA, AutoETS, AutoTheta, AutoCES, AutoTBATS
import random

random.seed(42)

In [5]:
# Create stats_df from price_df and add unique_id column
stats_df = price_df.copy()
stats_df = stats_df.rename(columns={'Rice, Viet Namese 5%': 'y', 'Date': 'ds'})
stats_df['unique_id'] = 'stats'
stats_df = stats_df[['ds', 'y', 'unique_id']]  # Reorder columns
stats_df = stats_df.reset_index(drop=True)  # Reset index to increase incrementally

# Handle missing values by forward filling and then backward filling
stats_df['y'] = stats_df['y'].fillna(method='ffill').fillna(method='bfill')

stats_df

  stats_df['y'] = stats_df['y'].fillna(method='ffill').fillna(method='bfill')


Unnamed: 0,ds,y,unique_id
0,1960-01-01,197.00,stats
1,1960-02-01,197.00,stats
2,1960-03-01,197.00,stats
3,1960-04-01,197.00,stats
4,1960-05-01,197.00,stats
...,...,...,...
775,2024-08-01,535.25,stats
776,2024-09-01,540.46,stats
777,2024-10-01,529.84,stats
778,2024-11-01,509.69,stats


In [6]:


# Number of periods to forecast ahead
forecast_horizon = 6
# Size of each rolling window step
step_size = 1
# Total number of rolling windows for cross-validation
n_windows = 36

# Initialize Ray for parallel processing
ray.init(ignore_reinit_error=True)

# Define the models and forecaster
season_length = 12  # annual seasonality for monthly data
# List of statistical forecasting models with seasonal components
models = [
    AutoARIMA(season_length=season_length),  # Automated ARIMA model selection
    AutoETS(season_length=season_length),    # Automated Exponential Smoothing
    AutoTheta(season_length=season_length),  # Automated Theta method
    AutoCES(season_length=season_length)     # Automated Complex Exponential Smoothing
]

# Create StatsForecast object with parallel processing
def get_stats_forecaster():
    """
    Creates and returns a StatsForecast object with the defined models.
    
    Returns:
        StatsForecast: Configured forecaster with parallel processing enabled
    """
    return StatsForecast(models=models, freq='M', n_jobs=-1)

def prepare_data(df, use_scaler=False):
    """
    Prepares data for forecasting by handling data types and optional scaling.
    
    Args:
        df (pd.DataFrame): Input dataframe with time series data
        use_scaler (bool): Whether to apply MinMax scaling to the target variable
        
    Returns:
        tuple: (Prepared DataFrame, Fitted scaler or None if scaling not used)
    """
    # Ensure 'y' column is numeric
    df = df.copy()
    df['y'] = pd.to_numeric(df['y'], errors='coerce')
    
    # Handle any remaining missing values
    df['y'] = df['y'].fillna(method='ffill').fillna(method='bfill')
    
    # Apply MinMax scaling if requested
    scaler = None
    if use_scaler:
        scaler = MinMaxScaler()
        df['y'] = scaler.fit_transform(df[['y']])
    
    # Split the data into train and test sets based on rolling window parameters
    train_size = len(df) - n_windows * step_size
    train_df = df[:train_size]
    test_df = df[train_size:]
    
    return pd.concat([train_df, test_df]), scaler

def calculate_metrics(actual, predicted):
    """
    Calculates multiple performance metrics for forecast evaluation.
    
    Args:
        actual (array-like): True values
        predicted (array-like): Predicted values
        
    Returns:
        tuple: (RMSE, Directional Accuracy, Turning Point Accuracy)
    """
    actual = np.asarray(actual).flatten()
    predicted = np.asarray(predicted).flatten()

    # Root Mean Square Error
    rmse = sqrt(mean_squared_error(actual, predicted))
    
    # Directional Accuracy - measures correct prediction of up/down movements
    actual_diff = np.diff(actual)
    pred_diff = np.diff(predicted)
    directional_accuracy = np.mean((actual_diff * pred_diff) > 0)
    
    # Turning Point Accuracy - measures correct prediction of trend changes
    actual_turns = (actual_diff[:-1] * actual_diff[1:]) < 0
    pred_turns = (pred_diff[:-1] * pred_diff[1:]) < 0
    turning_point_accuracy = np.mean(actual_turns == pred_turns)
    
    return rmse, directional_accuracy, turning_point_accuracy

@ray.remote
def run_experiment(df, model_names, use_scaler=False):
    """
    Runs forecasting experiment with cross-validation for multiple models.
    
    Args:
        df (pd.DataFrame): Input dataframe with time series data
        model_names (list): List of model names to evaluate
        use_scaler (bool): Whether to apply MinMax scaling
        
    Returns:
        tuple: (Results DataFrame with metrics, Cross-validation DataFrame with predictions)
    """
    # Initialize forecaster and prepare data
    stats_forecaster = get_stats_forecaster()
    prepared_df, scaler = prepare_data(df, use_scaler)
    
    # Prepare for cross-validation
    cv_df = prepared_df[['ds', 'y', 'unique_id']].copy()
    cv_df['y'] = cv_df['y'].astype(float)

    # Perform rolling window cross-validation
    crossvalidation_df = stats_forecaster.cross_validation(
        df=cv_df,
        h=forecast_horizon,
        step_size=step_size,
        n_windows=n_windows
    )

    # Inverse transform predictions if scaling was applied
    if scaler:
        crossvalidation_df['y'] = scaler.inverse_transform(crossvalidation_df[['y']])
        for model in model_names:
            if model in crossvalidation_df.columns:
                crossvalidation_df[model] = scaler.inverse_transform(crossvalidation_df[[model]])

    # Calculate performance metrics for each model
    results = []
    for model in model_names:
        if model in crossvalidation_df.columns:
            rmse, dir_acc, turn_acc = calculate_metrics(
                crossvalidation_df['y'].values,
                crossvalidation_df[model].values
            )
            
            # Calculate weighted score (equal weights for all metrics)
            weighted_score = (rmse + (1 - dir_acc) + (1 - turn_acc)) / 3
            
            results.append({
                'Model': model,
                'RMSE': rmse,
                'Directional_Accuracy': dir_acc,
                'Turning_Point_Accuracy': turn_acc,
                'Weighted_Score': weighted_score
            })
    
    return pd.DataFrame(results), crossvalidation_df

# Define statistical models to evaluate
model_names = ['AutoARIMA', 'AutoETS', 'AutoTheta', 'CES']

# Run parallel experiments with and without data scaling
experiment_ref_no_scale = run_experiment.remote(stats_df, model_names, use_scaler=False)
experiment_ref_with_scale = run_experiment.remote(stats_df, model_names, use_scaler=True)

# Collect results from parallel processes
results_df_no_scale, crossvalidation_df_no_scale = ray.get(experiment_ref_no_scale)
results_df_with_scale, crossvalidation_df_with_scale = ray.get(experiment_ref_with_scale)

# Display performance metrics for both scaling approaches
print("\nModel Performance Metrics (No Scaling):")
print(results_df_no_scale.to_string(index=False))
print("\nModel Performance Metrics (With MinMax Scaling):")
print(results_df_with_scale.to_string(index=False))

# Identify best performing models based on weighted score
best_model_no_scale = results_df_no_scale.loc[results_df_no_scale['Weighted_Score'].idxmin(), 'Model']
best_model_with_scale = results_df_with_scale.loc[results_df_with_scale['Weighted_Score'].idxmin(), 'Model']
print(f"\nBest Model (No Scaling): {best_model_no_scale}")
print(f"Best Model (With MinMax Scaling): {best_model_with_scale}")

# Clean up Ray resources
ray.shutdown()


2025-01-08 22:29:45,284	INFO worker.py:1781 -- Started a local Ray instance.



Model Performance Metrics (No Scaling):
    Model      RMSE  Directional_Accuracy  Turning_Point_Accuracy  Weighted_Score
AutoARIMA 59.881187              0.469767                0.677570       20.244617
  AutoETS 49.253225              0.516279                0.658879       16.692689
AutoTheta 48.181519              0.604651                0.612150       16.321573
      CES 51.997582              0.493023                0.593458       17.637034

Model Performance Metrics (With MinMax Scaling):
    Model      RMSE  Directional_Accuracy  Turning_Point_Accuracy  Weighted_Score
AutoARIMA 58.892318              0.506977                0.691589       19.897918
  AutoETS 48.507817              0.088372                0.565421       16.618008
AutoTheta 48.295634              0.632558                0.640187       16.340963
      CES 55.249885              0.493023                0.658879       18.699328

Best Model (No Scaling): AutoTheta
Best Model (With MinMax Scaling): AutoTheta


# ML MODELS

In [7]:
# Standard libraries
import random
import multiprocessing
from math import sqrt

# Data manipulation
import numpy as np
import pandas as pd

# Machine learning
from sklearn.metrics import mean_squared_error
import optuna

# MLForecast
from mlforecast import MLForecast
from mlforecast.auto import (
    AutoMLForecast,
    AutoElasticNet, 
    AutoXGBoost,
    AutoLightGBM,
    AutoCatboost
)
from mlforecast.target_transforms import LocalStandardScaler
from mlforecast.lag_transforms import ExponentiallyWeightedMean, RollingMean

# Visualization
import matplotlib.pyplot as plt
from utilsforecast.plotting import plot_series

# Core forecasting utilities
from coreforecast.scalers import LocalStandardScaler, LocalMinMaxScaler
from coreforecast.grouped_array import GroupedArray

# Set up multiprocessing and seeds
print(multiprocessing.cpu_count())

def set_seeds(seed=42):
    np.random.seed(seed)
    random.seed(seed)
    optuna.logging.set_verbosity(optuna.logging.WARNING)

set_seeds()
def catboost_model_params(trial: optuna.Trial):
    return {
        'subsample': trial.suggest_float('subsample', 0.5, 1.0)
    }

def calculate_metrics(actual, predicted):
    """Calculate multiple performance metrics for forecasting evaluation.
    
    Args:
        actual (array-like): The actual/true values
        predicted (array-like): The predicted/forecasted values
        
    Returns:
        tuple: A tuple containing:
            - rmse (float): Root Mean Square Error
            - directional_accuracy (float): Proportion of correctly predicted directions (0-1)
            - turning_point_accuracy (float): Proportion of correctly predicted turning points (0-1)
            - weighted_score (float): Combined score weighing all three metrics equally
    """
    # Convert inputs to numpy arrays and flatten
    actual = np.asarray(actual).flatten()
    predicted = np.asarray(predicted).flatten()

    # Calculate RMSE
    rmse = sqrt(mean_squared_error(actual, predicted))
    
    # Calculate directional accuracy (proportion of correctly predicted up/down movements)
    actual_diff = np.diff(actual)
    pred_diff = np.diff(predicted)
    directional_accuracy = np.mean((actual_diff * pred_diff) > 0)
    
    # Calculate turning point accuracy (proportion of correctly predicted trend changes)
    actual_turns = (actual_diff[:-1] * actual_diff[1:]) < 0  # True when direction changes
    pred_turns = (pred_diff[:-1] * pred_diff[1:]) < 0
    turning_point_accuracy = np.mean(actual_turns == pred_turns)
    
    # Calculate weighted score - lower is better
    # Combines RMSE with penalties for poor directional and turning point accuracy
    weighted_score = (rmse + (1 - directional_accuracy) + (1 - turning_point_accuracy)) / 3
    
    return rmse, directional_accuracy, turning_point_accuracy, weighted_score


def run_forecasting_pipeline(stats_df, horizon=6, step_size=1, n_windows=36):
    """Run an automated machine learning forecasting pipeline with multiple models.
    
    This function implements a complete forecasting workflow including:
    - Train/test splitting
    - Data preprocessing and scaling
    - Model training with cross-validation
    - Prediction generation
    - Performance evaluation and visualization
    
    Args:
        stats_df (pd.DataFrame): Input dataframe containing target variable 'y',
            datetime column 'ds', ID column 'unique_id' and optional macro features
        horizon (int, optional): Number of future periods to forecast. Defaults to 3.
        step_size (int, optional): Number of periods between cross-validation windows. Defaults to 3.
        n_windows (int, optional): Number of cross-validation windows. Defaults to 16.
            
    Returns:
        tuple: A tuple containing:
            - auto_mlf (AutoMLForecast): The fitted forecasting model
            - predictions (pd.DataFrame): Future predictions
            - cv_results (dict): Cross-validation results for each model
            - metrics_df (pd.DataFrame): Performance metrics comparison
    """
    # Split data into train and test sets
    # Test set size is determined by number of windows * step size
    train_size = len(stats_df) - n_windows * step_size
    train_df = stats_df[:train_size].copy()
    test_df = stats_df[train_size:].copy()  

    # Basic preprocessing - fill missing values with 0
    processed_df = stats_df.copy()
    processed_df.fillna(0)

    # Identify any exogenous (macro) features by excluding standard columns
    macro_features = processed_df.columns.difference(['unique_id', 'ds', 'y'])
    has_exog = len(macro_features) > 0

    # Scale macro features if present using local min-max scaling
    if has_exog:
        scaler = LocalMinMaxScaler()
        
        # First scale training data
        for feature in macro_features:
            train_values = train_df[feature].values
            indptr = np.array([0, len(train_values)])
            grouped_train = GroupedArray(train_values, indptr)
            scaled_train_values = scaler.fit_transform(grouped_train)
            train_df[feature] = scaled_train_values

        # Then scale full dataset using fitted scaler
        for feature in macro_features:
            full_values = processed_df[feature].values
            indptr = np.array([0, len(full_values)])
            grouped_full = GroupedArray(full_values, indptr)
            scaled_full_values = scaler.transform(grouped_full)
            processed_df[feature] = scaled_full_values

    # Initialize dictionary of models to evaluate
    models = {
        'elasticnet': AutoElasticNet(),  # Linear model with L1/L2 regularization
        'xgboost': AutoXGBoost(),        # Gradient boosting with trees
        'lightgbm': AutoLightGBM(),      # Light gradient boosting
        'catboost': AutoCatboost(config = catboost_model_params)  # Categorical boosting
    }

    # Configure automated ML forecasting framework
    auto_mlf = AutoMLForecast(
        models=models,
        freq='MS',  # Monthly frequency
        season_length=12,  # Annual seasonality
        fit_config=lambda trial: {
            'static_features': [],
            'dropna': True,
            'keep_last_n': None
        },
        num_threads=12  # Parallel processing
    )

    # Fit models with cross-validation
    print("Performing optimization and cross-validation...")
    auto_mlf.fit(
        train_df,
        n_windows=6,
        h=6,
        num_samples=100,
        step_size=1
    )

    # Generate future prediction dataframe
    print("\nGenerating predictions...")
    any_model = next(iter(auto_mlf.models_.values()))
    future_df = any_model.make_future_dataframe(h=horizon)
    
    # Handle future macro features if present
    if has_exog:
        # Get last known values for each series
        last_dates = stats_df.groupby('unique_id')['ds'].max()
        future_values = []
        
        # Create future macro data using last known values
        for idx, row in future_df.iterrows():
            uid = row['unique_id']
            last_known_values = stats_df[stats_df['unique_id'] == uid].loc[
                stats_df['ds'] == last_dates[uid], 
                macro_features
            ].iloc[0]
            
            future_values.append({
                'unique_id': uid,
                'ds': row['ds'],
                **last_known_values
            })
        
        # Scale future macro features
        future_macro_df = pd.DataFrame(future_values)
        for feature in macro_features:
            future_values = future_macro_df[feature].values
            indptr = np.array([0, len(future_values)])
            grouped_future = GroupedArray(future_values, indptr)
            scaled_future_values = scaler.transform(grouped_future)
            future_macro_df[feature] = scaled_future_values
        
        # Generate predictions with exogenous features
        predictions = auto_mlf.predict(horizon, X_df=future_macro_df)
    else:
        # Generate predictions without exogenous features
        predictions = auto_mlf.predict(horizon)

    # Evaluate models using cross-validation
    cv_results = {}
    metrics = {}

    # Loop through each model for evaluation
    for model_name, model in auto_mlf.models_.items():
        # Perform cross-validation on last 48 periods
        cv_df = model.cross_validation(
            df=processed_df,
            n_windows=36,
            h=6,
            step_size=1,
            static_features=[],
            dropna=True,
        )
        cv_results[model_name] = cv_df
        actual = cv_df['y']
        predicted = cv_df[model_name]
        
        # Calculate performance metrics
        rmse, dir_acc, turn_acc, weighted_score = calculate_metrics(actual, predicted)
        metrics[model_name] = {
            'RMSE': rmse,
            'Directional Accuracy': dir_acc,
            'Turning Point Accuracy': turn_acc,
            'Weighted Score': weighted_score
        }

        # Create evaluation plots
        plt.figure(figsize=(15, 6))
        print(f"\nMetrics for {model_name}:")
        print(f"RMSE: {rmse:.4f}")
        print(f"Directional Accuracy: {dir_acc:.4f}")
        print(f"Turning Point Accuracy: {turn_acc:.4f}")
        print(f"Weighted Score: {weighted_score:.4f}")
        
        # Print value ranges for validation
        print(f"\nValue ranges for {model_name}:")
        print("Original data range:", stats_df['y'].min(), "-", stats_df['y'].max())
        print("Predicted data range:", cv_df[model_name].min(), "-", cv_df[model_name].max())
        print("Time range:", cv_df['ds'].min(), "-", cv_df['ds'].max())
    # Create comparison metrics dataframe
    metrics_df = pd.DataFrame(metrics).round(4)
    print("\nModel Comparison Metrics:")
    print(metrics_df)

    # Identify best performing model based on weighted score
    best_model = min(metrics.items(), key=lambda x: x[1]['Weighted Score'])
    print(f"\nBest Model: {best_model[0]} (Weighted Score: {best_model[1]['Weighted Score']:.4f})")

    return auto_mlf, predictions, cv_results, metrics_df

# Run the forecasting pipeline
auto_mlf, predictions, cv_results, metrics_df = run_forecasting_pipeline(stats_df, horizon=6, step_size=1)


12
Performing optimization and cross-validation...


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not 

Learning rate set to 0.03862
0:	learn: 0.9721932	total: 59.7ms	remaining: 59.7s
1:	learn: 0.9396796	total: 60.1ms	remaining: 30s
2:	learn: 0.9093796	total: 60.6ms	remaining: 20.1s
3:	learn: 0.8787695	total: 61ms	remaining: 15.2s
4:	learn: 0.8485333	total: 61.1ms	remaining: 12.2s
5:	learn: 0.8197873	total: 61.5ms	remaining: 10.2s
6:	learn: 0.7947662	total: 62ms	remaining: 8.8s
7:	learn: 0.7718872	total: 62.5ms	remaining: 7.75s
8:	learn: 0.7481623	total: 62.9ms	remaining: 6.93s
9:	learn: 0.7246240	total: 63.3ms	remaining: 6.26s
10:	learn: 0.7017607	total: 63.7ms	remaining: 5.72s
11:	learn: 0.6783532	total: 64ms	remaining: 5.27s
12:	learn: 0.6560367	total: 64.4ms	remaining: 4.89s
13:	learn: 0.6351648	total: 64.8ms	remaining: 4.56s
14:	learn: 0.6143727	total: 65.2ms	remaining: 4.28s
15:	learn: 0.5955690	total: 65.5ms	remaining: 4.03s
16:	learn: 0.5769119	total: 66ms	remaining: 3.81s
17:	learn: 0.5577211	total: 66.2ms	remaining: 3.61s
18:	learn: 0.5392594	total: 66.7ms	remaining: 3.44s
19:	

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters: { "bagging_freq", "min_data_in_leaf" } are not used.

Parameters


Metrics for xgboost:
RMSE: 66.6464
Directional Accuracy: 0.3628
Turning Point Accuracy: 0.5467
Weighted Score: 22.5789

Value ranges for xgboost:
Original data range: 197.0 - 780.0
Predicted data range: 374.63022040099855 - 569.1019176523782
Time range: 2021-08-01 00:00:00 - 2024-12-01 00:00:00

Metrics for lightgbm:
RMSE: 74.3346
Directional Accuracy: 0.4279
Turning Point Accuracy: 0.5140
Weighted Score: 25.1309

Value ranges for lightgbm:
Original data range: 197.0 - 780.0
Predicted data range: 383.8974135886402 - 514.3409544317992
Time range: 2021-08-01 00:00:00 - 2024-12-01 00:00:00
Learning rate set to 0.038932
0:	learn: 0.9725194	total: 729us	remaining: 728ms
1:	learn: 0.9391099	total: 1.21ms	remaining: 606ms
2:	learn: 0.9066855	total: 1.73ms	remaining: 574ms
3:	learn: 0.8747816	total: 2.22ms	remaining: 552ms
4:	learn: 0.8448603	total: 3.47ms	remaining: 690ms
5:	learn: 0.8161075	total: 4.05ms	remaining: 671ms
6:	learn: 0.7879153	total: 4.5ms	remaining: 639ms
7:	learn: 0.7610949	

<Figure size 1500x600 with 0 Axes>

<Figure size 1500x600 with 0 Axes>

<Figure size 1500x600 with 0 Axes>

<Figure size 1500x600 with 0 Axes>