In [None]:
def convert_to_arrow(csv_path: str, train_fraction: float = 0.7):
    """
    Build one GluonTS-style series entry from the training part of a CSV.

    The CSV must provide a 'timestamp' column and a 'close' column. Rows are
    ordered chronologically first and only then is the leading
    ``train_fraction`` of them kept, so the retained rows are always the
    oldest ones regardless of the row order inside the file.

    Despite its name, this function writes nothing to disk; it only returns
    the entry.

    Args:
        csv_path: Path of the CSV file to read.
        train_fraction: Share of the rows to keep, counted from the oldest
            row. Must satisfy 0 < train_fraction <= 1. Defaults to 0.7.

    Returns:
        A dict with two keys: "start", the oldest kept timestamp as an
        ``np.datetime64`` at second resolution, and "target", a NumPy array
        holding the kept 'close' values in chronological order.

    Raises:
        ValueError: If ``train_fraction`` is outside (0, 1].
        IndexError: If no rows remain after slicing (empty or tiny file).
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(
            f"train_fraction must be in (0, 1], got {train_fraction!r}"
        )

    data = pd.read_csv(csv_path, parse_dates=["timestamp"])

    # Make sure the column really has a datetime dtype before sorting on it.
    data["timestamp"] = pd.to_datetime(data["timestamp"])

    # Sort BEFORE slicing: taking the head of an unsorted file would mix
    # recent rows into the training portion and yield a wrong start time.
    # A stable sort keeps rows with equal timestamps in file order.
    data = data.sort_values(by="timestamp", kind="stable")

    rows_to_take = int(train_fraction * len(data))
    train_rows = data.iloc[:rows_to_take]

    # A single series described by its first timestamp and its values.
    return {
        "start": np.datetime64(train_rows["timestamp"].iloc[0], "s"),
        "target": train_rows["close"].to_numpy(),
    }

# The 42 LQ45 tickers whose daily CSV files feed the training set
dataset_name = [
    "ACES", "AMRT", "ASII", "BBRI", "BRIS", "CPIN", "GGRM",
    "ICBP", "INKP", "ITMG", "MDKA", "PGAS", "SMGR", "TOWR",
    "ADRO", "ANTM", "BBCA", "BBTN", "BRPT", "ESSA", "GOTO",
    "INCO", "INTP", "KLBF", "MEDC", "PTBA", "SRTG", "UNTR",
    "AKRA", "ARTO", "BBNI", "BMRI", "BUKA", "EXCL", "HRUM",
    "INDF", "ISAT", "MAPI", "MTEL", "SIDO", "TLKM", "UNVR",
]

dataset_train_model = []

# `count` ends up holding the number of tickers visited (0 for an empty list)
count = 0
for count, ds in enumerate(dataset_name, start=1):
    # Adjust this path to wherever the per-ticker CSV files live
    csv_path = f"/content/drive/MyDrive/Dataset Skripsi/daily-lq45/{ds}.csv"

    # Report how many rows the raw file holds
    data = pd.read_csv(csv_path, parse_dates=['timestamp'])
    total_rows = len(data)
    print(ds, "= ", total_rows)

    # Collect the training entry built from the same file
    dataset_train_model.append(convert_to_arrow(csv_path))
count

In [None]:
import pandas as pd
import numpy as np

from gluonts.dataset.split import split
from gluonts.evaluation import Evaluator, make_evaluation_predictions
from gluonts.torch import TemporalFusionTransformerEstimator
from gluonts.dataset.common import ListDataset


def load_and_process_data(
    filename,
    date_column_name,
    index_timezone="America/New_York",
    fillna_method="ffill",
):
    """
    Load a CSV into a DataFrame indexed by a timezone-aware datetime column.

    Args:
        filename: Path of the CSV file (string or path-like object).
        date_column_name: Name of the column holding the timestamps. It is
            parsed as UTC, converted to ``index_timezone`` and used as index.
        index_timezone: Timezone the index is converted to.
        fillna_method: How missing values are filled first: "ffill"/"pad"
            (carry the previous value forward) or "bfill"/"backfill" (use
            the next value). A falsy value skips this step.

    Returns:
        The processed DataFrame. When the file has an "Adj Close" column,
        only that column is kept and it is renamed to
        "<prefix>_Adj_Close", where <prefix> is the part of the file name
        before the first underscore. Any NaN left after the requested fill
        is back-filled.

    Raises:
        ValueError: If ``fillna_method`` is truthy but not a known method.
    """
    import os  # local import keeps this block self-contained

    df = pd.read_csv(filename)

    # Parse as UTC, then express the timestamps in the requested timezone.
    timestamps = pd.to_datetime(df[date_column_name], utc=True)
    df[date_column_name] = timestamps.dt.tz_convert(index_timezone)
    df = df.set_index(date_column_name)

    if "Adj Close" in df.columns:
        # Name the single remaining column after the file-name prefix.
        prefix = os.path.basename(os.fspath(filename)).split("_")[0]
        df = df[["Adj Close"]].rename(
            columns={"Adj Close": f"{prefix}_Adj_Close"}
        )

    # `fillna(method=...)` is deprecated in recent pandas releases, so the
    # dedicated ffill()/bfill() methods are used instead.
    if fillna_method:
        if fillna_method in ("ffill", "pad"):
            df = df.ffill()
        elif fillna_method in ("bfill", "backfill"):
            df = df.bfill()
        else:
            raise ValueError(
                f"Unsupported fillna_method: {fillna_method!r}; "
                "expected 'ffill', 'pad', 'bfill' or 'backfill'"
            )

    # Leading NaNs survive a forward fill; back-fill whatever is left.
    if df.isnull().any().any():
        df = df.bfill()

    return df


# Load the ANTM price file, using its 'timestamp' column as the index
antm_csv_path = "/content/drive/MyDrive/Dataset Skripsi/ANTM.csv"
df = load_and_process_data(antm_csv_path, "timestamp")

# Create GluonTS datasets and split it
def split_data(df, prediction_length, windows=1, target_column="close", freq="D"):
    """
    Wrap one DataFrame column as a GluonTS dataset and split off a test tail.

    Args:
        df: DataFrame whose index holds (or can be parsed into) datetimes.
            It is not modified.
        prediction_length: Number of trailing steps reserved for testing;
            must be positive.
        windows: Number of test windows to generate.
        target_column: Column used as the series values. Defaults to "close".
        freq: Frequency used for the series start period. Defaults to "D".

    Returns:
        A tuple ``(dataset, train_data, test_data)``: the single-entry list
        holding the full series, the training part returned by ``split``,
        and the test instances generated from the remaining tail.

    Raises:
        ValueError: If ``prediction_length`` is not positive.
    """
    if prediction_length <= 0:
        raise ValueError(
            f"prediction_length must be positive, got {prediction_length!r}"
        )

    # Convert into a local variable rather than assigning to df.index, so
    # the caller's DataFrame is left untouched.
    datetime_index = pd.to_datetime(df.index)

    # GluonTS entry: the start period plus the float32 target values.
    start_timestamp = pd.Period(datetime_index[0], freq)
    dataset = [
        {
            "start": start_timestamp,
            "target": df[target_column].values.astype(np.float32),
        }
    ]

    # A negative offset counts from the end of the series, so the last
    # `prediction_length` steps are excluded from the training part.
    train_data, test_gen = split(dataset, offset=-prediction_length)

    # Build the test instances from the tail that was split off.
    test_data = test_gen.generate_instances(prediction_length, windows=windows)

    return dataset, train_data, test_data


# Forecast horizon, shared by the estimator and the train/test split
prediction_length = 64

# Create the estimator
estimator = TemporalFusionTransformerEstimator(
    freq="D",
    context_length=512,
    prediction_length=prediction_length,
    num_heads=4,  # Number of self-attention heads; 4 is a good starting point
    hidden_dim=40,  # Hidden layer size, kept small to limit model complexity
    lr=0.001,  # Same learning rate as the one used for Chronos
    patience=10,  # Scheduler patience; a moderate value
    trainer_kwargs={"max_epochs": 50},
)

# Build the evaluation series and its train/test split from the loaded frame
dataset, training_data, test_data = split_data(df, prediction_length)

# Training set: the entries collected from the per-ticker CSV files
train_model_data = ListDataset(
    dataset_train_model,
    freq="D",
)

model = estimator.train(train_model_data, num_workers=0)

# Make forecast
forecast_it, ts_it = make_evaluation_predictions(
    dataset=dataset, predictor=model
)

# Materialize both iterators so they can be reused below and in later cells
forecasts = list(forecast_it)
tss = list(ts_it)

# Calculate accuracy metrics. The evaluator runs exactly once; an earlier
# extra call whose result was thrown away only repeated the same work.
evaluator = Evaluator()
agg_metrics, item_metrics = evaluator(
    iter(tss), iter(forecasts), num_series=len(dataset)
)

In [None]:
# Display the per-series metrics returned by the Evaluator call in the previous cell
item_metrics

In [None]:
from gluonts.dataset.split import split


from gluonts.ev.metrics import MASE, MeanWeightedSumQuantileLoss
from gluonts.model.evaluation import evaluate_forecasts

# Score the forecasts against the test instances with two metrics:
# MASE and the mean weighted quantile loss over the levels 0.1 ... 0.9.
quantile_levels = np.arange(0.1, 1.0, 0.1)
evaluation_metrics = [
    MASE(),
    MeanWeightedSumQuantileLoss(quantile_levels),
]

metrics_df = evaluate_forecasts(
    forecasts,
    test_data=test_data,
    metrics=evaluation_metrics,
)

metrics_df