In [3]:
from pathlib import Path
from typing import List, Union

import numpy as np
from gluonts.dataset.arrow import ArrowWriter


def convert_to_arrow(
    path: Union[str, Path],
    time_series: Union[List[np.ndarray], np.ndarray],
    compression: str = "lz4",
):
    """
    Write a collection of time series to an Arrow file at ``path``.

    ``time_series`` must be either a Python list (intended: a list of 1D
    numpy arrays) or a single 2D numpy array laid out as
    (num_series, time_length); anything else trips the assertion below.
    Each series becomes one record, all records share the same placeholder
    start timestamp, and ``compression`` is handed to ``ArrowWriter``.
    """
    is_list = isinstance(time_series, list)
    is_matrix = isinstance(time_series, np.ndarray) and time_series.ndim == 2
    assert is_list or is_matrix

    # The start timestamp is a fixed placeholder; it is not derived from the data.
    placeholder_start = np.datetime64("2000-01-01 00:00", "s")

    # Iterating a 2D array yields its rows, so both input forms share this loop.
    records = []
    for series in time_series:
        records.append({"start": placeholder_start, "target": series})

    writer = ArrowWriter(compression=compression)
    writer.write_to_file(records, path=path)


if __name__ == "__main__":
    # Draw 20 independent standard-normal series, each 1024 samples long.
    time_series = [np.random.randn(1024) for _ in range(20)]
    print(time_series)

    # Persist the synthetic series in the GluonTS Arrow format.
    convert_to_arrow(
        "./arrow_data/noise-data.arrow",
        time_series=time_series,
    )

[array([ 0.00471127, -0.87703357, -1.77814144, ...,  0.21711742,
        0.6824449 ,  0.79433402]), array([-0.88416185, -0.67742994, -0.57299668, ..., -0.13854467,
       -1.07855125, -0.84565664]), array([-2.3087271 , -0.46575622, -2.00959807, ...,  0.92804894,
        2.16451684, -0.4679564 ]), array([ 1.89416338, -0.78414736, -1.13666841, ..., -0.12561028,
        1.27326486,  0.11685418]), array([-0.59436787,  0.58941964,  0.23066557, ..., -0.34023957,
       -1.21866771,  0.25249599]), array([ 1.41991292, -1.945818  ,  0.32531899, ..., -1.43923915,
       -0.3392343 ,  0.26581192]), array([-0.26276713, -0.85839122, -0.87210831, ..., -1.07359215,
        0.08793539, -1.44327399]), array([-0.79269271,  0.36110837,  0.83297512, ..., -0.53730619,
        0.74786492, -0.27518795]), array([ 1.22323374, -0.03332825,  1.14575547, ..., -0.17670895,
       -1.00923528,  1.36739977]), array([-0.00491114,  1.503291  , -0.58151146, ...,  1.33589366,
        0.86223473, -0.34649358]), array([ 1

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Union
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(path: Union[str, Path], csv_path: str, compression: str = "lz4"):
    """
    Write the earliest 70% of a CSV's 'close' column to an Arrow file as ONE series.

    Parameters
    ----------
    path : destination of the Arrow file.
    csv_path : CSV file containing at least 'timestamp' and 'close' columns.
    compression : compression name handed to ``ArrowWriter``.

    The rows are ordered by timestamp, the first 70% are kept and printed,
    and a single record is written whose "start" is the earliest kept
    timestamp and whose "target" is the array of kept close prices.

    Raises
    ------
    ValueError : if the 70% split leaves no rows.
    """
    data = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Order chronologically first so that "first 70%" means "earliest 70%"
    # even if the CSV rows are not stored in date order.
    data = data.sort_values(by='timestamp', kind='stable')

    # Keep the leading 70% of the rows.
    rows_to_take = int(0.7 * len(data))
    data = data.head(rows_to_take)

    # Show the rows that will be written.
    print(data)

    if data.empty:
        raise ValueError(f"{csv_path}: no rows left after taking the first 70% of the data")

    # One record for the whole instrument: a single start time plus the full
    # close-price array (not one single-point record per CSV row).
    start = np.datetime64(data['timestamp'].iloc[0], "s")
    dataset = [{"start": start, "target": data['close'].to_numpy()}]

    ArrowWriter(compression=compression).write_to_file(dataset, path=path)

if __name__ == "__main__":
    # Convert the ANTM daily price CSV into an Arrow file.
    convert_to_arrow(
        "./arrow_data/ANTM.arrow",
        "/home/yogi/chronos-research/dataset/LQ45-daily/ANTM.csv",
    )


      timestamp  open  low  high  close     volume
0    2001-04-16   432  407   436    432          0
1    2001-04-17   432  407   436    432          0
2    2001-04-18   432  407   436    432          0
3    2001-04-19   432  407   436    432          0
4    2001-04-20   432  407   436    432          0
...         ...   ...  ...   ...    ...        ...
3963 2016-06-23   730  730   745    730   39292500
3964 2016-06-24   730  705   750    730  190710000
3965 2016-06-27   730  730   745    735   70339400
3966 2016-06-28   740  720   745    720   58690300
3967 2016-06-29   720  720   740    720  105793700

[3968 rows x 6 columns]


In [8]:
# *****USE THIS*****

from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(
    path: Union[str, Path],
    csv_path: str,
    compression: str = "lz4",
    train_fraction: float = 0.7,
):
    """
    Write the chronologically first part of a CSV's 'close' column to an Arrow file.

    Parameters
    ----------
    path : destination of the Arrow file.
    csv_path : CSV file containing at least 'timestamp' and 'close' columns.
    compression : compression name handed to ``ArrowWriter``.
    train_fraction : share of rows to keep, in (0, 1]; defaults to 0.7.

    The file is written as a single record whose "start" is the earliest
    kept timestamp and whose "target" is the array of kept close prices.

    Raises
    ------
    ValueError : if ``train_fraction`` is outside (0, 1] or the split leaves no rows.
    """
    if not 0.0 < train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # Read the CSV file.
    raw = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Sort BEFORE splitting: taking head() first would keep the first 70% of
    # the rows as stored on disk, which is not the earliest 70% when the CSV
    # is not already in date order. assign() builds a new frame, so no
    # column is written onto a slice.
    ordered = raw.assign(timestamp=pd.to_datetime(raw['timestamp'])).sort_values(
        by='timestamp', kind='stable'
    )

    # Keep the leading share of the rows.
    rows_to_take = int(train_fraction * len(ordered))
    train = ordered.head(rows_to_take)
    if train.empty:
        raise ValueError(f"{csv_path}: no rows left after taking the first {train_fraction:.0%} of the data")

    # Close prices as a numpy array.
    time_series = train['close'].to_numpy()

    # The series starts at the earliest kept timestamp (second resolution).
    start = np.datetime64(train['timestamp'].iloc[0], "s")

    # A dataset holding one time series with one start time.
    dataset = [{"start": start, "target": time_series}]

    # Serialise the dataset to an Arrow file.
    ArrowWriter(compression=compression).write_to_file(dataset, path=path)

if __name__ == "__main__":
    # Adjust both paths to where the output file should go and where the CSV lives.
    convert_to_arrow(
        "/home/yogi/chronos-research/arrow_data/test.arrow",
        "/home/yogi/chronos-research/dataset/LQ45-daily/ANTM.csv",
    )


In [22]:
# *****USE THIS*****

from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(csv_path: str, train_fraction: float = 0.7):
    """
    Build one time-series record from the chronologically first part of a CSV.

    Despite its name, this function writes nothing: it returns the record
    and leaves writing to the caller.

    Parameters
    ----------
    csv_path : CSV file containing at least 'timestamp' and 'close' columns.
    train_fraction : share of rows to keep, in (0, 1]; defaults to 0.7.

    Returns
    -------
    dict with "start" (numpy datetime64 at second resolution, the earliest
    kept timestamp) and "target" (numpy array of the kept close prices).

    Raises
    ------
    ValueError : if ``train_fraction`` is outside (0, 1] or the split leaves no rows.
    """
    if not 0.0 < train_fraction <= 1.0:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # Read the CSV file.
    raw = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Sort BEFORE splitting: taking head() first would keep the first 70% of
    # the rows as stored on disk, which is not the earliest 70% when the CSV
    # is not already in date order. assign() builds a new frame, so no
    # column is written onto a slice.
    ordered = raw.assign(timestamp=pd.to_datetime(raw['timestamp'])).sort_values(
        by='timestamp', kind='stable'
    )

    # Keep the leading share of the rows.
    rows_to_take = int(train_fraction * len(ordered))
    train = ordered.head(rows_to_take)
    if train.empty:
        raise ValueError(f"{csv_path}: no rows left after taking the first {train_fraction:.0%} of the data")

    # Close prices as a numpy array.
    time_series = train['close'].to_numpy()

    # The series starts at the earliest kept timestamp (second resolution).
    start = np.datetime64(train['timestamp'].iloc[0], "s")

    # One time series with one start time.
    dataset = {"start": start, "target": time_series}

    return dataset

if __name__ == "__main__":
    # Ticker codes; each one names a CSV file in the LQ45-daily folder.
    dataset_name = [
        "ACES", "AMRT", "ASII", "BBRI", "BRIS", "CPIN", "GGRM", "ICBP",
        "INKP", "ITMG", "MDKA", "PGAS", "SMGR", "TOWR", "ADRO", "ANTM",
        "BBCA", "BBTN", "BRPT", "ESSA", "GOTO", "INCO", "INTP", "KLBF",
        "MEDC", "PTBA", "SRTG", "UNTR", "AKRA", "ARTO", "BBNI", "BMRI",
        "BUKA", "EXCL", "HRUM", "INDF", "ISAT", "MAPI", "MTEL", "SIDO",
        "TLKM", "UNVR",
    ]

    # Collect one record per ticker, in list order.
    dataset = []
    for ds in dataset_name:
        # Adjust this path to where the CSV files live.
        record = convert_to_arrow(f"/home/yogi/chronos-research/dataset/LQ45-daily/{ds}.csv")
        dataset.append(record)

    # Write every record into a single Arrow file with ArrowWriter.
    writer = ArrowWriter(compression="lz4")
    writer.write_to_file(
        dataset,
        path="/home/yogi/chronos-research/arrow_data/training.arrow",
    )


In [18]:
import pyarrow as pa

def read_arrow_file(file_path):
    """
    Print the contents of an Arrow IPC file as a pandas DataFrame.

    The file at ``file_path`` is opened for binary reading, read in full
    into a pyarrow table, and the table's pandas form is printed once the
    file has been closed. Nothing is returned.
    """
    with pa.OSFile(file_path, 'rb') as source:
        # Read the whole file while the handle is still open.
        table = pa.ipc.open_file(source).read_all()

    frame = table.to_pandas()
    print(frame)

if __name__ == "__main__":
    # Print the synthetic-noise file first, then the training file.
    for arrow_file in (
        "/home/yogi/chronos-research/arrow_data/noise-data.arrow",
        "/home/yogi/chronos-research/arrow_data/training.arrow",
    ):
        read_arrow_file(arrow_file)


        start                                             target
0  2000-01-01  [0.004711269550239207, -0.8770335680325148, -1...
1  2000-01-01  [-0.8841618518925635, -0.6774299429829096, -0....
2  2000-01-01  [-2.3087271033101517, -0.4657562171381893, -2....
3  2000-01-01  [1.8941633827896915, -0.7841473550389715, -1.1...
4  2000-01-01  [-0.5943678675791243, 0.5894196369908294, 0.23...
5  2000-01-01  [1.4199129205538275, -1.9458179988061894, 0.32...
6  2000-01-01  [-0.2627671290535644, -0.8583912247861986, -0....
7  2000-01-01  [-0.7926927119098559, 0.3611083673682498, 0.83...
8  2000-01-01  [1.2232337375981264, -0.033328254807924146, 1....
9  2000-01-01  [-0.0049111366678351994, 1.5032909958332126, -...
10 2000-01-01  [1.0057099719862919, -1.05383766194054, 1.5192...
11 2000-01-01  [0.4267434250158014, -1.1357304273660396, 0.86...
12 2000-01-01  [1.4965456609573944, 1.5644004783960737, 0.231...
13 2000-01-01  [0.5558481200728136, -0.5371069049176778, 1.00...
14 2000-01-01  [0.0241066