In [13]:
from pathlib import Path
from typing import List, Union

import numpy as np
from gluonts.dataset.arrow import ArrowWriter


def convert_to_arrow(
    path: Union[str, Path],
    time_series: Union[List[np.ndarray], np.ndarray],
    compression: str = "lz4",
):
    """
    Write a collection of time series to an Arrow file via ``ArrowWriter``.

    Parameters
    ----------
    path
        Destination handed to ``ArrowWriter.write_to_file``.
    time_series
        Either a Python list (one entry per series) or a 2D numpy array
        laid out as (num_series, time_length). Anything else fails the
        assertion below.
    compression
        Compression setting forwarded to ``ArrowWriter``.
    """
    is_list_input = isinstance(time_series, list)
    is_matrix_input = (
        isinstance(time_series, np.ndarray) and time_series.ndim == 2
    )
    assert is_list_input or is_matrix_input

    # All series share the same placeholder start timestamp
    placeholder_start = np.datetime64("2000-01-01 00:00", "s")

    # Iterating a 2D array walks its rows, so both input kinds yield
    # one record per series.
    records = []
    for series in time_series:
        records.append({"start": placeholder_start, "target": series})

    writer = ArrowWriter(compression=compression)
    writer.write_to_file(records, path=path)


if __name__ == "__main__":
    # Build 20 independent series of 1024 standard-normal samples each
    time_series = [np.random.randn(1024) for _ in range(20)]
    print(time_series)

    # Persist the noise series in GluonTS arrow format
    convert_to_arrow(path="./noise-data.arrow", time_series=time_series)

[array([ 0.50165494,  1.7897512 , -0.5940724 , ..., -0.43886152,
        0.39709067, -0.57292336]), array([-1.58148946,  0.40143474,  0.21221903, ..., -0.18165908,
        1.24679008, -0.83689237]), array([-0.65975602,  1.3715957 ,  0.15030615, ..., -1.01335784,
       -0.29593055,  0.47608535]), array([ 0.91793419, -0.44559979,  0.93085933, ..., -0.37700094,
       -0.46427626, -1.66937125]), array([-0.80485825,  1.8633274 , -0.06572231, ..., -0.73983522,
       -0.90444912, -0.31907126]), array([ 1.48592183,  1.13282133, -2.42576376, ...,  0.80791638,
        1.75514259, -0.5436871 ]), array([ 0.65676413, -0.70270695,  1.05428413, ..., -0.09151376,
        0.46035718,  0.59671006]), array([-0.23259128,  0.76289045, -1.06860306, ...,  0.19952845,
       -0.47303101, -1.41628235]), array([ 0.83863678, -0.09669562, -0.81030927, ..., -0.14206192,
        0.58938698,  1.71047099]), array([0.64161921, 0.18479172, 0.82606914, ..., 0.35696378, 0.86301506,
       1.31781263]), array([ 0.51684

In [16]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Union
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(path: Union[str, Path], csv_path: str, compression: str = "lz4"):
    """
    Convert the ``close`` column of a CSV file into a single-series Arrow file.

    The CSV is read with its ``timestamp`` column parsed as datetimes. All
    ``close`` values are written as ONE time series whose ``start`` is the
    timestamp of the first row. (Previously every row was emitted as its own
    series with a one-element target, which is not a usable time series.)

    Parameters
    ----------
    path
        Destination handed to ``ArrowWriter.write_to_file``.
    csv_path
        CSV file providing the ``timestamp`` and ``close`` columns.
    compression
        Compression setting forwarded to ``ArrowWriter``.

    Raises
    ------
    ValueError
        If the CSV contains no data rows.
    """
    data = pd.read_csv(csv_path, parse_dates=['timestamp'])
    if data.empty:
        raise ValueError(f"No rows found in {csv_path!r}; nothing to convert.")

    # Full close history as a 1D array: this is the whole series.
    # NOTE(review): assumes rows are already in chronological order and
    # evenly spaced -- confirm against the data source.
    target = data['close'].to_numpy()

    # Diagnostic output kept identical to the earlier version
    # (values shown as a column vector).
    print(target.reshape(-1, 1))

    # The series begins at the first recorded timestamp
    start = data['timestamp'].iloc[0]

    # Exactly one record: one start timestamp plus the complete target array
    dataset = [{"start": start, "target": target}]

    ArrowWriter(compression=compression).write_to_file(dataset, path=path)

if __name__ == "__main__":
    # NOTE(review): the CSV location is an absolute, machine-specific path;
    # adjust it when running this notebook elsewhere.
    convert_to_arrow(
        path="./ANTM.arrow",
        csv_path="/home/yogi/chronos-research/dataset/LQ45-daily/ANTM.csv",
    )


[[ 432]
 [ 432]
 [ 432]
 ...
 [2050]
 [1965]
 [1985]]


In [17]:
import pyarrow as pa

def read_arrow_file(file_path):
    """Open an Arrow IPC file, load every record and print it as a DataFrame."""
    with pa.OSFile(file_path, 'rb') as source:
        # Read the complete table while the file handle is still open
        arrow_table = pa.ipc.open_file(source).read_all()
    frame = arrow_table.to_pandas()
    print(frame)

if __name__ == "__main__":
    # Dump both generated files, in the order they were created
    for arrow_path in ("./noise-data.arrow", "./ANTM.arrow"):
        read_arrow_file(arrow_path)


        start                                             target
0  2000-01-01  [0.5016549431674596, 1.7897512035232783, -0.59...
1  2000-01-01  [-1.5814894608833907, 0.40143473982788325, 0.2...
2  2000-01-01  [-0.6597560204814046, 1.3715957049221443, 0.15...
3  2000-01-01  [0.917934185527543, -0.4455997878535175, 0.930...
4  2000-01-01  [-0.8048582474194529, 1.8633273983583762, -0.0...
5  2000-01-01  [1.4859218321583265, 1.1328213287603701, -2.42...
6  2000-01-01  [0.6567641291936093, -0.7027069457053794, 1.05...
7  2000-01-01  [-0.2325912849890312, 0.7628904538397839, -1.0...
8  2000-01-01  [0.8386367845709006, -0.09669562021500169, -0....
9  2000-01-01  [0.6416192056058148, 0.1847917158612974, 0.826...
10 2000-01-01  [0.5168421843488489, -0.287007004555241, 0.424...
11 2000-01-01  [0.17755608947758034, -1.6891597491197632, 0.2...
12 2000-01-01  [-0.33879894384776715, -0.7315882804757746, -0...
13 2000-01-01  [1.3499070902351704, 0.2721617004427162, -0.45...
14 2000-01-01  [-0.491399