In [None]:
from pathlib import Path
from typing import List, Union

import numpy as np
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(
    path: Union[str, Path],
    time_series: Union[List[np.ndarray], np.ndarray],
    compression: str = "lz4",
):
    """
    Store a given set of series into Arrow format at the specified path.

    Parameters
    ----------
    path
        Destination file. Missing parent directories are created first.
    time_series
        Either a list of 1D numpy arrays, or a single 2D numpy array of
        shape (num_series, time_length). Each list element / array row is
        written as one series.
    compression
        Compression name forwarded to ``ArrowWriter``.

    Raises
    ------
    AssertionError
        If ``time_series`` is neither a list nor a 2D numpy array.
    """
    assert isinstance(time_series, list) or (
        isinstance(time_series, np.ndarray) and time_series.ndim == 2
    ), "time_series must be a list of 1D arrays or a 2D numpy array"

    # No real timestamps are available here, so every series shares the
    # same arbitrary start time.
    start = np.datetime64("2000-01-01 00:00", "s")

    dataset = [{"start": start, "target": ts} for ts in time_series]

    # Create the output directory up front so that writing to e.g.
    # "./arrow_data/x.arrow" also works on a fresh checkout.
    Path(path).parent.mkdir(parents=True, exist_ok=True)

    ArrowWriter(compression=compression).write_to_file(
        dataset,
        path=path,
    )


if __name__ == "__main__":
    # Demo input: 20 independent Gaussian-noise series, 1024 samples each
    time_series = [np.random.randn(1024) for _ in range(20)]
    print(time_series)

    # Write the series out in the GluonTS arrow format
    convert_to_arrow(
        "./arrow_data/noise-data.arrow",
        time_series=time_series,
    )

[array([ 0.00471127, -0.87703357, -1.77814144, ...,  0.21711742,
        0.6824449 ,  0.79433402]), array([-0.88416185, -0.67742994, -0.57299668, ..., -0.13854467,
       -1.07855125, -0.84565664]), array([-2.3087271 , -0.46575622, -2.00959807, ...,  0.92804894,
        2.16451684, -0.4679564 ]), array([ 1.89416338, -0.78414736, -1.13666841, ..., -0.12561028,
        1.27326486,  0.11685418]), array([-0.59436787,  0.58941964,  0.23066557, ..., -0.34023957,
       -1.21866771,  0.25249599]), array([ 1.41991292, -1.945818  ,  0.32531899, ..., -1.43923915,
       -0.3392343 ,  0.26581192]), array([-0.26276713, -0.85839122, -0.87210831, ..., -1.07359215,
        0.08793539, -1.44327399]), array([-0.79269271,  0.36110837,  0.83297512, ..., -0.53730619,
        0.74786492, -0.27518795]), array([ 1.22323374, -0.03332825,  1.14575547, ..., -0.17670895,
       -1.00923528,  1.36739977]), array([-0.00491114,  1.503291  , -0.58151146, ...,  1.33589366,
        0.86223473, -0.34649358]), array([ 1

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from typing import Union
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(
    path: Union[str, Path],
    csv_path: str,
    compression: str = "lz4",
    train_fraction: float = 0.7,
):
    """
    Write the 'close' column of a CSV file to an Arrow file as ONE series.

    The CSV must contain a 'timestamp' and a 'close' column. Rows are ordered
    by timestamp and only the earliest ``train_fraction`` of them is kept.
    The kept slice is printed and then written as a single entry whose
    "start" is the first kept timestamp and whose "target" is the array of
    close values.

    Parameters
    ----------
    path
        Destination Arrow file. Missing parent directories are created.
    csv_path
        CSV file to read.
    compression
        Compression name forwarded to ``ArrowWriter``.
    train_fraction
        Fraction of rows (0 < fraction <= 1) to keep, counted from the
        earliest timestamp. Defaults to 0.7.

    Raises
    ------
    ValueError
        If ``train_fraction`` is out of range or no rows remain after slicing.
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    data = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Order chronologically before slicing so the kept rows really are the
    # earliest part of the series. mergesort keeps ties in file order.
    data = data.sort_values(by='timestamp', kind='mergesort')

    # Number of rows in the leading slice
    rows_to_take = int(train_fraction * len(data))
    if rows_to_take == 0:
        raise ValueError(f"no rows left after taking {train_fraction!r} of {csv_path!r}")
    data = data.head(rows_to_take)

    # Show the slice that is about to be written
    print(data)

    # One entry for the whole series: a single start time plus every close
    # value, rather than one single-point entry per CSV row.
    start = np.datetime64(data['timestamp'].iloc[0], "s")
    dataset = [{"start": start, "target": data['close'].to_numpy()}]

    Path(path).parent.mkdir(parents=True, exist_ok=True)
    ArrowWriter(compression=compression).write_to_file(dataset, path=path)

if __name__ == "__main__":
    # Convert the ANTM CSV into ./arrow_data/ANTM.arrow
    convert_to_arrow(
        path="./arrow_data/ANTM.arrow",
        csv_path="/home/yogi/chronos-research/dataset/LQ45-daily/ANTM.csv",
    )

      timestamp  open  low  high  close     volume
0    2001-04-16   432  407   436    432          0
1    2001-04-17   432  407   436    432          0
2    2001-04-18   432  407   436    432          0
3    2001-04-19   432  407   436    432          0
4    2001-04-20   432  407   436    432          0
...         ...   ...  ...   ...    ...        ...
3963 2016-06-23   730  730   745    730   39292500
3964 2016-06-24   730  705   750    730  190710000
3965 2016-06-27   730  730   745    735   70339400
3966 2016-06-28   740  720   745    720   58690300
3967 2016-06-29   720  720   740    720  105793700

[3968 rows x 6 columns]


In [None]:
# *****USE THIS*****

from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(
    path: Union[str, Path],
    csv_path: str,
    compression: str = "lz4",
    train_fraction: float = 0.7,
):
    """
    Write the 'close' column of a CSV file to an Arrow file as one series.

    The CSV must contain a 'timestamp' and a 'close' column. Rows are ordered
    by timestamp and only the earliest ``train_fraction`` of them is written.

    Parameters
    ----------
    path
        Destination Arrow file. Missing parent directories are created.
    csv_path
        CSV file to read.
    compression
        Compression name forwarded to ``ArrowWriter``.
    train_fraction
        Fraction of rows (0 < fraction <= 1) to keep, counted from the
        earliest timestamp. Defaults to 0.7.

    Raises
    ------
    ValueError
        If ``train_fraction`` is out of range or no rows remain after slicing.
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # Read the CSV file
    data = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Normalise the column on the freshly read frame (not on a slice) and
    # order chronologically BEFORE slicing, so the kept rows are the earliest
    # part of the series even when the file is not sorted.
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data = data.sort_values(by='timestamp', kind='mergesort')

    # Number of rows in the leading slice
    rows_to_take = int(train_fraction * len(data))
    if rows_to_take == 0:
        raise ValueError(f"no rows left after taking {train_fraction!r} of {csv_path!r}")
    data = data.head(rows_to_take)

    # Values of the 'close' column as an array
    time_series = data['close'].to_numpy()

    # The series starts at the first kept timestamp
    start = np.datetime64(data['timestamp'].iloc[0], "s")

    # A dataset made of one series with one start time
    dataset = [{"start": start, "target": time_series}]

    # Write the dataset in arrow format using ArrowWriter
    Path(path).parent.mkdir(parents=True, exist_ok=True)
    ArrowWriter(compression=compression).write_to_file(dataset, path=path)

if __name__ == "__main__":
    # Adjust both paths to the desired CSV input and Arrow output locations
    convert_to_arrow(
        path="/home/yogi/chronos-research/arrow_data/test.arrow",
        csv_path="/home/yogi/chronos-research/dataset/LQ45-daily/ANTM.csv",
    )

In [10]:
# *****USE THIS*****

from pathlib import Path
from typing import List, Union

import numpy as np
import pandas as pd
from gluonts.dataset.arrow import ArrowWriter

def convert_to_arrow(csv_path: str, train_fraction: float = 0.7):
    """
    Build one dataset entry from the 'close' column of a CSV file.

    Nothing is written to disk here: the function only returns the entry,
    and the caller is expected to collect entries and write them.

    The CSV must contain a 'timestamp' and a 'close' column. Rows are ordered
    by timestamp and only the earliest ``train_fraction`` of them is kept.

    Parameters
    ----------
    csv_path
        CSV file to read.
    train_fraction
        Fraction of rows (0 < fraction <= 1) to keep, counted from the
        earliest timestamp. Defaults to 0.7.

    Returns
    -------
    dict
        ``{"start": numpy.datetime64 of the first kept row,
        "target": numpy array of the kept 'close' values}``.

    Raises
    ------
    ValueError
        If ``train_fraction`` is out of range or no rows remain after slicing.
    """
    if not 0 < train_fraction <= 1:
        raise ValueError(f"train_fraction must be in (0, 1], got {train_fraction!r}")

    # Read the CSV file
    data = pd.read_csv(csv_path, parse_dates=['timestamp'])

    # Normalise the column on the freshly read frame (not on a slice) and
    # order chronologically BEFORE slicing, so the kept rows are the earliest
    # part of the series even when the file is not sorted.
    data['timestamp'] = pd.to_datetime(data['timestamp'])
    data = data.sort_values(by='timestamp', kind='mergesort')

    # Number of rows in the leading slice
    rows_to_take = int(train_fraction * len(data))
    if rows_to_take == 0:
        raise ValueError(f"no rows left after taking {train_fraction!r} of {csv_path!r}")
    data = data.head(rows_to_take)

    # Values of the 'close' column as an array
    time_series = data['close'].to_numpy()

    # The series starts at the first kept timestamp
    start = np.datetime64(data['timestamp'].iloc[0], "s")

    # One series with one start time
    return {"start": start, "target": time_series}

if __name__ == "__main__":
    # Alternative ticker universes are kept below, commented out, for reference.

    # 42 LQ45
    # dataset_name=["ACES","AMRT","ASII","BBRI","BRIS","CPIN","GGRM","ICBP",
    #               "INKP","ITMG","MDKA","PGAS","SMGR","TOWR","ADRO","ANTM",
    #               "BBCA","BBTN","BRPT","ESSA","GOTO","INCO","INTP","KLBF",
    #               "MEDC","PTBA","SRTG","UNTR","AKRA","ARTO","BBNI","BMRI",
    #               "BUKA","EXCL","HRUM","INDF","ISAT","MAPI","MTEL","SIDO",
    #               "TLKM","UNVR"]

    # All data
    # dataset_name=["AALI","ABBA","ABDA","ABMM","ACES","ACST","ADCP","ADES","ADHI","ADMF","ADMG","ADMR","ADRO","AGAR","AGII","AGRO","AGRS","AHAP","AIMS","AISA","AKKU","AKPI","AKRA","AKSI","ALDO","ALKA","ALMI","ALTO","AMAG","AMAN","AMAR","AMFG","AMIN","AMMS","AMOR","AMRT","ANDI","ANJT","ANTM","APEX","APIC","APII","APLI","APLN","ARCI","ARGO","ARII","ARKA","ARKO","ARMY","ARNA","ARTA","ARTI","ARTO","ASBI","ASDM","ASGR","ASHA","ASII","ASJT","ASLC","ASMI","ASPI","ASRI","ASRM","ASSA","ATAP","ATIC","AUTO","AVIA","AXIO","AYLS","BABP","BACA","BAJA","BALI","BANK","BAPA","BAPI","BATA","BAUT","BAYU","BBCA","BBHI","BBKP","BBLD","BBMD","BBNI","BBRI","BBRM","BBSI","BBSS","BBTN","BBYB","BCAP","BCIC","BCIP","BDMN","BEBS","BEEF","BEKS","BELI","BELL","BESS","BEST","BFIN","BGTG","BHAT","BHIT","BIKA","BIKE","BIMA","BINA","BINO","BIPI","BIPP","BIRD","BISI","BJBR","BJTM","BKDP","BKSL","BKSW","BLTA","BLTZ","BLUE","BMAS","BMHS","BMRI","BMSR","BMTR","BNBA","BNBR","BNGA","BNII","BNLI","BOBA","BOGA","BOLA","BOLT","BOSS","BPFI","BPII","BPTR","BRAM","BRIS","BRMS","BRNA","BRPT","BSBK","BSDE","BSIM","BSML","BSSR","BSWD","BTEK","BTEL","BTON","BTPN","BTPS","BUAH","BUDI","BUKA","BUKK","BULL","BUMI","BUVA","BVIC","BWPT","BYAN","CAKK","CAMP","CANI","CARE","CARS","CASA","CASH","CASS","CBMF","CBUT","CCSI","CEKA","CENT","CFIN","CHEM","CINT","CITA","CITY","CLAY","CLEO","CLPI","CMNP","CMNT","CMPP","CMRY","CNKO","CNTX","COAL","COCO","COWL","CPIN","CPRI","CPRO","CRAB","CSAP","CSIS","CSMI","CSRA","CTBN","CTRA","CTTH","DADA","DART","DAYA","DCII","DEAL","DEFI","DEPO","DEWA","DEWI","DFAM","DGIK","DGNS","DIGI","DILD","DIVA","DKFT","DLTA","DMAS","DMMX","DMND","DNAR","DNET","DOID","DPNS","DPUM","DRMA","DSFI","DSNG","DSSA","DUCK","DUTI","DVLA","DWGL","DYAN","EAST","ECII","EDGE","EKAD","ELPI","ELSA","ELTY","EMDE","EMTK","ENAK","ENRG","ENVY","ENZO","EPAC","EPMT","ERAA","ERTX","ESIP","ESSA","ESTA","ESTI","ETWA","EURO","EXCL","FAPA","FAST","FASW","FILM","FIMP","FIRE","FISH","FITT","FLMC","FMII","FOOD","FORU","FORZ","FPNI","FREN","FUJI","GAMA","GDST","GDYR","GEMA","GEMS","GGRM","GGRP","GHON","GIAA","GJTL","GLOB","GLVA","GMFI","GMTD","GOLD","GOLL","GOOD","GOTO","GPRA","GPSO","GSMF","GTBO","GTSI","GULA","GWSA","GZCO","HADE","HAIS","HATM","HDFA","HDIT","HEAL","HELI","HERO","HEXA","HITS","HKMU","HMSP","HOKI","HOME","HOMI","HOPE","HOTL","HRME","HRTA","HRUM","IATA","IBFN","IBOS","IBST","ICBP","ICON","IDEA","IDPR","IFII","IFSH","IGAR","IIKP","IKAI","IKAN","IKBI","IMAS","IMJS","IMPC","INAF","INAI","INCF","INCI","INCO","INDF","INDO","INDR","INDS","INDX","INDY","INKP","INOV","INPC","INPP","INPS","INRU","INTA","INTD","INTP","IPAC","IPCC","IPCM","IPOL","IPPE","IPTV","IRRA","ISAP","ISAT","ISSP","ITIC","ITMA","ITMG","JARR","JAST","JAWA","JAYA","JECC","JGLE","JIHD","JKON","JKSW","JMAS","JPFA","JRPT","JSKY","JSMR","JSPT","JTPE","KAEF","KARW","KAYU","KBAG","KBLI","KBLM","KBLV","KBRI","KDSI","KDTN","KEEN","KEJU","KETR","KIAS","KICI","KIJA","KINO","KIOS","KJEN","KKES","KKGI","KLBF","KLIN","KMDS","KMTR","KOBX","KOIN","KONI","KOPI","KOTA","KPAL","KPAS","KPIG","KRAH","KRAS","KREN","KRYA","KUAS","LABA","LAND","LAPD","LCGP","LCKM","LEAD","LFLO","LIFE","LINK","LION","LMAS","LMPI","LMSH","LPCK","LPGI","LPIN","LPKR","LPLI","LPPF","LPPS","LRNA","LSIP","LTLS","LUCK","LUCY","MABA","MAGP","MAIN","MAMI","MAPA","MAPB","MAPI","MARI","MARK","MASA","MASB","MAYA","MBAP","MBSS","MBTO","MCAS","MCOL","MCOR","MDIA","MDKA","MDKI","MDLN","MDRN","MEDC","MEDS","MEGA","MERK","META","MFIN","MFMI","MGLV","MGNA","MGRO","MICE","MIDI","MIKA","MINA","MIRA","MITI","MKNT","MKPI","MKTR","MLBI","MLIA","MLPL","MLPT","MMIX","MMLP","MNCN","MOLI","MORA","MPMX","MPOW","MPPA","MPRO","MRAT","MREI","MSIN","MSKY","MTDL","MTEL","MTFN","MTLA","MTMH","MTPS","MTRA","MTSM","MTWI","MYOH","MYOR","MYRX","MYTX","NANO","NASA","NASI","NATO","NELY","NETV","NFCX","NICK","NICL","NIKL","NINE","NIRO","NISP","NOBU","NPGF","NRCA","NTBK","NUSA","NZIA","OASA","OBMD","OCAP","OILS","OKAS","OLIV","OMED","OMRE","OPMS","PADA","PADI","PALM","PAMG","PANI","PANR","PANS","PBID","PBRX","PBSA","PCAR","PDES","PDPP","PEGE","PEHA","PGAS","PGJO","PGLI","PGUN","PICO","PJAA","PKPK","PLAN","PLAS","PLIN","PMJS","PMMP","PNBN","PNBS","PNGO","PNIN","PNLF","PNSE","POLA","POLI","POLL","POLU","POLY","POOL","PORT","POSA","POWR","PPGL","PPRE","PPRO","PRAS","PRAY","PRDA","PRIM","PSAB","PSDN","PSGO","PSKT","PSSI","PTBA","PTDU","PTIS","PTPP","PTPW","PTRO","PTSN","PTSP","PUDP","PURA","PURE","PURI","PWON","PYFA","PZZA","RAFI","RAJA","RALS","RANC","RBMS","RCCC","RDTX","REAL","RELI","RICY","RIGS","RIMO","RISE","RMBA","RMKE","ROCK","RODA","RONY","ROTI","RSGK","RUIS","RUNS","SAFE","SAME","SAMF","SAPX","SATU","SBAT","SBMA","SCCO","SCMA","SCNP","SCPI","SDMU","SDPC","SDRA","SEMA","SFAN","SGER","SGRO","SHID","SHIP","SICO","SIDO","SILO","SIMA","SIMP","SINI","SIPD","SKBM","SKLT","SKRN","SKYB","SLIS","SMAR","SMBR","SMCB","SMDM","SMDR","SMGR","SMKL","SMKM","SMMA","SMMT","SMRA","SMRU","SMSM","SNLK","SOCI","SOFA","SOHO","SONA","SOSS","SOTS","SPMA","SPTO","SQMI","SRAJ","SRIL","SRSN","SRTG","SSIA","SSMS","SSTM","STAA","STAR","STTP","SULI","SUPR","SURE","SWAT","SWID","TALF","TAMA","TAMU","TAPG","TARA","TAXI","TAYS","TBIG","TBLA","TBMS","TCID","TCPI","TDPM","TEBE","TECH","TELE","TFAS","TFCO","TGKA","TGRA","TIFA","TINS","TIRA","TIRT","TKIM","TLDN","TLKM","TMAS","TMPO","TNCA","TOBA","TOOL","TOPS","TOTL","TOTO","TOWR","TOYS","TPIA","TPMA","TRAM","TRGU","TRIM","TRIN","TRIS","TRJA","TRST","TRUE","TRUK","TRUS","TSPC","TUGU","TURI","UANG","UCID","UFOE","ULTJ","UNIC","UNIQ","UNIT","UNSP","UNTR","UNVR","URBN","UVCR","VICI","VICO","VINS","VIVA","VOKS","VRNA","VTNY","WAPO","WEGE","WEHA","WGSH","WICO","WIFI","WIIM","WIKA","WINR","WINS","WIRG","WMPP","WMUU","WOMF","WOOD","WOWS","WSBP","WSKT","WTON","YELO","YPAS","YULE","ZATA","ZBRA","ZINC","ZONE","ZYRX"]

    # Kompas100
    # dataset_name=["AALI","ACES","ADHI","ADRO","AGRO","AKRA","ANTM","APLN","ASII","ASRI","BBCA","BBKP","BBNI","BBRI","BBTN","BDMN","BEST","BJBR","BJTM","BKSL","BMRI","BMTR","BNGA","BNLI","BRPT","BSDE","BTPN","BUMI","CLEO","CPIN","CTRA","DMAS","DOID","ELSA","ERAA","ESSA","EXCL","FASW","GGRM","GJTL","HMSP","HOKI","HRUM","ICBP","IMAS","INCO","INDF","INDY","INKP","INTP","ISAT","ITMG","JPFA","JSMR","KLBF","LINK","LPKR","LPPF","LSIP","MAIN","MAPI","MEDC","MIKA","MNCN","MYOR","PBRX","PGAS","PNBN","PPRO","PTBA","PTPP","PTRO","PWON","RALS","SCMA","SIDO","SILO","SIMP","SMDR","SMGR","SMRA","SMSM","SRIL","SSIA","SSMS","TBIG","TBLA","TINS","TKIM","TLKM","TOPS","TOWR","TPIA","UNTR","UNVR","WIKA","WOOD","WSBP","WSKT","WTON"]

    # Kompas 100 + sector (3000 row)
    # dataset_name=['AALI', 'ACES', 'ADHI', 'ADRO', 'AGRO', 'AKRA', 'ANTM', 'APLN', 'ASII', 'ASRI', 'BBCA', 'BBKP', 'BBNI', 'BBRI', 'BBTN', 'BDMN', 'BEST', 'BJBR', 'BJTM', 'BKSL', 'BMRI', 'BMTR', 'BNGA', 'BNLI', 'BRPT', 'BSDE', 'BTPN', 'BUMI', 'CLEO', 'CPIN', 'CTRA', 'DMAS', 'DOID', 'ELSA', 'ERAA', 'ESSA', 'EXCL', 'FASW', 'GGRM', 'GJTL', 'HMSP', 'HOKI', 'HRUM', 'ICBP', 'IMAS', 'INCO', 'INDF', 'INDY', 'INKP', 'INTP', 'ISAT', 'ITMG', 'JPFA', 'JSMR', 'KLBF', 'LINK', 'LPKR', 'LPPF', 'LSIP', 'MAIN', 'MAPI', 'MEDC', 'MIKA', 'MNCN', 'MYOR', 'PBRX', 'PGAS', 'PNBN', 'PPRO', 'PTBA', 'PTPP', 'PTRO', 'PWON', 'RALS', 'SCMA', 'SIDO', 'SILO', 'SIMP', 'SMDR', 'SMGR', 'SMRA', 'SMSM', 'SRIL', 'SSIA', 'SSMS', 'TBIG', 'TBLA', 'TINS', 'TKIM', 'TLKM', 'TOPS', 'TOWR', 'TPIA', 'UNTR', 'UNVR', 'WIKA', 'WOOD', 'WSBP', 'WSKT', 'WTON', 'FREN', 'BACA', 'APLI', 'DNET', 'KRAS', 'SMCB', 'ENRG', 'INAF', 'TSPC', 'SQMI', 'SGRO', 'SMAR', 'BWPT', 'META', 'DGIK', 'BUVA', 'DILD', 'MTLA', 'GIAA', 'MBSS', 'PYFA', 'KAEF']

    # Kompas 100 + sector (4000 row) -- the universe actually used
    dataset_name=['ASII', 'BBRI', 'CPIN', 'GGRM', 'INKP', 'PGAS', 'SMGR', 'ANTM', 'BBCA', 'BRPT', 'INCO', 'INTP', 'KLBF', 'MEDC', 'PTBA', 'UNTR', 'AKRA', 'BBNI', 'BMRI', 'EXCL', 'INDF', 'ISAT', 'MAPI', 'TLKM', 'UNVR', 'AALI', 'ADHI', 'AGRO', 'BBKP', 'BDMN', 'BKSL', 'BMTR', 'BNGA', 'BNLI', 'BUMI', 'CTRA', 'DOID', 'FASW', 'GJTL', 'HMSP', 'IMAS', 'JPFA', 'LPKR', 'LPPF', 'LSIP', 'MAIN', 'MNCN', 'MYOR', 'PBRX', 'PNBN', 'PTRO', 'PWON', 'SCMA', 'SMDR', 'SMRA', 'SMSM', 'SSIA', 'TBLA', 'TINS', 'TKIM', 'FREN', 'APLI', 'DNET', 'SMCB', 'ENRG', 'INAF', 'TSPC', 'SQMI', 'SGRO', 'SMAR', 'META', 'DILD', 'PYFA', 'KAEF']

    # Collected entries (one per accepted ticker) and how many were accepted
    dataset = []
    count = 0

    for ds in dataset_name:
        # The same CSV is used for the row-count check and for the conversion
        csv_file = f"/home/yogi/chronos-research/dataset/daily-all/{ds}.csv"

        data = pd.read_csv(csv_file, parse_dates=['timestamp'])
        total_rows = len(data)

        # Only tickers with at least 4000 rows are converted
        if total_rows >= 4000:
            print(ds,"= ",total_rows)
            count += 1
            dataset.append(convert_to_arrow(csv_file))

    print(count)

    # Write all collected entries to one arrow file using ArrowWriter
    ArrowWriter(compression="lz4").write_to_file(
        dataset,
        path="/home/yogi/chronos-research/arrow_data/kompas100-4000row.arrow",
    )


ASII =  5670
BBRI =  5000
CPIN =  5670
GGRM =  5670
INKP =  5670
PGAS =  4975
SMGR =  5670
ANTM =  5670
BBCA =  5670
BRPT =  5670
INCO =  5670
INTP =  5670
KLBF =  5670
MEDC =  5670
PTBA =  5230
UNTR =  5670
AKRA =  5670
BBNI =  5670
BMRI =  5085
EXCL =  4507
INDF =  5670
ISAT =  5670
MAPI =  4738
TLKM =  5670
UNVR =  5670
AALI =  5670
ADHI =  4907
AGRO =  5066
BBKP =  4305
BDMN =  5670
BKSL =  5670
BMTR =  5670
BNGA =  5670
BNLI =  5670
BUMI =  5670
CTRA =  5670
DOID =  5626
FASW =  5670
GJTL =  5670
HMSP =  5670
IMAS =  5670
JPFA =  5670
LPKR =  5670
LPPF =  5670
LSIP =  5670
MAIN =  4411
MNCN =  4056
MYOR =  5670
PBRX =  5670
PNBN =  5670
PTRO =  5670
PWON =  5670
SCMA =  5344
SMDR =  5670
SMRA =  5670
SMSM =  5670
SSIA =  5670
TBLA =  5670
TINS =  5670
TKIM =  5670
FREN =  4203
APLI =  5670
DNET =  5670
SMCB =  5670
ENRG =  4850
INAF =  5669
TSPC =  5670
SQMI =  4822
SGRO =  4060
SMAR =  5670
META =  5603
DILD =  5670
PYFA =  5539
KAEF =  5613
74


In [None]:
import pyarrow as pa

def read_arrow_file(file_path):
    """
    Load an Arrow IPC file, print it as a pandas DataFrame and return it.

    Parameters
    ----------
    file_path
        Path of the Arrow file to read.

    Returns
    -------
    pandas.DataFrame
        The full table converted with ``Table.to_pandas()``. Earlier versions
        returned None, so callers that ignore the result are unaffected.
    """
    with pa.OSFile(file_path, 'rb') as f:
        reader = pa.ipc.open_file(f)
        # Read everything while the file handle is still open
        table = reader.read_all()

    frame = table.to_pandas()
    print(frame)
    return frame

if __name__ == "__main__":
    # Other file inspected earlier:
    # read_arrow_file("./arrow_data/noise-data.arrow")
    read_arrow_file(
        "/home/yogi/chronos-research/Retrieval-Augmented-Time-Series-Forecasting/RAF_finetune_datasets/output2_AALI.arrow"
    )


       start                                             target
0 2017-09-15  [4.426485504051877, 4.426485504051877, 4.42648...


TypeError: 'NoneType' object is not subscriptable

In [7]:
# Extend the base ticker list (dataset_first) with every ticker from
# dataset_second that is not in it yet and whose CSV has at least 4000 rows.

# Previous base list, kept for reference:
# dataset_first=["AALI","ACES","ADHI","ADRO","AGRO","AKRA","ANTM","APLN","ASII","ASRI","BBCA","BBKP","BBNI","BBRI","BBTN","BDMN","BEST","BJBR","BJTM","BKSL","BMRI","BMTR","BNGA","BNLI","BRPT","BSDE","BTPN","BUMI","CLEO","CPIN","CTRA","DMAS","DOID","ELSA","ERAA","ESSA","EXCL","FASW","GGRM","GJTL","HMSP","HOKI","HRUM","ICBP","IMAS","INCO","INDF","INDY","INKP","INTP","ISAT","ITMG","JPFA","JSMR","KLBF","LINK","LPKR","LPPF","LSIP","MAIN","MAPI","MEDC","MIKA","MNCN","MYOR","PBRX","PGAS","PNBN","PPRO","PTBA","PTPP","PTRO","PWON","RALS","SCMA","SIDO","SILO","SIMP","SMDR","SMGR","SMRA","SMSM","SRIL","SSIA","SSMS","TBIG","TBLA","TINS","TKIM","TLKM","TOPS","TOWR","TPIA","UNTR","UNVR","WIKA","WOOD","WSBP","WSKT","WTON"]
dataset_first = [
    "ACES","AMRT","ASII","BBRI","BRIS","CPIN","GGRM","ICBP",
    "INKP","ITMG","MDKA","PGAS","SMGR","TOWR","ADRO","ANTM",
    "BBCA","BBTN","BRPT","ESSA","GOTO","INCO","INTP","KLBF",
    "MEDC","PTBA","SRTG","UNTR","AKRA","ARTO","BBNI","BMRI",
    "BUKA","EXCL","HRUM","INDF","ISAT","MAPI","MTEL","SIDO",
    "TLKM","UNVR",
]

# Previous candidate list, kept for reference:
# dataset_second =["BBCA","BMRI","BBRI","BBNI","BTPN","BJBR","BNGA","BDMN","PNBN","ARTO","TLKM","LINK","ISAT","FREN","MDIA","BACA","APLI","DNET","EDGE","EXCL","INCO","ANTM","SMGR","UNTR","TINS","PTBA","PGAS","KRAS","INTP","SMCB","MEDC","ADRO","AKRA","ITMG","PTBA","ELSA","ENRG","TPIA","HRUM","SMDR","KLBF","SIDO","UNVR","MYOR","INAF","TSPC","SQMI","ROTI","AALI","LSIP","SGRO","SMAR","SSMS","DSNG","BWPT","GOLL","SIMP","SMGR","WSKT","WIKA","PTPP","INKP","TKIM","ISSP","INTP","BRPT","KRAS","JSMR","PGAS","ADHI","WTON","META","CITY","DGIK","BUVA","TOWR","SMRU","CTRA","BSDE","PWON","SMRA","ASRI","DMAS","APLN","DILD","LPKR","MTLA","GIAA","ALTO","BMTR","TAXI","SMBR","IPCC","HAIS","MBSS","KLBF","INAF","PYFA","KAEF","SIDO","MIKA","HEAL","SAME","SQMI","PRDA"]
dataset_second = ['AALI', 'ACES', 'ADHI', 'ADRO', 'AGRO', 'AKRA', 'ANTM', 'APLN', 'ASII', 'ASRI', 'BBCA', 'BBKP', 'BBNI', 'BBRI', 'BBTN', 'BDMN', 'BEST', 'BJBR', 'BJTM', 'BKSL', 'BMRI', 'BMTR', 'BNGA', 'BNLI', 'BRPT', 'BSDE', 'BTPN', 'BUMI', 'CLEO', 'CPIN', 'CTRA', 'DMAS', 'DOID', 'ELSA', 'ERAA', 'ESSA', 'EXCL', 'FASW', 'GGRM', 'GJTL', 'HMSP', 'HOKI', 'HRUM', 'ICBP', 'IMAS', 'INCO', 'INDF', 'INDY', 'INKP', 'INTP', 'ISAT', 'ITMG', 'JPFA', 'JSMR', 'KLBF', 'LINK', 'LPKR', 'LPPF', 'LSIP', 'MAIN', 'MAPI', 'MEDC', 'MIKA', 'MNCN', 'MYOR', 'PBRX', 'PGAS', 'PNBN', 'PPRO', 'PTBA', 'PTPP', 'PTRO', 'PWON', 'RALS', 'SCMA', 'SIDO', 'SILO', 'SIMP', 'SMDR', 'SMGR', 'SMRA', 'SMSM', 'SRIL', 'SSIA', 'SSMS', 'TBIG', 'TBLA', 'TINS', 'TKIM', 'TLKM', 'TOPS', 'TOWR', 'TPIA', 'UNTR', 'UNVR', 'WIKA', 'WOOD', 'WSBP', 'WSKT', 'WTON', 'FREN', 'BACA', 'APLI', 'DNET', 'KRAS', 'SMCB', 'ENRG', 'INAF', 'TSPC', 'SQMI', 'SGRO', 'SMAR', 'BWPT', 'META', 'DGIK', 'BUVA', 'DILD', 'MTLA', 'GIAA', 'MBSS', 'PYFA', 'KAEF']

# `new` collects the tickers added by this run; `count` tallies them
new = []
count = 0

for ds in dataset_second:
    # Tickers already present are skipped without touching their CSV
    if ds not in dataset_first:
        data = pd.read_csv(f"/home/yogi/chronos-research/dataset/daily-all/{ds}.csv", parse_dates=['timestamp'])

        # Total number of rows in this ticker's CSV
        total_rows = len(data)

        # Accept only tickers with at least 4000 rows
        if total_rows >= 4000:
            dataset_first.append(ds)
            new.append(ds)
            print(ds,"= ",total_rows)
            count += 1


print(dataset_first)
print("jumlah total=",len(dataset_first))
print("Count= ",count)

AALI =  5670
ADHI =  4907
AGRO =  5066
BBKP =  4305
BDMN =  5670
BKSL =  5670
BMTR =  5670
BNGA =  5670
BNLI =  5670
BUMI =  5670
CTRA =  5670
DOID =  5626
FASW =  5670
GJTL =  5670
HMSP =  5670
IMAS =  5670
JPFA =  5670
LPKR =  5670
LPPF =  5670
LSIP =  5670
MAIN =  4411
MNCN =  4056
MYOR =  5670
PBRX =  5670
PNBN =  5670
PTRO =  5670
PWON =  5670
SCMA =  5344
SMDR =  5670
SMRA =  5670
SMSM =  5670
SSIA =  5670
TBLA =  5670
TINS =  5670
TKIM =  5670
FREN =  4203
APLI =  5670
DNET =  5670
SMCB =  5670
ENRG =  4850
INAF =  5669
TSPC =  5670
SQMI =  4822
SGRO =  4060
SMAR =  5670
META =  5603
DILD =  5670
PYFA =  5539
KAEF =  5613
['ACES', 'AMRT', 'ASII', 'BBRI', 'BRIS', 'CPIN', 'GGRM', 'ICBP', 'INKP', 'ITMG', 'MDKA', 'PGAS', 'SMGR', 'TOWR', 'ADRO', 'ANTM', 'BBCA', 'BBTN', 'BRPT', 'ESSA', 'GOTO', 'INCO', 'INTP', 'KLBF', 'MEDC', 'PTBA', 'SRTG', 'UNTR', 'AKRA', 'ARTO', 'BBNI', 'BMRI', 'BUKA', 'EXCL', 'HRUM', 'INDF', 'ISAT', 'MAPI', 'MTEL', 'SIDO', 'TLKM', 'UNVR', 'AALI', 'ADHI', 'AGRO', 'B

In [8]:
import pandas as pd

# Keep only the tickers whose CSV has at least 4000 rows.
dataset = ['ACES', 'AMRT', 'ASII', 'BBRI', 'BRIS', 'CPIN', 'GGRM', 'ICBP', 'INKP', 'ITMG', 'MDKA', 'PGAS', 'SMGR', 'TOWR', 'ADRO', 'ANTM', 'BBCA', 'BBTN', 'BRPT', 'ESSA', 'GOTO', 'INCO', 'INTP', 'KLBF', 'MEDC', 'PTBA', 'SRTG', 'UNTR', 'AKRA', 'ARTO', 'BBNI', 'BMRI', 'BUKA', 'EXCL', 'HRUM', 'INDF', 'ISAT', 'MAPI', 'MTEL', 'SIDO', 'TLKM', 'UNVR', 'AALI', 'ADHI', 'AGRO', 'BBKP', 'BDMN', 'BKSL', 'BMTR', 'BNGA', 'BNLI', 'BUMI', 'CTRA', 'DOID', 'FASW', 'GJTL', 'HMSP', 'IMAS', 'JPFA', 'LPKR', 'LPPF', 'LSIP', 'MAIN', 'MNCN', 'MYOR', 'PBRX', 'PNBN', 'PTRO', 'PWON', 'SCMA', 'SMDR', 'SMRA', 'SMSM', 'SSIA', 'TBLA', 'TINS', 'TKIM', 'FREN', 'APLI', 'DNET', 'SMCB', 'ENRG', 'INAF', 'TSPC', 'SQMI', 'SGRO', 'SMAR', 'META', 'DILD', 'PYFA', 'KAEF']

# Accepted tickers, plus a running tally of how many were accepted
dataset_new = []
count = 0

for ds in dataset:
    data = pd.read_csv(f"/home/yogi/chronos-research/dataset/daily-all/{ds}.csv", parse_dates=['timestamp'])

    # Total number of rows in this ticker's CSV
    total_rows = len(data)

    # Accept only tickers with at least 4000 rows
    if total_rows >= 4000:
        dataset_new.append(ds)
        print(ds,"= ",total_rows)
        count += 1

print(dataset_new)
print("jumlah total=",len(dataset_new))

ASII =  5670
BBRI =  5000
CPIN =  5670
GGRM =  5670
INKP =  5670
PGAS =  4975
SMGR =  5670
ANTM =  5670
BBCA =  5670
BRPT =  5670
INCO =  5670
INTP =  5670
KLBF =  5670
MEDC =  5670
PTBA =  5230
UNTR =  5670
AKRA =  5670
BBNI =  5670
BMRI =  5085
EXCL =  4507
INDF =  5670
ISAT =  5670
MAPI =  4738
TLKM =  5670
UNVR =  5670
AALI =  5670
ADHI =  4907
AGRO =  5066
BBKP =  4305
BDMN =  5670
BKSL =  5670
BMTR =  5670
BNGA =  5670
BNLI =  5670
BUMI =  5670
CTRA =  5670
DOID =  5626
FASW =  5670
GJTL =  5670
HMSP =  5670
IMAS =  5670
JPFA =  5670
LPKR =  5670
LPPF =  5670
LSIP =  5670
MAIN =  4411
MNCN =  4056
MYOR =  5670
PBRX =  5670
PNBN =  5670
PTRO =  5670
PWON =  5670
SCMA =  5344
SMDR =  5670
SMRA =  5670
SMSM =  5670
SSIA =  5670
TBLA =  5670
TINS =  5670
TKIM =  5670
FREN =  4203
APLI =  5670
DNET =  5670
SMCB =  5670
ENRG =  4850
INAF =  5669
TSPC =  5670
SQMI =  4822
SGRO =  4060
SMAR =  5670
META =  5603
DILD =  5670
PYFA =  5539
KAEF =  5613
['ASII', 'BBRI', 'CPIN', 'GGRM', 'INKP