In [1]:
import sys

# Add the sibling ../src directory to the module search path (presumably where
# the `tsimporter` package imported below lives -- confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it does not
# append a duplicate entry to sys.path.
SRC_DIR = "../src"
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [2]:
import datetime
import random
from pathlib import Path

import pandas as pd

from tsimporter import ParquetImporter

In [3]:
# 1. Prepare sample dataset.

number_of_data = 3000
df = pd.DataFrame()
df["time"] = pd.date_range("2021-01-01", periods=number_of_data, freq=datetime.timedelta(hours=1))
df["data1"] = [random.random() for _ in range(number_of_data)]
df["label"] = [True if random.random() > 0.5 else False for _ in range(number_of_data)]


df.iloc[:1000].to_parquet("data1.parquet")
df.iloc[1000:2000].to_parquet("data2.parquet")
df.iloc[2000:].to_parquet("data3.parquet")

In [4]:
# 2. Make an importer instance.

# One entry per column: "source" is the column name in the parquet files,
# "target" is the name handed to the importer for the destination table,
# and "dtype" is the declared column type. "time" is flagged as primary key.
column_mappings = [
    dict(source="time", target="time", dtype="TIMESTAMP", primary_key=True),
    dict(source="data1", target="v1", dtype="FLOAT"),
    dict(source="label", target="flag", dtype="BOOL"),
]

# All constructor arguments gathered in one mapping. The dbname/user/password
# values look like placeholders -- replace them with your own settings.
importer_kwargs = dict(
    dbname="your_dbname",
    user="your_user",
    password="your_password",
    host="host.docker.internal",
    port=5432,
    table_name="my_table",
    column_mappings=column_mappings,
)
importer = ParquetImporter(**importer_kwargs)

2024-11-02 16:03:50,170 - INFO - Connection to host.docker.internal:5432 established successfully.


In [5]:
# 3. Call import_files method.

# The three parquet files written in step 1, passed to the importer in order.
parquet_files = ["data1.parquet", "data2.parquet", "data3.parquet"]
importer.import_files(parquet_files)

2024-11-02 16:03:50,198 - INFO - Connection to host.docker.internal:5432 established successfully.
2024-11-02 16:03:50,393 - INFO - data1.parquet was imported to postgres successfully.
2024-11-02 16:03:50,425 - INFO - data2.parquet was imported to postgres successfully.
2024-11-02 16:03:50,457 - INFO - data3.parquet was imported to postgres successfully.
2024-11-02 16:03:50,462 - INFO - Data importing succeeded, total data now: 3000.


In [6]:
# 4. Cleanup.

# Close the importer's connection, then delete the sample parquet files
# written in step 1.
try:
    importer.close_connection()
finally:
    # Runs even if closing the connection raises, so no sample files are
    # left behind. missing_ok=True lets the cell be re-run (or run after a
    # partial failure) without raising FileNotFoundError.
    for parquet_file in ("data1.parquet", "data2.parquet", "data3.parquet"):
        Path(parquet_file).unlink(missing_ok=True)