In [1]:
from pathlib import Path
import pandas as pd

In [2]:
def iter_csv_files(root: Path) -> list[Path]:
    """Collect every regular file matching ``*.csv`` anywhere beneath ``root``.

    Args:
        root: Directory to search recursively.

    Returns:
        The matching paths, in the order ``Path.rglob`` yields them.
        Entries that match the pattern but are not regular files
        (e.g. a directory named ``foo.csv``) are left out.
    """
    matches: list[Path] = []
    for candidate in root.rglob("*.csv"):
        if candidate.is_file():
            matches.append(candidate)
    return matches

In [3]:
def convert_csv_to_parquet(src_root: Path, dst_root: Path) -> None:
    """Mirror the CSV tree under ``src_root`` as Parquet files under ``dst_root``.

    Every CSV found by ``iter_csv_files(src_root)`` is loaded with
    ``pd.read_csv`` and written to the same relative location below
    ``dst_root``, with its final suffix replaced by ``.parquet``. Missing
    destination directories are created on demand. If no CSV files are
    found, a message is printed and nothing is written.

    Args:
        src_root: Directory that is searched recursively for CSV files.
        dst_root: Directory that receives the mirrored Parquet files.

    Note:
        ``DataFrame.to_parquet`` needs a Parquet engine (``pyarrow`` or
        ``fastparquet``) to be installed in the kernel's environment.
    """
    csv_files = iter_csv_files(src_root)
    if not csv_files:
        # print() does not apply %-style formatting to its arguments, so the
        # path has to be interpolated into the message explicitly.
        print(f"No CSV files found under {src_root}")
        return

    for csv_path in csv_files:
        # Keep the sub-directory layout of the source tree in the destination.
        rel_path = csv_path.relative_to(src_root)
        parquet_path = dst_root / rel_path.with_suffix(".parquet")
        parquet_path.parent.mkdir(parents=True, exist_ok=True)

        df = pd.read_csv(csv_path)

        # index=False: do not write the DataFrame index into the Parquet file.
        df.to_parquet(parquet_path, index=False)

In [4]:
# Source (CSV) and destination (Parquet) trees. Both are written relative to
# the notebook's working directory and resolved to absolute paths here.
src_root, dst_root = (
    Path(rel).resolve()
    for rel in ("../dataset/vct_2024", "../data/raw/vct_2024")
)

In [5]:
# Run the conversion for the directories configured in the previous cell.
convert_csv_to_parquet(src_root=src_root, dst_root=dst_root)