In [2]:
# Feather conversion for od_use.csv -> od_use.feather (O, D, t_O, t_D, t_OD only)
# Requirements: pandas >= 1.0, pyarrow installed (pip install pyarrow)

import os
from pathlib import Path
import pandas as pd
from tqdm import tqdm

# Root directory; every immediate sub-directory is scanned for CSV_NAME.
SUBGRAPH_DIR = Path("subgraphs/subgraphs")
# Only these columns are read from the CSV and kept in the Feather file.
USECOLS = ["O", "D", "t_O", "t_D", "t_OD"]
FEATHER_NAME = "od_use.feather"
CSV_NAME = "od_use.csv"

# optional: enforce dtypes to avoid surprises
DTYPES = {
    "O": "string",
    "D": "string",
    "t_O": "float64",
    "t_D": "float64",
    "t_OD": "float64",
}

# Per-directory outcome counters, plus (directory name, reason) for failures.
converted, skipped, failed = 0, 0, 0
problems = []

for ego_dir in tqdm(sorted(p for p in SUBGRAPH_DIR.iterdir() if p.is_dir()), desc="Converting"):
    csv_path = ego_dir / CSV_NAME
    fea_path = ego_dir / FEATHER_NAME
    # Write to a sibling temp file first so a failed or interrupted write
    # never leaves a partial file under the final Feather name.
    tmp_path = ego_dir / (FEATHER_NAME + ".tmp")

    if not csv_path.exists():
        skipped += 1
        continue

    try:
        # Read only needed columns
        df = pd.read_csv(csv_path, usecols=USECOLS, dtype=DTYPES)

        # Write feather (fast, columnar). Compression optional; feather is usually uncompressed for speed.
        # If you want compression, switch to Parquet instead (.to_parquet).
        df.to_feather(tmp_path)  # requires pyarrow

        # Sanity check BEFORE the CSV is deleted: the file must be non-empty
        # and must read back with the same number of rows that were written.
        # A single column is enough to count rows.
        written_rows = len(pd.read_feather(tmp_path, columns=[USECOLS[-1]]))
        if tmp_path.stat().st_size > 0 and written_rows == len(df):
            # Promote the verified file to its final name, then remove the
            # original CSV to save disk space.
            os.replace(tmp_path, fea_path)
            os.remove(csv_path)
            converted += 1
        else:
            failed += 1
            problems.append((ego_dir.name, "feather write check failed"))

    except Exception as e:
        # Top-level boundary of a batch job: record the failure and keep
        # going so one bad directory does not abort the whole run.
        failed += 1
        problems.append((ego_dir.name, str(e)))

    finally:
        # After a successful os.replace the temp file is already gone; on any
        # failure path, clean up whatever was left behind.
        if tmp_path.exists():
            tmp_path.unlink()

print(f"\nDone. Converted: {converted}, Skipped (no CSV): {skipped}, Failed: {failed}")
if problems:
    print("Examples with issues (up to 10):")
    for eg in problems[:10]:
        print("  ", eg)

Converting: 100%|██████████| 5088/5088 [18:07<00:00,  4.68it/s]  


Done. Converted: 5088, Skipped (no CSV): 0, Failed: 0



