In [1]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell runs, so edits to local modules take effect without a kernel restart.
%load_ext autoreload
%autoreload 2

# File Operations

## Create Test Files

In [38]:
import pandas as pd
import yaml
import pyarrow as pa
import pyarrow.feather as feather
import pyarrow.parquet as pq
import sqlite3
import json
import pickle
import os

# Small sample table used as the payload for every test file format.
cols = ["name", "apple", "orange", "banana"]
rows = [
    ["Alice", 1, 0, 1],
    ["Bob", 0, 1, 0],
    ["Charlie", 1, 1, 0],
]
df = pd.DataFrame(rows, columns=cols)

def create_sql_text_file(df, path):
    """Write a fixed two-statement SQL script to ``path``.

    ``df`` is accepted only so the signature matches the other writers in
    ``file_formats``; it is not used.
    """
    statements = ("SELECT 1;", "SELECT 2;")
    with open(path, mode="w", encoding="utf-8") as sql_file:
        sql_file.writelines(f"{statement}\n" for statement in statements)

def _write_yaml(df, path):
    """Dump ``df`` to ``path`` as a YAML list of row records."""
    # The context manager closes the handle deterministically; the earlier
    # inline ``open(path, "w")`` passed to ``yaml.dump`` was never closed.
    with open(path, "w") as f:
        yaml.dump(df.to_dict(orient="records"), f)


def _write_feather(df, path):
    """Write ``df`` (with a fresh default index) to ``path`` via ``write_feather``."""
    feather.write_feather(df.reset_index(drop=True), path)


# Define file formats and corresponding write functions.
# Every writer takes (df, path) and writes the file itself, so the loop below
# needs no per-extension special cases.
file_formats = {
    "csv": lambda df, path: df.to_csv(path, index=False),
    "txt": lambda df, path: df.to_csv(path, index=False, sep='\t'),
    "text": lambda df, path: df.to_csv(path, index=False, sep='\t'),
    "log": lambda df, path: df.to_csv(path, index=False, sep='\t'),
    "sql": create_sql_text_file,  # Use our custom function for SQL text files
    "json": lambda df, path: df.to_json(path, orient='records', lines=False),
    "yaml": _write_yaml,
    "yml": _write_yaml,
    # The ".arrow" fixture is produced with the same writer as ".feather".
    # The previous entry called a non-existent ``Table.to_arrow()`` and never
    # wrote anything to ``path``.
    "arrow": _write_feather,
    "pickle": lambda df, path: df.to_pickle(path),
    "pkl": lambda df, path: df.to_pickle(path),
    "parquet": lambda df, path: df.to_parquet(path, index=False),
    "feather": _write_feather,
}

base_path = "./test_files/test"
# Only the parent directory has to exist. Passing ``base_path`` itself would
# also create a stray "./test_files/test" directory next to the test files.
os.makedirs(os.path.dirname(base_path), exist_ok=True)
for ext, write_func in file_formats.items():
    file_path = f"{base_path}.{ext}"
    write_func(df, file_path)

print("Files written successfully.")

Files written successfully.


In [39]:
from utilities import FileIO as file_io

In [30]:
# Reference only: extension -> handler class names, presumably mirroring the
# dispatch inside `utilities.FileIO` (confirm against that module).
# fileio_mapping = {
#     "csv": CSVFileIO,
#     "txt": TextFileIO,
#     "text": TextFileIO,
#     "log": TextFileIO,
#     "sql": SQLFileIO,
#     "json": JsonFileIO,
#     "yaml": YamlFileIO,
#     "yml": YamlFileIO,
#     "arrow": ArrowFileIO,
#     "feather": FeatherFileIO,
#     "parquet": ParquetFileIO,
#     "pickle": PickleFileIO,
#     "pkl": PickleFileIO,
# }

# Extensions exercised by the cells below, in the order they are reported.
file_types = [
    "csv", "txt", "text", "log", "sql",
    "json", "yaml", "yml",
    "arrow", "feather", "parquet",
    "pickle", "pkl",
]

In [None]:
# Existence: report whether FileIO can see each generated test file.
for f_type in file_types:
    path = f"./test_files/test.{f_type}"
    found = file_io.fexists(path)
    print(f"{f_type} exists:", found)

csv exists: True
txt exists: True
text exists: True
log exists: True
sql exists: True
json exists: True
yaml exists: True
yml exists: True
arrow exists: True
feather exists: True
parquet exists: True
pickle exists: True
pkl exists: True


In [None]:
# Info: print the metadata FileIO returns for each test file.
for f_type in file_types:
    path = f"./test_files/test.{f_type}"
    details = file_io.finfo(path)
    print(f"{f_type} info:", details)

csv info: {'name': 'c:/Users/Lisa Tan/Desktop/Projects/utilities/examples/logger/test_files/test.csv', 'size': 65, 'type': 'file', 'created': 1756889645.1869767, 'islink': False, 'mode': 33206, 'uid': 0, 'gid': 0, 'mtime': 1756889645.1869767, 'ino': 2533274791923254, 'nlink': 1}
txt info: {'name': 'c:/Users/Lisa Tan/Desktop/Projects/utilities/examples/logger/test_files/test.txt', 'size': 65, 'type': 'file', 'created': 1756889645.1869767, 'islink': False, 'mode': 33206, 'uid': 0, 'gid': 0, 'mtime': 1756889645.1869767, 'ino': 2814749768633920, 'nlink': 1}
text info: {'name': 'c:/Users/Lisa Tan/Desktop/Projects/utilities/examples/logger/test_files/test.text', 'size': 65, 'type': 'file', 'created': 1756889645.1912436, 'islink': False, 'mode': 33206, 'uid': 0, 'gid': 0, 'mtime': 1756889645.1912436, 'ino': 2533274791923270, 'nlink': 1}
log info: {'name': 'c:/Users/Lisa Tan/Desktop/Projects/utilities/examples/logger/test_files/test.log', 'size': 65, 'type': 'file', 'created': 1756889645.19124

In [None]:
# Read: load each test file through FileIO and show the type of object returned.
for f_type in file_types:
    path = f"./test_files/test.{f_type}"
    data = file_io.fread(path)
    loaded_type = type(data)
    print(f"{f_type} read:", loaded_type)

csv read: <class 'pandas.core.frame.DataFrame'>
txt read: <class 'str'>
text read: <class 'str'>
log read: <class 'str'>
sql read: <class 'str'>
json read: <class 'list'>
yaml read: <class 'list'>
yml read: <class 'list'>
arrow read: <class 'pandas.core.frame.DataFrame'>
feather read: <class 'pandas.core.frame.DataFrame'>
parquet read: <class 'pandas.core.frame.DataFrame'>
pickle read: <class 'pandas.core.frame.DataFrame'>
pkl read: <class 'pandas.core.frame.DataFrame'>


In [46]:
# Write: round-trip each file by reading it and writing the result to a
# "test_copy" sibling, but only when the loaded object has the kind expected
# for that extension.
for f_type in file_types:
    path = f"./test_files/test.{f_type}"
    copy_path = f"./test_files/test_copy.{f_type}"

    data = file_io.fread(path)

    if f_type in ["txt", "text", "log", "sql"]:
        # Text-like formats are only written back when read as a string.
        should_write = isinstance(data, str)
    elif f_type in ["csv", "feather", "parquet", "arrow"]:
        # Tabular formats need a DataFrame-like object.
        should_write = hasattr(data, "to_csv") or hasattr(data, "to_parquet")
    else:
        # Remaining formats are written back without a type check.
        should_write = True

    if should_write:
        file_io.fwrite(copy_path, data)
    print(f"{f_type} written to copy:", file_io.fexists(copy_path))

csv written to copy: True
txt written to copy: True
text written to copy: True
log written to copy: True
sql written to copy: True
json written to copy: True
yaml written to copy: True
yml written to copy: True
arrow written to copy: True
feather written to copy: True
parquet written to copy: True
pickle written to copy: True
pkl written to copy: True


In [None]:
# Copy: duplicate each test file to a "test_fcopy" sibling and confirm it exists.
for f_type in file_types:
    path = f"./test_files/test.{f_type}"
    fcopy_path = f"./test_files/test_fcopy.{f_type}"

    file_io.fcopy(path, fcopy_path)
    copied = file_io.fexists(fcopy_path)
    print(f"{f_type} copied to fcopy:", copied)

csv copied to fcopy: True
txt copied to fcopy: True
text copied to fcopy: True
log copied to fcopy: True
sql copied to fcopy: True
json copied to fcopy: True
yaml copied to fcopy: True
yml copied to fcopy: True
arrow copied to fcopy: True
feather copied to fcopy: True
parquet copied to fcopy: True
pickle copied to fcopy: True
pkl copied to fcopy: True


In [49]:
# Delete: remove both copies made for each extension and confirm they are gone.
for f_type in file_types:
    # `path` is not used inside this loop; it is still assigned because later
    # cells may read the value left behind after the loop finishes.
    path = f"./test_files/test.{f_type}"
    copy_path = f"./test_files/test_copy.{f_type}"
    fcopy_path = f"./test_files/test_fcopy.{f_type}"

    for label, target in (("copy", copy_path), ("fcopy", fcopy_path)):
        file_io.fdelete(target)
        print(f"{f_type} deleted from {label}:", not file_io.fexists(target))


csv deleted from copy: True
csv deleted from fcopy: True
txt deleted from copy: True
txt deleted from fcopy: True
text deleted from copy: True
text deleted from fcopy: True
log deleted from copy: True
log deleted from fcopy: True
sql deleted from copy: True
sql deleted from fcopy: True
json deleted from copy: True
json deleted from fcopy: True
yaml deleted from copy: True
yaml deleted from fcopy: True
yml deleted from copy: True
yml deleted from fcopy: True
arrow deleted from copy: True
arrow deleted from fcopy: True
feather deleted from copy: True
feather deleted from fcopy: True
parquet deleted from copy: True
parquet deleted from fcopy: True
pickle deleted from copy: True
pickle deleted from fcopy: True
pkl deleted from copy: True
pkl deleted from fcopy: True


## Clean Up Test Files

In [51]:
# Remove the whole test directory. The folder is named explicitly rather than
# derived from a loop variable left over from an earlier cell, so this cell
# works regardless of which cells ran before it.
parent_folder = "./test_files"
file_io.fdelete(parent_folder)