## Save in two formats
- Save to data/raw/ as CSV and to data/processed/ as Parquet.

In [5]:
import numpy as np
import pandas as pd
from dotenv import load_dotenv,dotenv_values
from pathlib import Path
import os
import sys
sys.path.append("..")
from src.utils import *

# Generate a small synthetic price series: 20 daily rows for a single ticker.
# A seeded Generator keeps the random walk identical on every Restart & Run All,
# so the files written later in the notebook are reproducible.
rng = np.random.default_rng(42)
dates = pd.date_range('2024-01-01', periods=20, freq='D')
df = pd.DataFrame({
    'date': dates,
    'ticker': ['AAPL'] * 20,
    # Random walk around 150: cumulative sum of standard-normal steps.
    'price': 150 + rng.standard_normal(20).cumsum(),
})
# info() prints the schema summary itself; head() goes last because only the
# final expression of a cell is rendered (mid-cell it was silently discarded).
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    20 non-null     datetime64[ns]
 1   ticker  20 non-null     object        
 2   price   20 non-null     float64       
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 612.0+ bytes


In [6]:
import pathlib
import os

# Persist the same frame twice, once per format.
# get_filename / write_df come from src.utils (star-imported in the first cell).
fname_csv = get_filename("sample", {}, "csv")
fname_parquet = get_filename("sample", {}, "parquet")

# NOTE(review): judging by the messages printed below, the boolean flag picks the
# target folder (False -> ../data/raw, True -> ../data/processed) -- confirm in src.utils.
write_df(df, False, "csv", fname_csv)
write_df(df, True, "parquet", fname_parquet)

Successfully saved file to:  ../data/raw
Successfully saved file to:  ../data/processed


## Reload and Validate

In [7]:
# Read both files back, passing read_df the same flag / format / filename
# triples that were handed to write_df when saving.
df_csv = read_df(False, "csv", fname_csv)
df_parquet = read_df(True, "parquet", fname_parquet)

# Validate
def validate(df_reload: pd.DataFrame, df_original: pd.DataFrame) -> dict:
    """Compare a reloaded DataFrame against the frame it was saved from.

    Parameters
    ----------
    df_reload : pandas.DataFrame
        Frame read back from disk.
    df_original : pandas.DataFrame
        Frame that was written out; its columns drive the dtype comparison.

    Returns
    -------
    dict
        Two entries, "Shape Validation" and "Data type validation", each set
        to "Passed" or "Failed".
    """
    msgs = {"Shape Validation": "Passed", "Data type validation": "Passed"}

    if df_reload.shape != df_original.shape:
        msgs["Shape Validation"] = "Failed"

    # A column that is absent from the reload (dropped or renamed on the round
    # trip) counts as a dtype failure instead of crashing with KeyError.
    dtypes_match = all(
        key in df_reload.columns
        and df_reload[key].dtype == df_original[key].dtype
        for key in df_original.columns
    )
    if not dtypes_match:
        msgs["Data type validation"] = "Failed"

    return msgs

# Run the same checks on each reload, always comparing against the in-memory original.
validation_csv = validate(df_reload=df_csv, df_original=df)
validation_parquet = validate(df_reload=df_parquet, df_original=df)

# NOTE(review): in the recorded run the CSV reload fails the dtype check while
# Parquet passes. Presumably the datetime64 'date' column comes back as plain
# text because CSV stores no dtype information -- confirm how read_df parses it.
print(f"CSV reload validation: {validation_csv}")
print(f"Parquet reload validation: {validation_parquet}")

CSV reload validation: {'Shape Validation': 'Passed', 'Data type validation': 'Failed'}
Parquet reload validation: {'Shape Validation': 'Passed', 'Data type validation': 'Passed'}
