In [54]:
import pandas as pd
import os
from mike_config import TFrame, crypto_lst, time_frame

In [55]:
def read_csv(symbol: str, timestamp: str, base_dir: str = 'D:/GitHub/tryStock/data/'):
    """Load every data file for one symbol/time-frame into a single DataFrame.

    The directory read is ``<base_dir><symbol without "/">/<timestamp>/``.
    Every entry in it except ``featured.csv`` (the file this notebook writes)
    is parsed with ``pandas.read_csv`` and the results are concatenated.

    Parameters
    ----------
    symbol : str
        Symbol name; any ``/`` characters are removed to form the folder name.
    timestamp : str
        Name of the time-frame sub-folder.
    base_dir : str, optional
        Root data directory. Defaults to the original hard-coded location.

    Returns
    -------
    pd.DataFrame
        Rows of all files in sorted file-name order. Each file keeps its own
        row index (the index is not reset). An empty DataFrame is returned
        when the directory holds no input files.

    Raises
    ------
    OSError
        If the directory does not exist or cannot be listed.
    """
    if not base_dir.endswith(('/', '\\')):
        base_dir += '/'
    dir_path = base_dir + symbol.replace("/", "") + '/' + timestamp + '/'
    print(f"Working on {dir_path}...")

    # os.listdir returns entries in arbitrary order; sorting makes the row
    # order (and any seeded shuffle applied later) reproducible.
    file_names = sorted(f for f in os.listdir(dir_path) if f != "featured.csv")
    frames = [pd.read_csv(dir_path + f) for f in file_names]
    if not frames:
        return pd.DataFrame()

    # Files that contain no rows contribute nothing; skip them so a single
    # concat suffices (avoids re-copying the accumulated frame per file).
    non_empty = [frame for frame in frames if len(frame) > 0]
    if not non_empty:
        return frames[-1]
    return pd.concat(non_empty)


def remove_date(df: pd.DataFrame):
    """Return only the columns of ``df`` whose label passes the no-"date" regex.

    The input frame is left untouched; a filtered frame is returned.
    """
    # Negative lookahead: the pattern matches a label only when the text
    # "date" does not appear in it.
    no_date_pattern = r'^(?!.*date).*$'
    return df.filter(regex=no_date_pattern, axis=1)

In [56]:
def calc_data(df: pd.DataFrame, name: str):
    """Add row-wise summary statistics for the ``name`` columns to ``df``.

    Columns whose label contains ``name`` are collected, and the first
    four-fifths of them (integer division) are used to compute, per row:
    mean, median, standard deviation, variance, max, min, and the 25th and
    75th percentiles. Each result is inserted as ``<name>_<stat>`` at the
    column position ``df.shape[1] * 4 // 5`` (evaluated before inserting).

    ``df`` is modified in place and also returned.
    """
    matching = df.filter(like=name, axis=1)
    used_count = matching.shape[1] * 4 // 5
    used_cols = matching.columns[:used_count]
    window = df[used_cols]

    # All statistics are computed before any column is inserted, listed in
    # insertion order.
    stats = [
        ('_mean', window.mean(axis=1)),
        ('_median', window.median(axis=1)),
        ('_std', window.std(axis=1)),
        ('_var', window.var(axis=1)),
        ('_max', window.max(axis=1)),
        ('_min', window.min(axis=1)),
        ('_25p', window.quantile(0.25, axis=1)),
        ('_75p', window.quantile(0.75, axis=1)),
    ]

    # Every column goes in at the same position, so the final left-to-right
    # order is the reverse of the list above.
    insert_at = df.shape[1] * 4 // 5
    for suffix, values in stats:
        # Plain lists are inserted positionally, bypassing index alignment.
        df.insert(insert_at, name + suffix, values.tolist())
    return df

In [57]:
def store_file(symbol: str, timestamp: str, df: pd.DataFrame, base_dir: str = 'D:/GitHub/tryStock/data/'):
    """Write ``df`` to ``featured.csv`` in the symbol/time-frame folder.

    The target is ``<base_dir><symbol without "/">/<timestamp>/featured.csv``.
    An existing file at that path is deleted first, then the frame is written
    without its index. Progress messages are printed for both steps.

    Parameters
    ----------
    symbol : str
        Symbol name; any ``/`` characters are removed to form the folder name.
    timestamp : str
        Name of the time-frame sub-folder.
    df : pd.DataFrame
        Frame to persist.
    base_dir : str, optional
        Root data directory. Defaults to the original hard-coded location.

    Raises
    ------
    OSError
        If the target folder does not exist or the file cannot be written.
    """
    if not base_dir.endswith(('/', '\\')):
        base_dir += '/'
    actual_path = base_dir + symbol.replace("/", "") + '/' + timestamp + '/featured.csv'
    if os.path.isfile(actual_path):
        os.remove(actual_path)
        print("CSV file removed.")
    df.to_csv(actual_path, index=False)
    print(f"CSV file created successfully. --- {actual_path}")

In [64]:
def main():
    """Build and save ``featured.csv`` for every symbol/time-frame pair.

    For each symbol in ``crypto_lst`` and each ``.name`` of the entries in
    ``time_frame``: load the raw files, drop the date columns, add the
    row-wise statistics for each price/volume field, shuffle the rows with a
    fixed seed, and store the result.
    """
    for symbol in crypto_lst:
        interval_names = [frame.name for frame in time_frame]
        for interval in interval_names:
            raw = read_csv(symbol, interval)
            features = remove_date(raw)
            for field in ['open', 'high', 'low', 'close', 'volume']:
                features = calc_data(features, field)
            # Seeded shuffle of all rows, followed by a fresh 0..n-1 index.
            shuffled = features.sample(frac=1, random_state=42).reset_index(drop=True)
            store_file(symbol, interval, shuffled)
            # Drop the references before the next pair is loaded.
            del raw, features, shuffled

In [65]:
# Run the pipeline: writes a featured.csv for each symbol in crypto_lst and each entry of time_frame.
main()

Working on D:/GitHub/tryStock/data/BTCUSDT/5m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/BTCUSDT/5m/featured.csv
Working on D:/GitHub/tryStock/data/BTCUSDT/15m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/BTCUSDT/15m/featured.csv
Working on D:/GitHub/tryStock/data/BTCUSDT/30m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/BTCUSDT/30m/featured.csv
Working on D:/GitHub/tryStock/data/ETHUSDT/5m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/ETHUSDT/5m/featured.csv
Working on D:/GitHub/tryStock/data/ETHUSDT/15m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/ETHUSDT/15m/featured.csv
Working on D:/GitHub/tryStock/data/ETHUSDT/30m/...
CSV file removed.
CSV file created successfully. --- D:/GitHub/tryStock/data/ETHUSDT/30m/featured.csv
Working on D:/GitHub/tryStock/data/BNBUSDT/5m/...
CSV file removed.
CSV file created s