In [4]:
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import xgboost as xgb
import yfinance as yf

In [5]:
from warnings import simplefilter
# Ignore every pandas PerformanceWarning from this point on.
# NOTE(review): presumably this hides the "DataFrame is highly fragmented" warning
# caused by the feature helpers below inserting columns one at a time — confirm.
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)

In [6]:
# Load the raw inputs from the sibling ../data directory (paths are relative to
# the notebook's working directory).
# `df` holds the price history; the preview further down shows the columns
# date, open, high, low, close, adj close and volume.
df = pd.read_parquet("../data/stock_data.parquet")
# NOTE(review): `events` is loaded but not referenced again in the visible cells.
events = pd.read_parquet("../data/events.parquet")
# `dividends` and `splits` are merged onto `df` later via apply_event().
dividends = pd.read_parquet("../data/dividends.parquet")
splits = pd.read_parquet("../data/splits.parquet")
# df

In [7]:

def fib_retracement(data: pd.DataFrame, high: str, low: str, target_column: str, levels: list = [0.236, 0.382, 0.618, 1.0]):
    """
    Adds one retracement column per entry in `levels`.

    Every level is computed row by row as `high - (high - low) * level`.
    `target_column` is only used to build the output column names; its values
    are never read.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - high: Name of the column holding the high values.
    - low: Name of the column holding the low values.
    - target_column: Prefix of the generated `{target_column}_fib_{level}` columns.
    - levels: Retracement ratios to compute (the default list is never mutated here).

    Returns:
    - The same DataFrame with the new columns added.
    """
    for ratio in levels:
        retracement = data[high] - (data[high] - data[low]) * ratio
        data[f"{target_column}_fib_{ratio}"] = retracement

    return data


def create_lag_features(data: pd.DataFrame, target_column, lag_steps=1):
    """
    Adds shifted copies of `target_column` as `{target_column}_lag_{i}` columns.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - target_column: Name of the column to lag.
    - lag_steps: Either an int N (creates lags 1..N) or a list of explicit lags.
      Any other type adds no columns.

    Returns:
    - The same DataFrame with the lag columns added.
    """
    if isinstance(lag_steps, int):
        # An integer means "every lag from 1 up to and including lag_steps".
        steps = range(1, lag_steps + 1)
    elif isinstance(lag_steps, list):
        steps = lag_steps
    else:
        steps = ()

    for step in steps:
        data[f"{target_column}_lag_{step}"] = data[target_column].shift(step)

    return data


def simple_moving_average(data: pd.DataFrame, target_column: str | list, window_size: int | list = 3):
    if isinstance(target_column, list) and isinstance(window_size, list):
        for col in target_column:
            for window in window_size:
                data[f"{col}_rolling_mean_{window}"] = data[col].rolling(window=window).mean()

    elif isinstance(target_column, str):
        data[f"{target_column}_rolling_mean_{window_size}"] = data[target_column].rolling(window=window_size).mean()

    elif isinstance(target_column, list):
        for col in target_column:
            data[f"{col}_rolling_mean_{window_size}"] = data[col].rolling(window=window_size).mean()

    return data


def exponential_moving_average(data: pd.DataFrame, target_column: str | list, window_size: int | list = 3):
    if isinstance(target_column, str):
        target_column = [target_column]
    if isinstance(window_size, int):
        window_size = [window_size]

    for col in target_column:
        for window in window_size:
            data[f"{col}_rolling_exp_mean_{window}"] = data[col].ewm(span=window).mean()

    return data


def bollinger_bands(data: pd.DataFrame, target_column: str, window_size: int = 20):
    """
    Adds the rolling mean, rolling standard deviation and the mean +/- 2 std bands.

    Columns written (in this order):
    `{target_column}_rolling_mean_{window_size}`,
    `{target_column}_rolling_std_{window_size}`,
    `{target_column}_bollinger_upper_{window_size}`,
    `{target_column}_bollinger_lower_{window_size}`.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - target_column: Name of the column the bands are computed from.
    - window_size: Rolling window length.

    Returns:
    - The same DataFrame with the four columns added.
    """
    mean_name = f"{target_column}_rolling_mean_{window_size}"
    std_name = f"{target_column}_rolling_std_{window_size}"
    upper_name = f"{target_column}_bollinger_upper_{window_size}"
    lower_name = f"{target_column}_bollinger_lower_{window_size}"

    windowed = data[target_column].rolling(window=window_size)
    data[mean_name] = windowed.mean()
    data[std_name] = windowed.std()

    # The bands sit two rolling standard deviations either side of the rolling mean.
    data[upper_name] = data[mean_name] + 2 * data[std_name]
    data[lower_name] = data[mean_name] - 2 * data[std_name]

    return data

# def plot_bollinger_bands(data: pd.DataFrame, target_column: str, window_size: int = 20):
#     data[f"{target_column}_rolling_mean_{window_size}"] = data[target_column].rolling(window=window_size).mean()
#     data[f"{target_column}_rolling_std_{window_size}"] = data[target_column].rolling(window=window_size).std()
#     data[f"{target_column}_bollinger_upper_{window_size}"] = data[f"{target_column}_rolling_mean_{window_size}"] + 2 * data[
#         f"{target_column}_rolling_std_{window_size}"]
#     data[f"{target_column}_bollinger_lower_{window_size}"] = data[f"{target_column}_rolling_mean_{window_size}"] - 2 * data[
#         f"{target_column}_rolling_std_{window_size}"]

#     plt.figure(figsize=(12, 6))
#     plt.plot(data[target_column], label="Close Price", color="blue")
#     plt.plot(data[f"{target_column}_rolling_mean_{window_size}"], label="Rolling Mean", color="red")
#     plt.plot(data[f"{target_column}_bollinger_upper_{window_size}"], label="Bollinger Upper", color="green")
#     plt.plot(data[f"{target_column}_bollinger_lower_{window_size}"], label="Bollinger Lower", color="green")
#     plt.title(f"{target_column} Bollinger Bands")
#     plt.legend()
#     plt.show()

#     return data


def average_true_range(data: pd.DataFrame, window_size: int = 14):
    """
    Adds the true range and its rolling mean, computed from "high", "low" and "close".

    The intermediate columns `high_low_*`, `high_close_*` and `low_close_*` are
    kept in the frame. Their values do not depend on `window_size`; only their
    names do. The first row has no previous close, so its true range (and every
    average whose window includes it) is NaN.

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "high", "low" and "close".
    - window_size: Rolling window used for the average.

    Returns:
    - The same DataFrame with the five columns added.
    """
    high_low = f"high_low_{window_size}"
    high_close = f"high_close_{window_size}"
    low_close = f"low_close_{window_size}"

    previous_close = data["close"].shift(1)
    data[high_low] = data["high"] - data["low"]
    data[high_close] = (data["high"] - previous_close).abs()
    data[low_close] = (data["low"] - previous_close).abs()

    # Row-wise maximum of the three candidates; NaN propagates, as with np.max.
    candidates = data[[high_low, high_close, low_close]].to_numpy()
    data[f"true_range_{window_size}"] = candidates.max(axis=1)
    data[f"average_true_range_{window_size}"] = data[f"true_range_{window_size}"].rolling(window=window_size).mean()

    return data


def relative_strength_index(data: pd.DataFrame, column: str = "close", window_size: int = 14):
    """
    Adds `rsi_{window_size}` based on simple rolling means of gains and losses.

    NOTE: the output name does not include `column`, so calling this for several
    columns with the same window overwrites the previous result.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - column: Name of the column whose row-to-row differences are used.
    - window_size: Rolling window for the average gain and average loss.

    Returns:
    - The same DataFrame with the RSI column added.
    """
    delta = data[column].diff()

    # Rows that are not gains (including the first, NaN, difference) count as 0,
    # and likewise for losses.
    upward_moves = delta.where(delta > 0, 0)
    downward_moves = -delta.where(delta < 0, 0)

    average_gain = upward_moves.rolling(window=window_size).mean()
    average_loss = downward_moves.rolling(window=window_size).mean()

    relative_strength = average_gain / average_loss
    data[f"rsi_{window_size}"] = 100 - (100 / (1 + relative_strength))

    return data


def macd(data: pd.DataFrame, column: str = "close", short_window: int = 12, long_window: int = 26, signal_window: int = 9):
    """
    Adds the short/long EMAs, the MACD line, its signal line and the histogram.

    Columns written (in this order): `short_ema_{short_window}`,
    `long_ema_{long_window}`, `macd_{short_window}_{long_window}`,
    `signal_{signal_window}`,
    `macd_hist_{short_window}_{long_window}_{signal_window}`.

    NOTE: none of the output names include `column`, so calling this for several
    columns with the same windows overwrites the previous result.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - column: Name of the column the EMAs are computed from.
    - short_window: Span of the fast EMA.
    - long_window: Span of the slow EMA.
    - signal_window: Span of the EMA applied to the MACD line.

    Returns:
    - The same DataFrame with the five columns added.
    """
    short_name = f"short_ema_{short_window}"
    long_name = f"long_ema_{long_window}"
    macd_name = f"macd_{short_window}_{long_window}"
    signal_name = f"signal_{signal_window}"
    hist_name = f"macd_hist_{short_window}_{long_window}_{signal_window}"

    source = data[column]
    data[short_name] = source.ewm(span=short_window, adjust=False).mean()
    data[long_name] = source.ewm(span=long_window, adjust=False).mean()

    # MACD line = fast EMA - slow EMA; the histogram is its distance from the signal line.
    data[macd_name] = data[short_name] - data[long_name]
    data[signal_name] = data[macd_name].ewm(span=signal_window, adjust=False).mean()
    data[hist_name] = data[macd_name] - data[signal_name]

    return data


def stochastic_oscillator(data: pd.DataFrame, window_size: int = 14):
    """
    Adds `stochastic_oscillator_{window_size}`: where the close sits between the
    rolling lowest low and the rolling highest high, as an unscaled fraction.

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "high", "low" and "close".
    - window_size: Rolling window for the highest high and lowest low.

    Returns:
    - The same DataFrame with the oscillator column added.
    """
    lowest_low = data["low"].rolling(window=window_size).min()
    highest_high = data["high"].rolling(window=window_size).max()

    position_in_range = (data["close"] - lowest_low) / (highest_high - lowest_low)
    data[f"stochastic_oscillator_{window_size}"] = position_in_range

    return data


def williams_r(data: pd.DataFrame, window_size: int = 14):
    """
    Adds `williams_r_{window_size}`: the distance from the rolling highest high
    down to the close, divided by the rolling high-low range (unscaled fraction).

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "high", "low" and "close".
    - window_size: Rolling window for the highest high and lowest low.

    Returns:
    - The same DataFrame with the indicator column added.
    """
    highest_high = data["high"].rolling(window=window_size).max()
    lowest_low = data["low"].rolling(window=window_size).min()

    distance_from_top = (highest_high - data["close"]) / (highest_high - lowest_low)
    data[f"williams_r_{window_size}"] = distance_from_top

    return data


def money_flow_index(data: pd.DataFrame, window_size: int = 14):
    """
    Adds `money_flow_index_{window_size}` (0-100).

    The raw money flow (typical price * volume) of each row is counted as
    positive when the typical price rose versus the previous row and as negative
    when it fell. The index is `100 * positive / (positive + negative)` over the
    rolling window, which equals `100 - 100 / (1 + positive / negative)`.

    The previous implementation divided the rolling raw money flow by itself,
    so the ratio was always 1 and the index was a constant 50.

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "high", "low", "close" and "volume".
    - window_size: Rolling window over which the flows are summed.

    Returns:
    - The same DataFrame with the index column added.
    """
    typical_price = (data["high"] + data["low"] + data["close"]) / 3
    raw_money_flow = typical_price * data["volume"]

    # The first row has no previous price; like unchanged prices it contributes
    # to neither side (same convention as relative_strength_index).
    price_change = typical_price.diff()
    positive_flow = raw_money_flow.where(price_change > 0, 0.0)
    negative_flow = raw_money_flow.where(price_change < 0, 0.0)

    positive_sum = positive_flow.rolling(window=window_size).sum()
    negative_sum = negative_flow.rolling(window=window_size).sum()

    # This form avoids an infinite ratio when the window holds no negative flow
    # (result 100); a window with no flow at all yields NaN.
    data[f"money_flow_index_{window_size}"] = 100 * positive_sum / (positive_sum + negative_sum)

    return data


def on_balance_volume(data: pd.DataFrame, offset: int = 1):
    """
    Adds `on_balance_volume_{offset}`: the running total of signed volume.

    A row adds its volume when the close is above the close `offset` rows
    earlier, subtracts it when the close is below, and contributes 0 when the
    close is unchanged or there is no earlier close to compare with. The
    previous implementation subtracted the volume in those last two cases.

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "close" and "volume".
    - offset: How many rows back the comparison close is taken from.

    Returns:
    - The same DataFrame with the cumulative column added.
    """
    close_change = data["close"].diff(offset)
    volume = data["volume"]

    # Comparisons against NaN are False, so rows without a reference close
    # fall through to 0.
    signed_volume = np.where(close_change > 0, volume, np.where(close_change < 0, -volume, 0))

    data[f"on_balance_volume_{offset}"] = signed_volume
    data[f"on_balance_volume_{offset}"] = data[f"on_balance_volume_{offset}"].cumsum()

    return data


def volume_weighted_average_price(data: pd.DataFrame, window_size: int = 14):
    """
    Adds `volume_weighted_average_price_{window_size}`: the rolling sum of
    close * volume divided by the rolling sum of volume.

    Parameters:
    - data: The DataFrame to extend (modified in place); must contain
      "close" and "volume".
    - window_size: Rolling window for both sums.

    Returns:
    - The same DataFrame with the new column added.
    """
    traded_value = data["close"] * data["volume"]
    rolling_value = traded_value.rolling(window=window_size).sum()
    rolling_volume = data["volume"].rolling(window=window_size).sum()

    data[f"volume_weighted_average_price_{window_size}"] = rolling_value / rolling_volume

    return data


def volitility(data: pd.DataFrame, column: str = "close", window_size: int = 14):
    """
    Adds two rolling standard deviations for `column`.

    - `volitility_{column}_{window_size}`: std of the raw values.
    - `volitility_pct_change_{window_size}_{column}`: std of the row-to-row
      percentage change.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - column: Name of the column to measure.
    - window_size: Rolling window for both standard deviations.

    Returns:
    - The same DataFrame with the two columns added.
    """
    level_std = data[column].rolling(window=window_size).std()
    data[f"volitility_{column}_{window_size}"] = level_std

    return_std = data[column].pct_change().rolling(window=window_size).std()
    data[f"volitility_pct_change_{window_size}_{column}"] = return_std

    return data


def apply_technical_indicators(data: pd.DataFrame, target_column: str | list = "close", window_size: int | list = 14):
    """
    Runs every indicator helper for each (column, window) combination.

    Parameters:
    - data: The DataFrame to extend; the helpers add their columns in place.
    - target_column: One column name or a list of column names.
    - window_size: One window or a list of windows.

    Returns:
    - The DataFrame returned by the last helper call.

    NOTE(review):
    - relative_strength_index and macd do not put the source column in their
      output names, so each pass over a new column overwrites the previous one;
      only the last column in `target_column` survives.
    - average_true_range, stochastic_oscillator, williams_r, money_flow_index,
      on_balance_volume and volume_weighted_average_price never read `col`, so
      they are recomputed with identical results for every column.
    - simple_moving_average and bollinger_bands both write
      `{col}_rolling_mean_{window}`.
    """
    columns = [target_column] if isinstance(target_column, str) else target_column
    windows = [window_size] if isinstance(window_size, int) else window_size

    for col in columns:
        for window in windows:
            # The call order fixes the column order of the resulting frame.
            data = volitility(data, col, window)
            data = average_true_range(data, window)

            # Moving averages of the current column.
            data = simple_moving_average(data, col, window)
            data = exponential_moving_average(data, col, window)

            # Momentum indicators (macd always uses its default windows).
            data = relative_strength_index(data, col, window)
            data = macd(data, col)
            data = stochastic_oscillator(data, window)
            data = williams_r(data, window)

            # Volume-based indicators (on_balance_volume uses its default offset).
            data = money_flow_index(data, window)
            data = on_balance_volume(data)
            data = volume_weighted_average_price(data, window)

            # Bands and retracement levels named after the current column.
            data = bollinger_bands(data, col, window)
            data = fib_retracement(data, "high", "low", col)

    return data


def forward_fill_with_decay(df: pd.DataFrame, column, decay_factor):
    """
    Forward fills missing values in a specified column with a decay factor applied.

    A row holding a value keeps it unchanged. The k-th consecutive missing row
    after it receives `last_value * decay_factor ** k`. Missing rows before the
    first value stay NaN.

    Rows are processed by position, so the result does not depend on the index
    labels. The previous implementation looked rows up by the labels
    0..len(df)-1 and therefore only worked on a default RangeIndex.

    Parameters:
    - df: The DataFrame containing the data.
    - column: The column name (or index) to apply forward fill with decay.
    - decay_factor: The factor by which the previous value decays (0 < decay_factor < 1).

    Returns:
    - A copy of the DataFrame with missing values filled with decayed forward fill.

    Raises:
    - ValueError: If decay_factor is not strictly between 0 and 1.
    """
    if not 0 < decay_factor < 1:
        raise ValueError("Decay factor must be between 0 and 1.")

    df = df.copy()  # Avoid modifying the original DataFrame
    values = df[column].astype(float)  # Ensure column is of float type for NaNs

    # Each observed value starts a new run; cumcount() is then the number of
    # rows since that observation (0 on the observed row itself).
    run_id = values.notna().cumsum().to_numpy()
    steps_since_observation = values.groupby(run_id).cumcount().to_numpy()

    # Leading NaNs have nothing to forward fill, so they remain NaN.
    df[column] = values.ffill().to_numpy() * decay_factor ** steps_since_observation

    return df


def apply_event(data: pd.DataFrame, event: pd.DataFrame, prefix_name, falloff: bool = True, decay_factor: float = 0.99) -> pd.DataFrame:
    """
    Left-joins an events frame onto `data` by "date" and forward fills it.

    `event` must provide "date", "name", "value" and "sentiment" columns. After
    the join, name, value and sentiment are forward filled, and a decay column
    is set to 1 on rows where an event matched. All four end up as
    `{prefix_name}_event_name`, `_event_value`, `_event_sentiment` and
    `_event_decay`.

    Parameters:
    - data: The DataFrame to join onto; must contain a "date" column.
    - event: The events DataFrame (not modified; a deep copy is used).
    - prefix_name: Prefix for the resulting event columns.
    - falloff: When True, the decay column is filled with forward_fill_with_decay.
    - decay_factor: Decay factor passed to forward_fill_with_decay.

    Returns:
    - A new DataFrame containing `data` plus the event columns.
    """
    event = event.copy(deep=True)
    event["date"] = pd.to_datetime(event["date"])

    # Rename Events columns to event_* to avoid conflicts
    prefixed_names = {name: f"event_{name}" for name in event.columns if name != "date"}
    event = event.rename(columns=prefixed_names)

    df = data.merge(event, on="date", how="left").copy(deep=True)

    # Mark the rows that matched an event before forward filling hides them.
    df["event_decay"] = np.where(pd.notna(df["event_name"]), 1, np.nan)
    for field in ("event_name", "event_value", "event_sentiment"):
        df[field] = df[field].ffill()

    final_names = {
        f"event_{field}": f"{prefix_name}_event_{field}"
        for field in ("name", "value", "sentiment", "decay")
    }
    df = df.rename(columns=final_names)

    # forward fill sentiment with decay
    if falloff:
        df = forward_fill_with_decay(df, f"{prefix_name}_event_decay", decay_factor)

    return df


def day_of_week(data: pd.DataFrame) -> pd.DataFrame:
    """Adds "day_of_week" (Monday = 0) from the datetime column "date", in place."""
    weekday_numbers = data["date"].dt.dayofweek
    data["day_of_week"] = weekday_numbers.astype(int)
    return data


def day_of_month(data: pd.DataFrame) -> pd.DataFrame:
    """Adds "day_of_month" from the datetime column "date", in place."""
    calendar_days = data["date"].dt.day
    data["day_of_month"] = calendar_days.astype(int)
    return data


def month(data: pd.DataFrame) -> pd.DataFrame:
    """Adds "month" (1-12) from the datetime column "date", in place."""
    month_numbers = data["date"].dt.month
    data["month"] = month_numbers.astype(int)
    return data


def year(data: pd.DataFrame) -> pd.DataFrame:
    """Adds "year" from the datetime column "date", in place."""
    calendar_years = data["date"].dt.year
    data["year"] = calendar_years.astype(int)
    return data


def week_of_year(data: pd.DataFrame) -> pd.DataFrame:
    """Adds "week_of_year" (ISO calendar week) from the datetime column "date", in place."""
    iso_calendar = data["date"].dt.isocalendar()
    data["week_of_year"] = iso_calendar.week.astype(int)
    return data


def is_not_nan_column_and_default(data: pd.DataFrame, column) -> pd.DataFrame:
    """
    Adds a presence flag for each column and replaces its missing values with 0.

    For every column `c` a new column `is_nan_{c}` is written. Despite its name
    it holds 1 where the value was present and 0 where it was missing; the name
    is kept for compatibility with existing datasets.

    The previous implementation returned None for anything other than a list or
    a str (for example a tuple); those inputs are now handled as well.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - column: One column label, or a list/tuple/Index of column labels.

    Returns:
    - The same DataFrame with the flag columns added and NaNs filled.
    """
    if isinstance(column, (list, tuple, pd.Index)):
        columns = list(column)
    else:
        columns = [column]

    for col in columns:
        # Capture the flag before fillna() removes the missing values.
        data[f"is_nan_{col}"] = data[col].notna().astype(int)
        data[col] = data[col].fillna(0)

    return data


def future_value(data: pd.DataFrame, column: str, offset: int, drop_recent: bool = True) -> pd.DataFrame:
    """
    Adds `target_{column}`: the value of `column` found `offset` rows later.

    Parameters:
    - data: The DataFrame to extend (the new column is added in place).
    - column: Name of the column to look ahead in.
    - offset: Number of rows to look ahead.
    - drop_recent: When True, the frame is passed through
      drop_recent_with_offset to trim the last `offset` rows, whose target is NaN.

    Returns:
    - The DataFrame with the target column, trimmed when `drop_recent` is set.
    """
    data[f"target_{column}"] = data[column].shift(-offset)

    if not drop_recent:
        return data

    # Drop last X offset rows to remove the NaN targets.
    return drop_recent_with_offset(data, offset)


def drop_recent_with_offset(data: pd.DataFrame, offset: int) -> pd.DataFrame:
    """
    Returns `data` without its last `offset` rows.

    The previous `data[:-offset]` returned an empty frame for `offset == 0`
    (because `-0 == 0`); a zero or negative offset now drops nothing. An offset
    larger than the frame yields an empty frame.

    Parameters:
    - data: The DataFrame to trim.
    - offset: Number of trailing rows to remove.

    Returns:
    - A copy of the remaining rows, so later column assignments do not write
      into a slice of the original frame.
    """
    rows_to_drop = max(offset, 0)
    rows_to_keep = max(len(data) - rows_to_drop, 0)
    return data.iloc[:rows_to_keep].copy()


def percent_change_between_columns(data: pd.DataFrame, column: str, column2: str) -> pd.DataFrame:
    """
    Adds `percent_change_{column}`: the relative change from `column` to
    `column2`, i.e. `(column2 - column) / column`, as a fraction.

    Parameters:
    - data: The DataFrame to extend (modified in place).
    - column: Name of the base column (the denominator).
    - column2: Name of the column compared against the base.

    Returns:
    - The same DataFrame with the new column added.
    """
    base = data[column]
    difference = data[column2] - base
    data[f"percent_change_{column}"] = difference / base
    return data


def percent_change(data: pd.DataFrame, column: str, offset: int | list = 1, drop_recent: bool = True) -> pd.DataFrame:
    if isinstance(offset, int):
        offset = [offset]

    for off in offset:
        data[f"{column}_percent_change_{off}"] = (data[column].shift(-off) - data[column]) / data[column]

    if drop_recent:  # Drop to remove NaNs
        data = drop_recent_with_offset(data, max(offset))

    return data

SyntaxError: unterminated string literal (detected at line 104) (3757692847.py, line 104)

In [None]:
# Preview the raw price frame before any features are added.
df

Unnamed: 0,date,open,high,low,close,adj close,volume
0,2004-08-10,0.542679,0.563214,0.541964,0.562857,0.475869,351036000
1,2004-08-11,0.555357,0.555893,0.540357,0.553750,0.468169,322392000
2,2004-08-12,0.543750,0.550893,0.540714,0.542321,0.458507,226200800
3,2004-08-13,0.546429,0.558571,0.542857,0.550714,0.465603,328048000
4,2004-08-16,0.553571,0.566429,0.547143,0.549643,0.464697,435674400
...,...,...,...,...,...,...,...
5030,2024-08-05,199.089996,213.500000,196.000000,209.270004,209.270004,119548600
5031,2024-08-06,205.300003,209.990005,201.070007,207.229996,207.229996,69660500
5032,2024-08-07,206.899994,213.639999,206.389999,209.820007,209.820007,63516400
5033,2024-08-08,213.110001,214.199997,208.830002,213.309998,213.309998,47161100


In [None]:
# plot_bollinger_bands(df, "close")

In [None]:
# Add every indicator for each (column, window) pair; `df` is extended in place
# and rebound, so this cell is not idempotent when re-run.
# NOTE(review): rsi_* and the macd columns do not carry the source column in
# their names, so each column pass overwrites the last; the surviving values
# come from "volume", the last entry in the list.
df = apply_technical_indicators(df, ["open", "close", "high", "low", "volume"], [7, 14, 30])
df

Unnamed: 0,date,open,high,low,close,adj close,volume,volitility_open_7,volitility_pct_change_7_open,high_low_7,...,volume_rolling_std_14,volume_bollinger_upper_14,volume_bollinger_lower_14,volitility_volume_30,volitility_pct_change_30_volume,volume_rolling_mean_30,volume_rolling_exp_mean_30,volume_rolling_std_30,volume_bollinger_upper_30,volume_bollinger_lower_30
0,2004-08-10,0.542679,0.563214,0.541964,0.562857,0.475869,351036000,,,0.021250,...,,,,,,,3.510360e+08,,,
1,2004-08-11,0.555357,0.555893,0.540357,0.553750,0.468169,322392000,,,0.015536,...,,,,,,,3.362366e+08,,,
2,2004-08-12,0.543750,0.550893,0.540714,0.542321,0.458507,226200800,,,0.010179,...,,,,,,,2.970865e+08,,,
3,2004-08-13,0.546429,0.558571,0.542857,0.550714,0.465603,328048000,,,0.015714,...,,,,,,,3.056175e+08,,,
4,2004-08-16,0.553571,0.566429,0.547143,0.549643,0.464697,435674400,,,0.019286,...,,,,,,,3.352086e+08,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5030,2024-08-05,199.089996,213.500000,196.000000,209.270004,209.270004,119548600,8.237186,0.036997,17.500000,...,2.439763e+07,1.081577e+08,1.056720e+07,1.842251e+07,0.328359,5.922292e+07,6.500188e+07,1.842251e+07,9.606794e+07,2.237789e+07
5031,2024-08-06,205.300003,209.990005,201.070007,207.229996,207.229996,69660500,9.265285,0.040549,8.919998,...,2.454090e+07,1.093239e+08,1.116026e+07,1.808478e+07,0.312469,5.885403e+07,6.530244e+07,1.808478e+07,9.502359e+07,2.268448e+07
5032,2024-08-07,206.899994,213.639999,206.389999,209.820007,209.820007,63516400,9.693821,0.041003,7.250000,...,2.450438e+07,1.090710e+08,1.105343e+07,1.809966e+07,0.307006,5.908078e+07,6.518721e+07,1.809966e+07,9.528010e+07,2.288147e+07
5033,2024-08-08,213.110001,214.199997,208.830002,213.309998,213.309998,47161100,9.380378,0.042942,5.369995,...,2.457822e+07,1.090765e+08,1.076359e+07,1.817486e+07,0.310946,5.844571e+07,6.402423e+07,1.817486e+07,9.479543e+07,2.209600e+07


In [None]:
# %%capture
df = percent_change(df, "close", [1, 7, 14, 30])
# df = future_value(df, "close", 1)
df

Unnamed: 0,date,open,high,low,close,adj close,volume,volitility_open_7,volitility_pct_change_7_open,high_low_7,...,volitility_pct_change_30_volume,volume_rolling_mean_30,volume_rolling_exp_mean_30,volume_rolling_std_30,volume_bollinger_upper_30,volume_bollinger_lower_30,close_percent_change_1,close_percent_change_7,close_percent_change_14,close_percent_change_30
0,2004-08-10,0.542679,0.563214,0.541964,0.562857,0.475869,351036000,,,0.021250,...,,,3.510360e+08,,,,-0.016180,-0.025697,0.082488,0.171321
1,2004-08-11,0.555357,0.555893,0.540357,0.553750,0.468169,322392000,,,0.015536,...,,,3.362366e+08,,,,-0.020639,-0.006772,0.112222,0.201871
2,2004-08-12,0.543750,0.550893,0.540714,0.542321,0.458507,226200800,,,0.010179,...,,,2.970865e+08,,,,0.015476,0.023379,0.180771,0.227858
3,2004-08-13,0.546429,0.558571,0.542857,0.550714,0.465603,328048000,,,0.015714,...,,,3.056175e+08,,,,-0.001945,0.035993,0.156292,0.216927
4,2004-08-16,0.553571,0.566429,0.547143,0.549643,0.464697,435674400,,,0.019286,...,,,3.352086e+08,,,,0.002924,0.073750,0.144574,0.235868
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5000,2024-06-21,210.389999,211.889999,207.110001,207.490005,207.490005,246421400,3.274162,0.032110,4.779999,...,0.465386,7.385993e+07,8.790852e+07,4.903756e+07,1.719351e+08,-2.421519e+07,0.003133,0.061593,0.111090,0.008579
5001,2024-06-24,207.720001,212.699997,206.589996,208.139999,208.139999,80727000,3.174396,0.019971,6.110001,...,0.488045,7.491807e+07,8.744519e+07,4.882428e+07,1.725666e+08,-2.273049e+07,0.004468,0.064428,0.126165,-0.004372
5002,2024-06-25,209.149994,211.380005,208.610001,209.070007,209.070007,56713900,3.384621,0.013439,2.770004,...,0.493489,7.511655e+07,8.546253e+07,4.873471e+07,1.725860e+08,-2.235286e+07,0.019993,0.082604,0.123164,0.003587
5003,2024-06-26,211.500000,214.860001,210.639999,213.250000,213.250000,66213200,3.319522,0.014561,4.220001,...,0.489928,7.492216e+07,8.422064e+07,4.875900e+07,1.724402e+08,-2.259585e+07,0.003986,0.068324,0.073294,0.000281


In [None]:


# Attach dividend and split events: a left join on "date", forward fill, and a
# decay column that fades by a factor of 0.95 per row after each event.
for event_frame, event_prefix in ((dividends, "dividends"), (splits, "splits")):
    df = apply_event(df, event_frame, prefix_name=event_prefix, falloff=True, decay_factor=0.95)

# Flag which rows carry event data and replace the remaining NaNs with 0.
for event_prefix in ("dividends", "splits"):
    df = is_not_nan_column_and_default(df, [f"{event_prefix}_event_value", f"{event_prefix}_event_sentiment"])
# df

# Lags 1..59 of every raw price/volume column.
for lagged_column in ("open", "close", "high", "low", "volume"):
    df = create_lag_features(df, lagged_column, lag_steps=list(range(1, 60)))

# Selected lags of the one-row forward return.
df = create_lag_features(df, "close_percent_change_1", lag_steps=[1, 2, 3, 4, 5, 6, 7, 10, 14, 21, 30, 60])

# Calendar features derived from "date".
for add_calendar_feature in (day_of_week, day_of_month, month, year, week_of_year):
    df = add_calendar_feature(df)


# df = create_lag_features(df, "event_name", lag_steps=list(range(1, 60)))
df

Unnamed: 0,date,open,high,low,close,adj close,volume,volitility_open_7,volitility_pct_change_7_open,high_low_7,...,close_percent_change_1_lag_10,close_percent_change_1_lag_14,close_percent_change_1_lag_21,close_percent_change_1_lag_30,close_percent_change_1_lag_60,day_of_week,day_of_month,month,year,week_of_year
0,2004-08-10,0.542679,0.563214,0.541964,0.562857,0.475869,351036000,,,0.021250,...,,,,,,1,10,8,2004,33
1,2004-08-11,0.555357,0.555893,0.540357,0.553750,0.468169,322392000,,,0.015536,...,,,,,,2,11,8,2004,33
2,2004-08-12,0.543750,0.550893,0.540714,0.542321,0.458507,226200800,,,0.010179,...,,,,,,3,12,8,2004,33
3,2004-08-13,0.546429,0.558571,0.542857,0.550714,0.465603,328048000,,,0.015714,...,,,,,,4,13,8,2004,33
4,2004-08-16,0.553571,0.566429,0.547143,0.549643,0.464697,435674400,,,0.019286,...,,,,,,0,16,8,2004,34
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5000,2024-06-21,210.389999,211.889999,207.110001,207.490005,207.490005,246421400,3.274162,0.032110,4.779999,...,0.012392,0.009259,-0.007538,0.010014,0.021213,4,21,6,2024,25
5001,2024-06-24,207.720001,212.699997,206.589996,208.139999,208.139999,80727000,3.174396,0.019971,6.110001,...,-0.019148,0.001649,-0.021058,-0.008235,-0.010559,0,24,6,2024,26
5002,2024-06-25,209.149994,211.380005,208.610001,209.070007,209.070007,56713900,3.384621,0.013439,2.770004,...,0.072649,0.007821,0.016588,0.017645,-0.008456,1,25,6,2024,26
5003,2024-06-26,211.500000,214.860001,210.639999,213.250000,213.250000,66213200,3.319522,0.014561,4.220001,...,0.028578,-0.007097,0.000053,0.006173,-0.006999,2,26,6,2024,26


In [None]:
# Show every column whose name contains "open" (raw value, indicators, lags).
df.filter(like= "open")

Unnamed: 0,open,volitility_open_7,volitility_pct_change_7_open,open_rolling_mean_7,open_rolling_exp_mean_7,open_rolling_std_7,open_bollinger_upper_7,open_bollinger_lower_7,open_fib_0.236,open_fib_0.382,...,open_lag_50,open_lag_51,open_lag_52,open_lag_53,open_lag_54,open_lag_55,open_lag_56,open_lag_57,open_lag_58,open_lag_59
0,0.542679,,,,0.542679,,,,0.558199,0.555097,...,,,,,,,,,,
1,0.555357,,,,0.549924,,,,0.552227,0.549958,...,,,,,,,,,,
2,0.543750,,,,0.547254,,,,0.548491,0.547005,...,,,,,,,,,,
3,0.546429,,,,0.546952,,,,0.554862,0.552568,...,,,,,,,,,,
4,0.553571,,,,0.549122,,,,0.561878,0.559062,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5000,210.389999,3.274162,0.032110,213.034284,210.637411,3.274162,219.582608,206.485961,210.761920,210.064040,...,168.800003,168.699997,169.029999,169.589996,170.289993,168.789993,169.080002,171.190002,171.750000,170.410004
5001,207.720001,3.174396,0.019971,213.084285,209.908059,3.174396,219.433077,206.735493,211.258037,210.365977,...,168.339996,168.800003,168.699997,169.029999,169.589996,170.289993,168.789993,169.080002,171.190002,171.750000
5002,209.149994,3.384621,0.013439,212.285712,209.718542,3.384621,219.054954,205.516471,210.726284,210.321863,...,174.259995,168.339996,168.800003,168.699997,169.029999,169.589996,170.289993,168.789993,169.080002,171.190002
5003,211.500000,3.319522,0.014561,211.949997,210.163907,3.319522,218.589040,205.310954,213.864080,213.247960,...,175.360001,174.259995,168.339996,168.800003,168.699997,169.029999,169.589996,170.289993,168.789993,169.080002


In [None]:
# Persist the engineered feature frame next to the raw inputs.
df.to_parquet("../data/dataset.parquet")