In [None]:
# Import necessary libraries
import os
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

In [None]:
def get_business_days(year):
    """
    List every weekday (Monday through Friday) of a calendar year.

    Public holidays are not excluded; only Saturdays and Sundays are skipped.

    Args:
        year (int): The calendar year (e.g., 2010).

    Returns:
        list: Dates formatted as 'YYYY-MM-DD', in chronological order.
    """
    first_day = datetime(year, 1, 1)
    # Number of days from Jan 1 through Dec 31 inclusive (365 or 366)
    day_count = (datetime(year, 12, 31) - first_day).days + 1
    calendar_days = (first_day + timedelta(days=offset) for offset in range(day_count))
    # weekday() is 0 for Monday ... 6 for Sunday, so < 5 keeps Monday-Friday
    return [day.strftime("%Y-%m-%d") for day in calendar_days if day.weekday() < 5]

In [None]:
def process_commodity(commodity, input_folder, output_folder, year):
    """
    Create a CSV file for a single commodity by concatenating all valid parquet files for the specified year.

    For every business day of `year`, the file
    `<input_folder>/<commodity>/<YYYY-MM-DD>-<commodity>-bbo_month_chain.parquet`
    is read if it exists. Files that cannot be read are reported and skipped.
    The combined data is written to `<output_folder>/<commodity>_<year>.csv`.

    Args:
        commodity (str): The commodity code (e.g., 'CL', 'GC').
        input_folder (str): Path to the folder containing commodity subfolders.
        output_folder (str): Path to save the combined CSV (created if missing).
        year (int): The year to process (e.g., 2010).

    Returns:
        None: Saves the concatenated DataFrame as a CSV.
    """
    # Path to the commodity-specific folder
    commodity_folder = os.path.join(input_folder, commodity)
    dataframes = []

    # Process each business day of the year
    for date in get_business_days(year):
        file_name = f"{date}-{commodity}-bbo_month_chain.parquet"
        file_path = os.path.join(commodity_folder, file_name)
        if not os.path.exists(file_path):
            continue
        try:
            dataframes.append(pd.read_parquet(file_path))
        except Exception as e:
            # Best effort: one unreadable daily file must not abort the whole year
            print(f"Skipping invalid file: {file_path}. Error: {e}")

    if not dataframes:
        print(f"No valid data found for {commodity} in {year}.")
        return

    # Make direct calls work even when the caller has not created the folder
    os.makedirs(output_folder, exist_ok=True)

    concatenated_df = pd.concat(dataframes, ignore_index=True)
    # The file name follows the requested year rather than a hard-coded 2010
    output_csv = os.path.join(output_folder, f"{commodity}_{year}.csv")
    concatenated_df.to_csv(output_csv, index=False)
    print(f"CSV created for {commodity}: {output_csv}")

In [None]:
def process_all_commodities(input_folder, output_folder, commodities, year):
    """
    Build one combined CSV per commodity for the requested year.

    The output folder is created up front if it does not exist, then each
    commodity code is handed to `process_commodity` in the order given.

    Args:
        input_folder (str): Path to the folder containing commodity subfolders.
        output_folder (str): Path to save the combined CSVs.
        commodities (list): List of commodity codes (e.g., ['CL', 'GC', 'NG']).
        year (int): The year to process (e.g., 2010).

    Returns:
        None: Processes and saves CSV files for all commodities.
    """
    os.makedirs(output_folder, exist_ok=True)

    for code in commodities:
        print(f"Processing commodity: {code}")
        process_commodity(
            commodity=code,
            input_folder=input_folder,
            output_folder=output_folder,
            year=year,
        )

In [None]:
if __name__ == "__main__":
    import os
    import pandas as pd
    from datetime import datetime, timedelta

    # Daily parquet files are read from, and the combined CSVs written to, the same folder
    input_folder = output_folder = "Commodities"

    # Commodity codes to combine
    commodities = "CL GC HG HO NG PA PL RB SI ZC ZO".split()

    # Calendar year whose daily files are combined
    year = 2010

    # Build one combined CSV per commodity
    process_all_commodities(
        input_folder=input_folder,
        output_folder=output_folder,
        commodities=commodities,
        year=year,
    )

In [None]:
def readjust_csv(input_csv, output_csv):
    """
    Normalize a combined CSV into the layout used by the later stages.

    The 'index' column is parsed as ISO 8601 timestamps (converted to UTC) and
    replaced by two leading columns: 'date' and 'time_utc'. The fractional
    seconds in 'time_utc' have trailing zeros removed. The quote columns
    'bid', 'bidQ', 'ask', 'askQ' are renamed to 'bid_price', 'bid_quantity',
    'ask_price', 'ask_quantity'. Any other columns are kept as they are.

    Args:
        input_csv (str): Path to the input CSV file.
        output_csv (str): Path to save the readjusted CSV file.

    Returns:
        None: Saves the readjusted DataFrame as a new CSV.

    Raises:
        ValueError: If the 'index' column cannot be converted, or if any of its
            values is missing or not a valid timestamp.
    """
    raw = pd.read_csv(input_csv)

    # Parse timestamps; unparseable entries become NaT and are rejected below
    try:
        stamps = pd.to_datetime(raw['index'], format='ISO8601', utc=True, errors='coerce')
    except Exception as exc:
        raise ValueError(f"Error converting 'index' in {input_csv}: {exc}")

    if stamps.isnull().any():
        raise ValueError(f"Invalid or missing datetime values found in 'index' column of {input_csv}.")

    # '%f' always prints six digits, so strip trailing zeros and then a dangling dot
    clock = stamps.dt.strftime('%H:%M:%S.%f').str.rstrip('0').str.rstrip('.')
    leading = pd.DataFrame({'date': stamps.dt.date, 'time_utc': clock})

    new_names = {
        "bid": "bid_price",
        "bidQ": "bid_quantity",
        "ask": "ask_price",
        "askQ": "ask_quantity",
    }
    remainder = raw.drop(columns=['index']).rename(columns=new_names)
    # Freshly derived 'date'/'time_utc' replace any same-named input columns
    remainder = remainder[[name for name in remainder.columns if name not in ['date', 'time_utc']]]

    # 'date' and 'time_utc' lead, followed by the remaining columns in input order
    result = pd.concat([leading, remainder], axis=1)

    result.to_csv(output_csv, index=False)
    print(f"Readjusted CSV saved: {output_csv}")

In [None]:
def readjust_all_csvs(input_folder, output_folder, commodities, year=2010):
    """
    Readjust all CSV files for the specified commodities.

    For each commodity, `<input_folder>/<commodity>_<year>.csv` is converted by
    `readjust_csv` into `<output_folder>/<commodity>_<year>_readjust.csv`.
    Commodities whose input file is missing are reported and skipped.

    Args:
        input_folder (str): Path to the folder containing the raw CSV files.
        output_folder (str): Path to save the readjusted CSV files.
        commodities (list): List of commodity codes (e.g., ['CL', 'GC', 'NG']).
        year (int): Year embedded in the file names. Defaults to 2010, which
            matches the previously hard-coded file names.

    Returns:
        None: Readjusts and saves CSV files for all commodities.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    for commodity in commodities:
        input_csv = os.path.join(input_folder, f"{commodity}_{year}.csv")
        output_csv = os.path.join(output_folder, f"{commodity}_{year}_readjust.csv")

        if not os.path.exists(input_csv):
            print(f"Input CSV not found for {commodity}: {input_csv}")
            continue

        print(f"Readjusting {input_csv}...")
        readjust_csv(input_csv, output_csv)

In [None]:
if __name__ == "__main__":
    import os
    import pandas as pd

    # Input and output folders
    input_folder = "Commodities"
    output_folder = "Commodities"

    # Readjust every commodity: the transform, clean and display cells further
    # down iterate over this same full list and expect a readjusted CSV for each code
    commodities = ['CL', 'GC', 'HG', 'HO', 'NG', 'PA', 'PL', 'RB', 'SI', 'ZC', 'ZO']

    # Readjust all CSV files
    readjust_all_csvs(input_folder, output_folder, commodities)

In [None]:
def transform_csv(input_csv, output_csv):
    """
    Transform a CSV to aggregate rows by minute, ensuring:
    - Quantity-weighted averages for bid_price and ask_price.
    - Total sums for bid_quantity and ask_quantity.
    - Only minutes that have at least one row in the input appear in the output.

    A weighted price is left empty for a minute whose total quantity on that
    side is not positive. Rows whose 'date' + 'time_utc' cannot be parsed as a
    timestamp are dropped.

    Args:
        input_csv (str): Path to the input CSV file with columns 'date',
            'time_utc', 'bid_price', 'bid_quantity', 'ask_price', 'ask_quantity'.
        output_csv (str): Path to save the transformed CSV file.

    Returns:
        None: Saves the transformed DataFrame as a new CSV.
    """
    # Load the CSV into a DataFrame
    df = pd.read_csv(input_csv)

    # 'time_utc' may or may not carry fractional seconds. Without an explicit
    # format pandas infers one from the first row, and with errors='coerce' every
    # row in the other shape would turn into NaT and be dropped. 'ISO8601'
    # accepts both shapes.
    timestamps = pd.to_datetime(
        df['date'] + ' ' + df['time_utc'], format='ISO8601', errors='coerce'
    )

    # Keep only rows with a valid timestamp; copy so new columns can be added safely
    valid = df.loc[timestamps.notna()].copy()
    valid['minute'] = timestamps.dt.floor('min')

    # price * quantity per row, so weighted averages reduce to a ratio of grouped sums
    valid['bid_notional'] = valid['bid_price'] * valid['bid_quantity']
    valid['ask_notional'] = valid['ask_price'] * valid['ask_quantity']

    totals = valid.groupby('minute')[
        ['bid_notional', 'bid_quantity', 'ask_notional', 'ask_quantity']
    ].sum()

    # Quantities are written as floats, as the per-group aggregation did before
    bid_qty = totals['bid_quantity'].astype(float)
    ask_qty = totals['ask_quantity'].astype(float)

    grouped = pd.DataFrame({
        # .where() blanks the price when there is no positive quantity to weight by
        'bid_price': (totals['bid_notional'] / bid_qty).where(bid_qty > 0),
        'bid_quantity': bid_qty,
        'ask_price': (totals['ask_notional'] / ask_qty).where(ask_qty > 0),
        'ask_quantity': ask_qty,
    }).reset_index()

    # Split the minute bucket back into 'date' and 'time_utc'
    grouped['date'] = grouped['minute'].dt.date
    grouped['time_utc'] = grouped['minute'].dt.strftime('%H:%M:%S')

    # Reorder columns
    grouped = grouped[['date', 'time_utc', 'bid_price', 'bid_quantity', 'ask_price', 'ask_quantity']]

    # Save the transformed DataFrame to a new CSV
    grouped.to_csv(output_csv, index=False)
    print(f"Transformed CSV saved: {output_csv}")

In [None]:
def transform_all_csvs(input_folder, output_folder, commodities, year=2010):
    """
    Transform all CSV files for the specified commodities to aggregate rows by minute.

    For each commodity, `<input_folder>/<commodity>_<year>_readjust.csv` is
    aggregated by `transform_csv` into
    `<output_folder>/<commodity>_<year>_transformed.csv`. Commodities whose
    input file is missing are reported and skipped.

    Args:
        input_folder (str): Path to the folder containing the readjusted CSV files.
        output_folder (str): Path to save the transformed CSV files.
        commodities (list): List of commodity codes (e.g., ['CL', 'GC', 'NG']).
        year (int): Year embedded in the file names. Defaults to 2010, which
            matches the previously hard-coded file names.

    Returns:
        None: Transforms and saves CSV files for all commodities.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    for commodity in commodities:
        input_csv = os.path.join(input_folder, f"{commodity}_{year}_readjust.csv")
        output_csv = os.path.join(output_folder, f"{commodity}_{year}_transformed.csv")

        if not os.path.exists(input_csv):
            print(f"Input CSV not found for {commodity}: {input_csv}")
            continue

        print(f"Transforming {input_csv}...")
        transform_csv(input_csv, output_csv)

In [None]:
if __name__ == "__main__":
    import os
    import pandas as pd

    # Readjusted CSVs are read from, and per-minute CSVs written to, the same folder
    input_folder = output_folder = "Commodities"

    # Commodity codes to aggregate
    commodities = "CL GC HG HO NG PA PL RB SI ZC ZO".split()

    # Aggregate every readjusted CSV to one row per minute
    transform_all_csvs(
        input_folder=input_folder,
        output_folder=output_folder,
        commodities=commodities,
    )

In [None]:
def clean_transformed_csv(input_csv, output_csv, volatility_window=10):
    """
    Append derived market columns to a per-minute CSV and save the result.

    Added columns, in this order:
    - mid_price: (bid_price + ask_price) / 2
    - order_density: bid_quantity + ask_quantity
    - spread: ask_price - bid_price
    - log_return: log(mid_price_t / mid_price_t-1), computed between consecutive rows
    - volatility: rolling standard deviation of log_return

    Args:
        input_csv (str): Path to the transformed CSV file.
        output_csv (str): Path to save the cleaned CSV file.
        volatility_window (int): Rolling window size (in rows) for volatility.

    Returns:
        None: Saves the cleaned DataFrame as a new CSV.
    """
    quotes = pd.read_csv(input_csv)

    mid = (quotes['bid_price'] + quotes['ask_price']) / 2
    # The first row has no predecessor, so its return is NaN
    returns = np.log(mid / mid.shift(1))
    # min_periods=1 lets short windows produce a value once two returns are
    # available; a single observation still has no standard deviation
    rolling_std = returns.rolling(window=volatility_window, min_periods=1).std()

    enriched = quotes.assign(
        mid_price=mid,
        order_density=quotes['bid_quantity'] + quotes['ask_quantity'],
        spread=quotes['ask_price'] - quotes['bid_price'],
        log_return=returns,
        volatility=rolling_std,
    )

    enriched.to_csv(output_csv, index=False)
    print(f"Cleaned CSV with volatility saved: {output_csv}")

In [None]:
def clean_all_transformed_csvs(input_folder, output_folder, commodities, year=2010):
    """
    Clean all transformed CSV files by adding additional calculated columns.

    For each commodity, `<input_folder>/<commodity>_<year>_transformed.csv` is
    processed by `clean_transformed_csv` (with its default volatility window)
    into `<output_folder>/<commodity>_<year>_cleaned.csv`. Commodities whose
    input file is missing are reported and skipped.

    Args:
        input_folder (str): Path to the folder containing transformed CSV files.
        output_folder (str): Path to save the cleaned CSV files.
        commodities (list): List of commodity codes (e.g., ['CL', 'GC', 'NG']).
        year (int): Year embedded in the file names. Defaults to 2010, which
            matches the previously hard-coded file names.

    Returns:
        None: Cleans and saves CSV files for all commodities.
    """
    # Ensure the output folder exists
    os.makedirs(output_folder, exist_ok=True)

    for commodity in commodities:
        input_csv = os.path.join(input_folder, f"{commodity}_{year}_transformed.csv")
        output_csv = os.path.join(output_folder, f"{commodity}_{year}_cleaned.csv")

        if not os.path.exists(input_csv):
            print(f"Transformed CSV not found for {commodity}: {input_csv}")
            continue

        print(f"Cleaning {input_csv}...")
        clean_transformed_csv(input_csv, output_csv)

In [None]:
if __name__ == "__main__":
    import os
    import pandas as pd

    # Per-minute CSVs are read from, and cleaned CSVs written to, the same folder
    input_folder = output_folder = "Commodities"

    # Commodity codes to clean
    commodities = "CL GC HG HO NG PA PL RB SI ZC ZO".split()

    # Add the derived columns to every transformed CSV
    clean_all_transformed_csvs(
        input_folder=input_folder,
        output_folder=output_folder,
        commodities=commodities,
    )

In [78]:
def display_cleaned_csvs(input_folder, commodities, commodity_names, year=2010):
    """
    Display all cleaned CSVs as DataFrames with a descriptive message.

    For each commodity, `<input_folder>/<commodity>_<year>_cleaned.csv` is
    loaded and shown; commodities whose file is missing are reported and skipped.

    Note: this relies on a `display` callable being available in the namespace
    (it is not imported in this notebook's import cell).

    Args:
        input_folder (str): Path to the folder containing cleaned CSV files.
        commodities (list): List of commodity codes (e.g., ['CL', 'GC', 'NG']).
        commodity_names (dict): Dictionary mapping commodity codes to their descriptions.
            Codes without an entry are labelled "Unknown description".
        year (int): Year embedded in the file names. Defaults to 2010, which
            matches the previously hard-coded file names.

    Returns:
        None: Displays DataFrames with descriptive messages.
    """
    for commodity in commodities:
        cleaned_csv = os.path.join(input_folder, f"{commodity}_{year}_cleaned.csv")

        if not os.path.exists(cleaned_csv):
            print(f"Cleaned CSV not found for {commodity}: {cleaned_csv}")
            continue

        description = commodity_names.get(commodity, "Unknown description")
        print(f"\nDisplaying the cleaned dataframe corresponding to the {commodity} commodity: {description}")
        df = pd.read_csv(cleaned_csv)
        display(df)  # Display the DataFrame in the notebook

# Folder holding the cleaned CSVs
input_folder = "Commodities"

# Human-readable description for each commodity code
commodity_names = dict(
    CL='Crude Oil',
    GC='Gold',
    HG='Copper',
    HO='Heating Oil',
    NG='Natural Gas',
    PA='Palladium',
    PL='Platinum',
    RB='Gasoline',
    SI='Silver',
    ZC='Corn',
    ZO='Oats',
)

# Show every commodity that has a description, in the order listed above
commodities = list(commodity_names)

# Display all cleaned CSVs
display_cleaned_csvs(input_folder, commodities, commodity_names)


Displaying the cleaned dataframe corresponding to the CL commodity: Crude Oil


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,13354.607377,30.0,13357.908855,168.0,13356.258116,198.0,3.301478,,
1,2010-01-04,05:01:00,13354.607377,4.0,13357.918710,52.0,13356.263044,56.0,3.311333,3.689341e-07,
2,2010-01-04,05:02:00,13354.607377,5.0,13357.918710,75.0,13356.263044,80.0,3.311333,0.000000e+00,2.608758e-07
3,2010-01-04,05:03:00,13353.613977,5.0,13357.888607,55.0,13355.751292,60.0,4.274630,-3.831621e-05,2.222914e-05
4,2010-01-04,05:04:00,13352.951710,3.0,13356.263044,9.0,13354.607377,12.0,3.311333,-8.565330e-05,4.076551e-05
...,...,...,...,...,...,...,...,...,...,...,...
152159,2010-11-19,19:26:00,2222.966763,2812.0,2223.666796,2379.0,2223.316780,5191.0,0.700033,-1.579401e-04,5.140127e-04
152160,2010-11-19,19:27:00,2224.705619,5730.0,2225.212281,4631.0,2224.958950,10361.0,0.506662,7.383401e-04,5.608628e-04
152161,2010-11-19,19:28:00,2225.532741,2662.0,2226.412829,3171.0,2225.972785,5833.0,0.880089,4.555608e-04,5.144656e-04
152162,2010-11-19,19:29:00,2226.396814,1084.0,2227.867892,9774.0,2227.132353,10858.0,1.471079,5.207908e-04,4.913597e-04



Displaying the cleaned dataframe corresponding to the GC commodity: Gold


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,718.647778,383.0,719.000040,364.0,718.823909,747.0,0.352262,,
1,2010-01-04,05:01:00,718.830086,1070.0,719.122249,591.0,718.976168,1661.0,0.292163,0.000212,
2,2010-01-04,05:02:00,718.735651,150.0,719.030552,207.0,718.883101,357.0,0.294901,-0.000129,0.000241
3,2010-01-04,05:03:00,718.597532,218.0,719.021307,305.0,718.809419,523.0,0.423775,-0.000103,0.000190
4,2010-01-04,05:04:00,718.718644,175.0,719.180632,165.0,718.949638,340.0,0.461988,0.000195,0.000185
...,...,...,...,...,...,...,...,...,...,...,...
266521,2010-12-29,18:27:00,959.952673,534.0,960.274910,160.0,960.113792,694.0,0.322237,-0.000041,0.000174
266522,2010-12-29,18:28:00,959.897906,689.0,960.108552,121.0,960.003229,810.0,0.210646,-0.000115,0.000176
266523,2010-12-29,18:29:00,959.694780,1643.0,960.059987,392.0,959.877383,2035.0,0.365207,-0.000131,0.000176
266524,2010-12-29,18:30:00,954.681960,6.0,960.274992,31.0,957.478476,37.0,5.593033,-0.002502,0.000800



Displaying the cleaned dataframe corresponding to the HG commodity: Copper


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-11-26,05:00:00,373.720989,374.0,373.904728,624.0,373.812858,998.0,0.183738,,
1,2010-11-26,05:01:00,373.750000,70.0,373.913536,181.0,373.831768,251.0,0.163536,0.000051,
2,2010-11-26,05:02:00,373.751570,223.0,373.903611,180.0,373.827590,403.0,0.152042,-0.000011,0.000044
3,2010-11-26,05:03:00,373.852757,526.0,374.000000,245.0,373.926378,771.0,0.147243,0.000264,0.000145
4,2010-11-26,05:04:00,373.730451,133.0,373.856067,239.0,373.793259,372.0,0.125616,-0.000356,0.000257
...,...,...,...,...,...,...,...,...,...,...,...
23740,2010-12-29,17:56:00,429.901587,63.0,430.357389,203.0,430.129488,266.0,0.455802,0.005157,0.003461
23741,2010-12-29,17:57:00,430.117544,57.0,430.522121,165.0,430.319833,222.0,0.404577,0.000442,0.003463
23742,2010-12-29,17:58:00,425.103061,147.0,430.838261,690.0,427.970661,837.0,5.735200,-0.005474,0.003177
23743,2010-12-29,17:59:00,427.420039,257.0,430.912304,829.0,429.166171,1086.0,3.492265,0.002790,0.003148



Displaying the cleaned dataframe corresponding to the HO commodity: Heating Oil


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:13:00,93.438540,3.0,93.483556,8.0,93.461048,11.0,0.045016,,
1,2010-01-04,05:14:00,93.422567,1.0,93.483556,1.0,93.453061,2.0,0.060989,-0.000085,
2,2010-01-04,05:15:00,93.370290,1.0,93.483556,1.0,93.426923,2.0,0.113266,-0.000280,0.000137
3,2010-01-04,05:25:00,93.339795,15.0,93.548902,8.0,93.444349,23.0,0.209106,0.000186,0.000234
4,2010-01-04,05:26:00,93.374646,7.0,93.553258,4.0,93.463952,11.0,0.178612,0.000210,0.000234
...,...,...,...,...,...,...,...,...,...,...,...
170953,2010-11-30,19:26:00,4.086745,850.0,4.091561,698.0,4.089153,1548.0,0.004815,-0.000168,0.001073
170954,2010-11-30,19:27:00,4.071108,496.0,4.088014,772.0,4.079561,1268.0,0.016906,-0.002348,0.001241
170955,2010-11-30,19:28:00,4.064868,1200.0,4.077605,979.0,4.071237,2179.0,0.012737,-0.002043,0.001185
170956,2010-11-30,19:29:00,4.066956,743.0,4.078904,1384.0,4.072930,2127.0,0.011948,0.000416,0.001229



Displaying the cleaned dataframe corresponding to the NG commodity: Natural Gas


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,0.000917,82.0,0.000918,20.0,0.000918,102.0,1.017796e-06,,
1,2010-01-04,05:01:00,0.000917,30.0,0.000918,18.0,0.000918,48.0,9.569052e-07,0.000406,
2,2010-01-04,05:02:00,0.000917,47.0,0.000919,38.0,0.000918,85.0,1.208202e-06,0.000214,0.000136
3,2010-01-04,05:03:00,0.000917,6.0,0.000918,3.0,0.000918,9.0,9.355219e-07,-0.000261,0.000343
4,2010-01-04,05:04:00,0.000917,4.0,0.000918,2.0,0.000918,6.0,7.216883e-07,-0.000146,0.000310
...,...,...,...,...,...,...,...,...,...,...,...
156254,2010-11-24,19:26:00,0.001188,1134.0,0.001190,998.0,0.001189,2132.0,1.787907e-06,0.000453,0.001013
156255,2010-11-24,19:27:00,0.001186,787.0,0.001187,498.0,0.001186,1285.0,1.166307e-06,-0.002000,0.001106
156256,2010-11-24,19:28:00,0.001184,1569.0,0.001186,594.0,0.001185,2163.0,1.329664e-06,-0.001156,0.001095
156257,2010-11-24,19:29:00,0.001183,3882.0,0.001188,681.0,0.001186,4563.0,4.974143e-06,0.000551,0.001158



Displaying the cleaned dataframe corresponding to the PA commodity: Palladium


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,413.000000,20.0,413.300000,9.0,413.150000,29.0,0.300000,,
1,2010-01-04,05:12:00,413.375000,2.0,413.700000,2.0,413.537500,4.0,0.325000,0.000937,
2,2010-01-04,05:16:00,413.500000,1.0,413.700000,1.0,413.600000,2.0,0.200000,0.000151,0.000556
3,2010-01-04,05:25:00,413.450000,4.0,413.716667,3.0,413.583333,7.0,0.266667,-0.000040,0.000518
4,2010-01-04,05:26:00,413.000000,10.0,413.450000,3.0,413.225000,13.0,0.450000,-0.000867,0.000741
...,...,...,...,...,...,...,...,...,...,...,...
178416,2010-12-29,17:55:00,792.271429,7.0,795.085714,14.0,793.678571,21.0,2.814286,0.000020,0.000119
178417,2010-12-29,17:57:00,787.285714,28.0,794.996552,58.0,791.141133,86.0,7.710837,-0.003202,0.001031
178418,2010-12-29,17:58:00,786.917143,70.0,793.951678,149.0,790.434410,219.0,7.034535,-0.000894,0.001025
178419,2010-12-29,17:59:00,790.090909,22.0,793.748684,38.0,791.919797,60.0,3.657775,0.001877,0.001253



Displaying the cleaned dataframe corresponding to the PL commodity: Platinum


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,1483.6720,100.0,1491.70,100.0,1487.68600,200.0,8.0280,,
1,2010-01-04,05:01:00,1483.5875,8.0,1491.70,8.0,1487.64375,16.0,8.1125,-0.000028,
2,2010-01-04,05:02:00,1482.6000,1.0,1491.70,1.0,1487.15000,2.0,9.1000,-0.000332,0.000215
3,2010-01-04,05:04:00,1482.6000,2.0,1491.55,2.0,1487.07500,4.0,8.9500,-0.000050,0.000169
4,2010-01-04,05:07:00,1482.8500,2.0,1491.50,2.0,1487.17500,4.0,8.6500,0.000067,0.000172
...,...,...,...,...,...,...,...,...,...,...,...
143977,2010-12-23,21:31:00,1664.5000,2.0,1739.60,1.0,1702.05000,3.0,75.1000,0.000000,0.000279
143978,2010-12-23,21:34:00,1664.5000,1.0,1739.60,1.0,1702.05000,2.0,75.1000,0.000000,0.000279
143979,2010-12-23,21:37:00,1664.7000,1.0,1739.60,1.0,1702.15000,2.0,74.9000,0.000059,0.000275
143980,2010-12-23,21:40:00,1665.3000,1.0,1739.60,1.0,1702.45000,2.0,74.3000,0.000176,0.000238



Displaying the cleaned dataframe corresponding to the RB commodity: Gasoline


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:04:00,2.091329,7.0,2.095000,27.0,2.093164,34.0,0.003671,,
1,2010-01-04,05:07:00,2.091300,4.0,2.094900,11.0,2.093100,15.0,0.003600,-0.000031,
2,2010-01-04,05:08:00,2.091300,2.0,2.095000,9.0,2.093150,11.0,0.003700,0.000024,0.000039
3,2010-01-04,05:20:00,2.091400,1.0,2.095000,9.0,2.093200,10.0,0.003600,0.000024,0.000032
4,2010-01-04,05:21:00,2.091300,2.0,2.095000,9.0,2.093150,11.0,0.003700,-0.000024,0.000030
...,...,...,...,...,...,...,...,...,...,...,...
167886,2010-11-30,19:26:00,2.245919,351.0,2.248455,262.0,2.247187,613.0,0.002536,-0.001287,0.001655
167887,2010-11-30,19:27:00,2.247248,427.0,2.248960,243.0,2.248104,670.0,0.001712,0.000408,0.001655
167888,2010-11-30,19:28:00,2.240292,497.0,2.242843,576.0,2.241567,1073.0,0.002552,-0.002912,0.001605
167889,2010-11-30,19:29:00,2.237412,945.0,2.242326,793.0,2.239869,1738.0,0.004915,-0.000758,0.001537



Displaying the cleaned dataframe corresponding to the SI commodity: Silver


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,05:00:00,460.969054,118.0,461.169980,177.0,461.069517,295.0,0.200926,,
1,2010-01-04,05:01:00,460.930783,72.0,461.169055,67.0,461.049919,139.0,0.238272,-0.000043,
2,2010-01-04,05:02:00,460.908202,54.0,461.179165,56.0,461.043684,110.0,0.270963,-0.000014,0.000020
3,2010-01-04,05:03:00,460.908202,54.0,461.142216,66.0,461.025209,120.0,0.234014,-0.000040,0.000016
4,2010-01-04,05:04:00,460.885919,152.0,461.169724,287.0,461.027822,439.0,0.283805,0.000006,0.000023
...,...,...,...,...,...,...,...,...,...,...,...
269503,2010-12-29,18:21:00,908.863998,84.0,910.110916,52.0,909.487457,136.0,1.246918,0.000280,0.000316
269504,2010-12-29,18:22:00,909.221081,77.0,910.476066,43.0,909.848574,120.0,1.254985,0.000397,0.000328
269505,2010-12-29,18:23:00,909.747470,228.0,911.069709,206.0,910.408589,434.0,1.322239,0.000615,0.000364
269506,2010-12-29,18:24:00,909.866630,205.0,911.214482,153.0,910.540556,358.0,1.347852,0.000145,0.000364



Displaying the cleaned dataframe corresponding to the ZC commodity: Corn


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,06:00:00,278.981368,36.0,279.147924,26.0,279.064646,62.0,0.166556,,
1,2010-01-04,06:01:00,278.981368,45.0,279.147924,42.0,279.064646,87.0,0.166556,2.220446e-16,
2,2010-01-04,06:02:00,278.981368,27.0,279.147924,6.0,279.064646,33.0,0.166556,-2.220446e-16,3.140185e-16
3,2010-01-04,06:05:00,278.981368,19.0,279.147924,5.0,279.064646,24.0,0.166556,2.220446e-16,2.563950e-16
4,2010-01-04,06:06:00,278.981368,18.0,279.147924,7.0,279.064646,25.0,0.166556,-2.220446e-16,2.563950e-16
...,...,...,...,...,...,...,...,...,...,...,...
102373,2010-12-14,17:59:00,456.825328,259.0,465.786032,113.0,461.305680,372.0,8.960704,-7.451205e-03,2.370608e-03
102374,2010-12-14,18:00:00,456.375455,192.0,467.784841,16.0,462.080148,208.0,11.409386,1.677454e-03,2.471062e-03
102375,2010-12-14,18:01:00,456.375455,36.0,472.277918,4.0,464.326686,40.0,15.902463,4.850014e-03,3.024645e-03
102376,2010-12-14,18:16:00,433.556682,2.0,476.569059,2.0,455.062870,4.0,43.012377,-2.015278e-02,7.015643e-03



Displaying the cleaned dataframe corresponding to the ZO commodity: Oats


Unnamed: 0,date,time_utc,bid_price,bid_quantity,ask_price,ask_quantity,mid_price,order_density,spread,log_return,volatility
0,2010-01-04,00:09:00,276.750,3.0,277.25,2.0,277.0000,5.0,0.500,,
1,2010-01-04,00:46:00,276.125,2.0,277.25,2.0,276.6875,4.0,1.125,-0.001129,
2,2010-01-04,01:31:00,276.250,1.0,277.25,6.0,276.7500,7.0,1.000,0.000226,0.000958
3,2010-01-04,01:56:00,276.250,1.0,277.00,1.0,276.6250,2.0,0.750,-0.000452,0.000677
4,2010-01-04,02:18:00,276.250,1.0,277.00,2.0,276.6250,3.0,0.750,0.000000,0.000597
...,...,...,...,...,...,...,...,...,...,...,...
29826,2010-12-14,17:29:00,375.250,4.0,387.00,3.0,381.1250,7.0,11.750,-0.000060,0.000385
29827,2010-12-14,17:43:00,375.250,4.0,387.00,2.0,381.1250,6.0,11.750,0.000000,0.000277
29828,2010-12-14,17:45:00,375.250,2.0,387.00,2.0,381.1250,4.0,11.750,0.000000,0.000266
29829,2010-12-14,18:01:00,374.500,2.0,387.25,1.0,380.8750,3.0,12.750,-0.000656,0.000284
