In [2]:
import pandas as pd


In [45]:
trade = pd.read_parquet('rel_trade.parquet')
order = pd.read_parquet('rel_order.parquet')

In [59]:
order.head()

Unnamed: 0,TimeStamp,Side,Price,Volume_Disclosed,Price_Type,Symbol,Original_Qty,Order Number,IOC Flag,Activity Type
16358228,79631194602407,B,866.0,0.0,Limit,RELIANCE,1.0,1200000000001802,N,1
16358230,79631194602436,S,1055.0,0.0,Limit,RELIANCE,1.0,1200000000001804,N,1
16358250,79631194602723,S,960.0,0.0,Limit,RELIANCE,24.0,1200000000001826,N,1
16358258,79631194602855,B,950.0,0.0,Limit,RELIANCE,1.0,1200000000001835,N,1
16358259,79631194602868,B,950.0,0.0,Limit,RELIANCE,1.0,1200000000001836,N,1


In [27]:
trade.head()

Unnamed: 0,trade_time,price,quantity,buy_order_number,sell_order_number
5096248,79631223195796,965.0,1,1200000000056413,1200000000064315
5096249,79631223195797,965.0,1,1200000000056512,1200000000064315
5096250,79631223195798,965.0,10,1200000000010246,1200000000064315
5096251,79631223195799,965.0,3,1200000000039060,1200000000064315
5096252,79631223195800,965.0,47,1200000000031797,1200000000064315


In [40]:
import pandas as pd
import csv

def create_quote_stream_corrected(
    order_log_path='rel_order.parquet',
    trade_log_path='rel_trade.parquet',
    quote_output_path='quote_stream.csv'
):
    """
    Corrected version to process logs and create a quote stream.
    Implements robust "delete-then-add" logic for modified orders to prevent
    a corrupted live book and ensure all events are processed correctly.
    """
    # --- Step 1: Loading and Preparing Data ---
    print("--- Step 1: Loading and Preparing Data ---")
    try:
        order_df = pd.read_parquet(order_log_path)
        trade_df = pd.read_parquet(trade_log_path)
        for col in ['Order Number', 'buy_order_number', 'sell_order_number']:
            if col in order_df.columns: order_df[col] = order_df[col].astype(str)
            if col in trade_df.columns: trade_df[col] = trade_df[col].astype(str)
    except FileNotFoundError as e:
        print(f"\nError: Data file not found. Make sure '{e.filename}' is in the correct directory.")
        return

    order_df_copy = order_df[order_df['Price_Type'] != 'Trigger'].copy()
    order_df_copy.rename(columns={'Timestamp': 'timestamp', 'Order Number': 'order_number'}, inplace=True)
    def activity_to_event(act_type):
        if act_type == 3: return 'CANCEL'
        if act_type == 4: return 'MODIFY'
        return 'NEW'
    order_df_copy['event_type'] = order_df_copy['Activity Type'].apply(activity_to_event)
    order_events = order_df_copy[['timestamp', 'event_type', 'order_number', 'Side', 'Price', 'Volume_Disclosed', 'Original_Qty']]

    trade_df_copy = trade_df.copy()
    trade_df_copy.rename(columns={'trade_time': 'timestamp'}, inplace=True)
    trade_df_copy['event_type'] = 'TRADE'
    trade_events = trade_df_copy[['timestamp', 'event_type', 'price', 'quantity', 'buy_order_number', 'sell_order_number']]

    # --- Step 2: Combining and Sorting All Market Events ---
    print("\n--- Step 2: Combining and Sorting All Market Events ---")
    all_events = pd.concat([order_events, trade_events], ignore_index=True, sort=False)
    all_events.sort_values(by='timestamp', inplace=True)
    all_events.reset_index(drop=True, inplace=True)
    total_events = len(all_events)
    print(f"Total chronological events to process: {total_events}")

    # --- Step 3: Reconstructing Order Book with Corrected Logic ---
    live_book = {}
    last_traded_price = 0.0
    flag = 0
    
    with open(quote_output_path, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['timestamp', 'best_bid', 'depth_bid', 'best_bid_ud', 'depth_bid_ud',  'best_ask', 'depth_ask', 'best_ask_ud', 'depth_ask_ud', 'ltp'])

        print("\n--- Step 3: Reconstructing Order Book and Generating Quote Stream ---")
        for index, event in all_events.iterrows():
            timestamp = event['timestamp']
            
            # Part A: Update Live Book
            if event['event_type'] == 'NEW' and event['Original_Qty'] >= 1e-6:
                live_book[event['order_number']] = {
                    'side': event['Side'], 'price': event['Price'], 'quantity': event['Original_Qty'], 'disclosed' : event['Volume_Disclosed'], 'initial_d' : event['Volume_Disclosed']
                }
            
            # *** CORRECTED MODIFICATION LOGIC ***
            elif event['event_type'] == 'MODIFY':
                order_num = event['order_number']
                volume = event['Original_Qty']
                if order_num in live_book:
                    del live_book[order_num]

                if volume >= 1e-6:
                    live_book[order_num] = {
                      'side': event['Side'], 'price': event['Price'], 'quantity': event['Original_Qty'], 'disclosed' : event['Volume_Disclosed'], 'initial_d' : event['Volume_Disclosed']
                }

            elif event['event_type'] == 'CANCEL':
                if event['order_number'] in live_book:
                    del live_book[event['order_number']]

            elif event['event_type'] == 'TRADE':
                if not live_book:
                    best_bid, best_ask, best_bid_ud, best_ask_ud = 0.0, 0.0, 0.0, 0.0
                else:
                    bids = [order['price'] for order in live_book.values() if order['side'] == 'B' and order['disclosed'] >= 1e-6]
                    asks = [order['price'] for order in live_book.values() if order['side'] == 'S' and order['disclosed'] >= 1e-6]
                    best_bid = max(bids) if bids else 0.0
                    best_ask = min(asks) if asks else 0.0
                    bids = [order['price'] for order in live_book.values() if order['side'] == 'B']
                    asks = [order['price'] for order in live_book.values() if order['side'] == 'S']
                    best_bid_ud = max(bids) if bids else 0.0
                    best_ask_ud = min(asks) if asks else 0.0

               
                bid_depth = sum(o['disclosed'] for o in live_book.values() if o['side'] == 'B' and o['price'] == best_bid)
                ask_depth = sum(o['disclosed'] for o in live_book.values() if o['side'] == 'S' and o['price'] == best_ask)
                depth_bid_ud = sum(o['quantity'] for o in live_book.values() if o['side'] == 'B' and o['price'] == best_bid_ud)
                depth_ask_ud = sum(o['quantity'] for o in live_book.values() if o['side'] == 'S' and o['price'] == best_ask_ud)

                    
                last_traded_price = event['price']
                trade_qty = event['quantity']
                for order_num_hit in [event['buy_order_number'], event['sell_order_number']]:
                    if order_num_hit in live_book:
                        initial_disclosed_size = live_book[order_num_hit]['initial_d']
                        live_book[order_num_hit]['quantity'] -= trade_qty
                        live_book[order_num_hit]['disclosed'] -= trade_qty
                        if live_book[order_num_hit]['quantity'] <= 1e-6:
                            del live_book[order_num_hit]
                        elif live_book[order_num_hit]['disclosed'] <= 1e-6:
                            remaining_qty = live_book[order_num_hit]['quantity']
                            live_book[order_num_hit]['disclosed'] = min(initial_disclosed_size, remaining_qty)

                writer.writerow([timestamp, best_bid, bid_depth, best_bid_ud, depth_bid_ud,  best_ask, ask_depth, best_ask_ud, depth_ask_ud, last_traded_price])

        
            if (index + 1) % 100000 == 0 or (index + 1) == total_events:
                progress_pct = ((index + 1) / total_events) * 100
                print(f"  Processed {index + 1} / {total_events} events ({progress_pct:.1f}%)", end='\r')

    print(f"\n\nProcessing complete! Quote stream saved to '{quote_output_path}'. 🚀")

if __name__ == '__main__':
    create_quote_stream_corrected()

--- Step 1: Loading and Preparing Data ---

--- Step 2: Combining and Sorting All Market Events ---
Total chronological events to process: 1606650

--- Step 3: Reconstructing Order Book and Generating Quote Stream ---
  Processed 1606650 / 1606650 events (100.0%)

Processing complete! Quote stream saved to 'quote_stream.csv'. 🚀


In [59]:
trade_q = trade['quantity'].reset_index()
trade_q = trade_q.drop('index', axis = 1)

trade_q.head()
len(trade_q)

117456

In [60]:
dff = dff.merge(trade_q, left_index=True, right_index=True, how='inner')
len(dff)


117456

In [78]:
dff.head(60006)

Unnamed: 0,timestamp,best_bid,depth_bid,best_bid_ud,depth_bid_ud,best_ask,depth_ask,best_ask_ud,depth_ask_ud,ltp,quantity
0,79631223195796,0.00,0.0,1025.0,1.0,0.00,0.0,864.55,62.0,965.00,1
1,79631223195797,0.00,0.0,1020.0,1.0,0.00,0.0,864.55,61.0,965.00,1
2,79631223195798,0.00,0.0,1000.0,13.0,0.00,0.0,864.55,60.0,965.00,10
3,79631223195799,0.00,0.0,1000.0,3.0,0.00,0.0,864.55,50.0,965.00,3
4,79631223195800,0.00,0.0,999.0,124.0,0.00,0.0,864.55,47.0,965.00,47
...,...,...,...,...,...,...,...,...,...,...,...
60001,79631892960276,973.15,1.0,974.0,50.0,973.25,20.0,973.25,58.0,973.25,5
60002,79631892960277,973.15,1.0,974.0,50.0,973.25,17.0,973.25,53.0,973.25,4
60003,79631892960278,973.15,1.0,974.0,50.0,973.25,17.0,973.25,49.0,973.25,11
60004,79631892960279,973.15,1.0,974.0,50.0,973.25,17.0,973.25,38.0,973.25,2


In [67]:
dff.to_parquet('reliance_taq.parquet')

In [69]:
dff = pd.read_parquet('reliance_taq.parquet')
len(dff)

117456

In [18]:
try:
    order_df = pd.read_parquet('rel_order.parquet')
    trade_df = pd.read_parquet('rel_trade.parquet')
    for col in ['Order Number', 'buy_order_number', 'sell_order_number']:
        if col in order_df.columns: order_df[col] = order_df[col].astype(str)
        if col in trade_df.columns: trade_df[col] = trade_df[col].astype(str)
except FileNotFoundError as e:
    print(f"\nError: Data file not found. Make sure '{e.filename}' is in the correct directory.")
order_df_copy = order_df[order_df['Price_Type'] != 'Trigger'].copy()
order_df_copy.rename(columns={'Timestamp': 'timestamp', 'Order Number': 'order_number'}, inplace=True)
def activity_to_event(act_type):
    if act_type == 3: return 'CANCEL'
    if act_type == 4: return 'MODIFY'
    return 'NEW'
order_df_copy = order_df[order_df['Price_Type'] != 'Trigger'].copy()
order_df_copy.rename(columns={'Timestamp': 'timestamp', 'Order Number': 'order_number'}, inplace=True)
def activity_to_event(act_type):
    if act_type == 3: return 'CANCEL'
    if act_type == 4: return 'MODIFY'
    return 'NEW'
order_df_copy['event_type'] = order_df_copy['Activity Type'].apply(activity_to_event)
order_events = order_df_copy[['timestamp', 'event_type', 'order_number', 'Side', 'Price', 'Volume_Disclosed', 'Original_Qty']]

trade_df_copy = trade_df.copy()
trade_df_copy.rename(columns={'trade_time': 'timestamp'}, inplace=True)
trade_df_copy['event_type'] = 'TRADE'
trade_events = trade_df_copy[['timestamp', 'event_type', 'price', 'quantity', 'buy_order_number', 'sell_order_number']]

# --- Step 2: Combining and Sorting All Market Events ---
print("\n--- Step 2: Combining and Sorting All Market Events ---")
all_events = pd.concat([order_events, trade_events], ignore_index=True, sort=False)
all_events.sort_values(by='timestamp', inplace=True)
all_events.reset_index(drop=True, inplace=True)
total_events = len(all_events)
print(f"Total chronological events to process: {total_events}")



--- Step 2: Combining and Sorting All Market Events ---
Total chronological events to process: 1606650


In [78]:
print(len(all_events) == 

SyntaxError: incomplete input (4194761225.py, line 1)

In [None]:
print((int(all_events.iloc[40000]['timestamp']) - int(all_events.iloc[0]['timestamp'])))

396727692


In [81]:
fil = all_events[(all_events['order_number'] == '1200000002837137') | (all_events['sell_order_number'] == '1200000002837137')]


In [82]:
fil.head(1000000)

Unnamed: 0,timestamp,event_type,order_number,Side,Price,Volume_Disclosed,Original_Qty,price,quantity,buy_order_number,sell_order_number
803863,79631892957113,NEW,1200000002837137.0,S,973.25,11.0,101.0,,,,
803876,79631892960230,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803886,79631892960240,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803897,79631892960250,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803906,79631892960259,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803913,79631892960266,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803919,79631892960272,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803925,79631892960278,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803928,79631892960281,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0
803930,79631892960283,TRADE,,,,,,973.25,11.0,1200000002837141.0,1200000002837137.0


In [79]:
fil = all_events[(all_events['timestamp'] == '79631892960278')]

In [80]:
fil.head()

Unnamed: 0,timestamp,event_type,order_number,Side,Price,Volume_Disclosed,Original_Qty,price,quantity,buy_order_number,sell_order_number
803925,79631892960278,TRADE,,,,,,973.25,11.0,1200000002837141,1200000002837137


In [34]:
fil2 = order[order['Order Number'] == '1200000000111764']


NameError: name 'order' is not defined

In [None]:
fil2.head(10000000000)

Unnamed: 0,Timestamp,Side,Price,Volume_Disclosed,Price_Type,Symbol,Original_Qty,Order Number,IOC Flag,Activity Type
11337,79631255592918,S,964.35,0.0,Limit,RELIANCE,147.0,1200000000111764,N,1
29556,79631260312726,S,963.0,0.0,Limit,RELIANCE,146.0,1200000000111764,N,4


In [None]:
dff = pd.read_csv('quote_stream.csv')

In [54]:
dff.head()

Unnamed: 0,timestamp,best_bid,depth_bid,best_bid_ud,depth_bid_ud,best_ask,depth_ask,best_ask_ud,depth_ask_ud,ltp
0,79631223195796,0.0,0.0,1025.0,1.0,0.0,0.0,864.55,62.0,965.0
1,79631223195797,0.0,0.0,1020.0,1.0,0.0,0.0,864.55,61.0,965.0
2,79631223195798,0.0,0.0,1000.0,13.0,0.0,0.0,864.55,60.0,965.0
3,79631223195799,0.0,0.0,1000.0,3.0,0.0,0.0,864.55,50.0,965.0
4,79631223195800,0.0,0.0,999.0,124.0,0.0,0.0,864.55,47.0,965.0


In [43]:
count = ((dff['ltp'] != dff['best_bid']) & (dff['ltp'] != dff['best_ask']) & (dff['ltp'] != dff['best_bid_ud']) & (dff['ltp'] != dff['best_ask_ud'])).sum()


In [44]:
print((count)) 

6547


In [102]:
dff[dff['timestamp'] == 79631257182784]

Unnamed: 0,timestamp,best_bid,best_bid_ud,best_ask,best_ask_ud,ltp
968,79631257182784,963.95,963.95,964.55,964.35,964.35


In [4]:
dff = pd.read_csv('quote_stream.csv')

Unnamed: 0,timestamp,best_bid,best_bid_ud,best_ask,best_ask_ud,ltp
0,79631223195796,0.00,0.0,0.0,0.0,965.0
1,79631223195797,0.00,0.0,0.0,0.0,965.0
2,79631223195798,0.00,0.0,0.0,0.0,965.0
3,79631223195799,0.00,0.0,0.0,0.0,965.0
4,79631223195800,0.00,0.0,0.0,0.0,965.0
...,...,...,...,...,...,...
117451,79632819073369,971.05,971.5,972.6,972.1,971.3
117452,79632825573146,971.05,971.5,972.6,972.1,971.3
117453,79632827374556,971.05,971.5,972.6,972.1,971.3
117454,79632828171439,971.05,971.5,972.6,972.1,971.3


In [10]:
trade.head()

Unnamed: 0,trade_time,price,quantity,buy_order_number,sell_order_number
5096248,79631223195796,965.0,1,1200000000056413,1200000000064315
5096249,79631223195797,965.0,1,1200000000056512,1200000000064315
5096250,79631223195798,965.0,10,1200000000010246,1200000000064315
5096251,79631223195799,965.0,3,1200000000039060,1200000000064315
5096252,79631223195800,965.0,47,1200000000031797,1200000000064315


In [None]:
79631629205201