# Main notebook for the simulator

Python-related commands to set up the environment:
 - `conda create --name aggregate_modeling python=3.12 -y`
 - `conda activate aggregate_modeling`
 - `conda install ipykernel -y`

In [108]:
# Imports
import csv
import heapq
import random
import sys
from enum import Enum


In [109]:
# Model Stream Tuple
class MST:
    """A tuple of the modelled stream: either a regular tuple or a watermark."""

    def __init__(self, id, omega, tau, w, k):
        # id: the unique ID, omega: the wallclock time, tau: the event time
        self.id, self.omega, self.tau = id, omega, tau
        # w: True when this is a watermark, False when it is a regular tuple
        self.w = w
        # k: the associated key
        self.k = k

    def to_list(self):
        """Return the fields as a list in CSV column order: id, omega, tau, w, k."""
        return [getattr(self, field) for field in ("id", "omega", "tau", "w", "k")]
    
class WinAction(Enum):
    """Window action: the kind of operation an execution event applies to a window."""

    CREATE = 1  # the window is created
    UPDATE = 2  # the window is updated
    OUTPUT = 3  # the window's result is output
    DELETE = 4  # the window is deleted

# Model Execution Event
class MEE:
    """An execution event: a window action recorded with its times and key."""

    def __init__(self, id, omega, tau, winAction, k):
        self.id = id                # unique ID of the event
        self.omega = omega          # wallclock time
        self.tau = tau              # event time
        self.winAction = winAction  # the action carried by this event
        self.k = k                  # associated key

    def to_list(self):
        """Return the fields as a list in CSV column order: id, omega, tau, winAction, k."""
        row = [self.id, self.omega, self.tau]
        row += [self.winAction, self.k]
        return row

# The following are utility functions

In [110]:
def get_sliding_window_starts(tau, WA, WS):
    """
    Returns a list of all starting times of sliding windows that contain tau.

    Window starts are the multiples of WA. A start s is returned when
    tau - WS + 1 <= s <= tau, i.e. when the window [s, s + WS - 1] contains tau.
    The starts are computed arithmetically instead of testing every integer in
    that range, so the cost is proportional to the number of windows returned
    rather than to WS.

    :param tau: The event time (integer)
    :param WA: The window advance (step size); a non-zero integer
    :param WS: The window size (integer)
    :return: A list of start times of windows containing tau, in ascending order
             (empty when WS < 1)
    """
    step = abs(WA)  # divisibility by WA does not depend on its sign
    earliest_start = tau - WS + 1

    # Smallest multiple of `step` that is >= earliest_start (ceiling division
    # expressed with floor division so it stays exact for negative values).
    first_start = -(-earliest_start // step) * step

    # range() stops at the largest multiple that is still <= tau.
    return list(range(first_start, tau + 1, step))

The following part processes a real input stream and creates the model input stream, i.e. a stream of Model Stream Tuples (MST objects) that is written to a CSV file.

## Notes
- Right now the focus is on Linear Road, using only an excerpt of its tuples and considering only position reports (tuples with type=0). The file has been created with
  - `head -n 100000 input.txt | grep -e "^0," > ../../../aggregate_modeling/data/input_stream_lr.csv`

In [111]:
# Raw input excerpt and the CSV files produced from it
input_file = '../data/input_stream_lr.csv'
mst_input_stream = '../data/MST_input_stream_lr.csv'
mst_output_stream = '../data/MST_output_stream_lr.csv'
mee_events = '../data/MEE_events_lr.csv'
adjusted_mee_events_file = '../data/adjusted_MEE_events_lr.csv'
input_event_mapping = '../data/input_event_lr.csv'
input_output_mapping = '../data/input_output_lr.csv'

# Sliding-window parameters: window advance and window size
WA = 200
WS = 600


def extract_time(line):
    """Return the event time of an LR tuple in ms: its second field (an integer time) times 1000."""
    fields = line.split(",")
    return int(fields[1]) * 1000


def extract_key(line):
    """Return the key of an LR tuple: its third field (the vehicle id), stripped of whitespace."""
    fields = line.split(",")
    return fields[2].strip()

In [112]:
def process_stream(input_file, mst_input_stream, extract_time, extract_key):
    """
    Convert a raw input stream into a CSV of Model Stream Tuples.

    Each non-blank line of input_file becomes a regular tuple whose wallclock
    time (omega) and event time (tau) are both the extracted event time.
    Whenever the event time increases, a watermark carrying the previous event
    time is written before the new tuple. After the last tuple a final
    watermark with tau = sys.maxsize is written to flush everything.

    Blank lines are skipped. If the input holds no tuples, only the CSV header
    is written: there is no last event time to stamp a final watermark with,
    and a watermark with an empty omega could not be parsed by a reader that
    converts that column to int.

    :param input_file: Path of the raw input file, one tuple per line
    :param mst_input_stream: Path of the CSV file to write
    :param extract_time: Callable mapping a line to its event time
    :param extract_key: Callable mapping a line to its key
    :return: True if the event times never decreased, False otherwise
    """
    counter = 0
    last_tau = None
    non_decreasing = True

    with open(input_file, 'r') as infile, open(mst_input_stream, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(["id", "omega", "tau", "w", "k"])  # CSV header

        for line in infile:
            if not line.strip():
                continue  # Blank line: carries no tuple

            tau = extract_time(line)  # Extract event time
            k = extract_key(line)  # Extract key

            if last_tau is not None:
                # Check if tau decreased
                if tau < last_tau:
                    non_decreasing = False

                # If tau increased, insert a watermark tuple before the new tau
                if tau > last_tau:
                    watermark = MST(counter, last_tau, last_tau, True, None)
                    writer.writerow(watermark.to_list())
                    counter += 1

            # Create regular tuple
            mst = MST(counter, tau, tau, False, k)
            writer.writerow(mst.to_list())
            counter += 1

            last_tau = tau

        # At the end, a final watermark that flushes everything
        # (only when at least one tuple was read, see docstring)
        if last_tau is not None:
            watermark = MST(counter, last_tau, sys.maxsize, True, None)
            writer.writerow(watermark.to_list())

    return non_decreasing

In [113]:
# Build the MST input stream and report whether its event times are ordered
non_decreasing = process_stream(
    input_file,
    mst_input_stream,
    extract_time,
    extract_key,
)

if not non_decreasing:
    print('Tuples had decreasing event times, so mst_input_stream_lr.csv should be adjusted to have consistent watermarks')
else:
    print('Tuples had non-decreasing event times, so mst_input_stream_lr.csv can be used as is')


Tuples had non-decreasing event times, so mst_input_stream_lr.csv can be used as is


In [114]:

def process_mst_stream(mst_input_stream, mst_output_stream, mee_events, input_event_mapping, input_output_mapping, WA, WS):
    """
    Run the sliding-window model over an MST input stream.

    Reads the MST CSV at mst_input_stream and writes four CSV files:
      - mee_events: the execution events (CREATE / UPDATE / OUTPUT / DELETE)
      - mst_output_stream: the output tuples and the forwarded watermarks
      - input_event_mapping: (input_id, event_id) pairs
      - input_output_mapping: (input_id, output_id) pairs

    A regular tuple creates every window (start_time, key) that contains its
    event time and does not exist yet, then updates each of those windows.
    A watermark with event time tau outputs and deletes every pending window
    whose end time (start_time + WS - 1) is <= tau, then is forwarded itself.

    Pending windows are kept in a min-heap ordered by end time. Windows created
    for different keys are not created in end-time order, so checking only the
    oldest-created window could leave due windows unreported until a later
    watermark. Windows with the same end time are output in creation order.

    :param mst_input_stream: Path of the MST CSV to read (with a header row)
    :param mst_output_stream: Path of the output-stream CSV to write
    :param mee_events: Path of the execution-events CSV to write
    :param input_event_mapping: Path of the input-to-event mapping CSV to write
    :param input_output_mapping: Path of the input-to-output mapping CSV to write
    :param WA: The window advance (step size)
    :param WS: The window size
    """
    # (start_time, key) of every window created so far.
    # NOTE(review): entries are never removed, not even after DELETE - confirm this is intended.
    wins = set()
    pending_outputs = []  # Min-heap of (end_time, creation_seq, key)
    creation_seq = 0  # Tie-breaker: keeps creation order and avoids comparing keys
    event_counter = 0
    output_counter = 0

    with open(mst_input_stream, 'r') as infile, open(mst_output_stream, 'w', newline='') as out_mst, \
            open(mee_events, 'w', newline='') as out_mee, open(input_event_mapping, 'w', newline='') as out_event_map, \
            open(input_output_mapping, 'w', newline='') as out_output_map:

        mst_reader = csv.reader(infile)
        next(mst_reader)  # Skip header
        mst_writer = csv.writer(out_mst)
        mee_writer = csv.writer(out_mee)
        event_map_writer = csv.writer(out_event_map)
        output_map_writer = csv.writer(out_output_map)

        mst_writer.writerow(["i", "omega", "tau", "w", "k"])
        mee_writer.writerow(["i", "omega", "tau", "winAction", "k"])
        event_map_writer.writerow(["input_id", "event_id"])
        output_map_writer.writerow(["input_id", "output_id"])

        for row in mst_reader:
            if not row:
                continue  # csv.reader yields [] for blank lines

            i, omega, tau = int(row[0]), int(row[1]), int(row[2])
            w = row[3] == 'True'
            k = row[4] if row[4] != 'None' else None

            if not w:  # Regular tuple
                for start_time in get_sliding_window_starts(tau, WA, WS):
                    if (start_time, k) not in wins:
                        wins.add((start_time, k))
                        mee_writer.writerow([event_counter, omega, start_time, WinAction.CREATE.name, k])
                        event_map_writer.writerow([i, event_counter])
                        # The OUTPUT/DELETE pair is emitted once a watermark reaches the window end
                        heapq.heappush(pending_outputs, (start_time + WS - 1, creation_seq, k))
                        creation_seq += 1
                        event_counter += 1

                    mee_writer.writerow([event_counter, omega, start_time, WinAction.UPDATE.name, k])
                    event_map_writer.writerow([i, event_counter])
                    event_counter += 1
            else:  # Watermark
                while pending_outputs and pending_outputs[0][0] <= tau:
                    end_time, _, key = heapq.heappop(pending_outputs)

                    mee_writer.writerow([event_counter, omega, end_time, WinAction.OUTPUT.name, key])
                    event_map_writer.writerow([i, event_counter])

                    mee_writer.writerow([event_counter + 1, omega, end_time, WinAction.DELETE.name, key])
                    event_map_writer.writerow([i, event_counter + 1])

                    mst_writer.writerow([output_counter, omega, end_time, False, key])
                    output_map_writer.writerow([i, output_counter])

                    output_counter += 1
                    event_counter += 2

                # Forward the watermark itself after the outputs it triggered
                mst_writer.writerow([output_counter, omega, tau, True, None])
                output_map_writer.writerow([i, output_counter])
                output_counter += 1
# Run the window model over the MST input stream
process_mst_stream(
    mst_input_stream,
    mst_output_stream,
    mee_events,
    input_event_mapping,
    input_output_mapping,
    WA,
    WS,
)

# Now we adjust the times based on the actual duration of the various execution events

- For now, the durations are just random values; they will be made more accurate later on

In [115]:
def CREATE_duration():
    """Duration of a CREATE action: a random value drawn uniformly between 0.001 and 0.1."""
    return random.uniform(0.001, 0.1)


def UPDATE_duration():
    """Duration of an UPDATE action: a random value drawn uniformly between 0.001 and 0.1."""
    return random.uniform(0.001, 0.1)


def OUTPUT_duration():
    """Duration of an OUTPUT action: a random value drawn uniformly between 0.001 and 0.1."""
    return random.uniform(0.001, 0.1)


def DELETE_duration():
    """Duration of a DELETE action: a random value drawn uniformly between 0.001 and 0.1."""
    return random.uniform(0.001, 0.1)

In [117]:
def update_mee_execution_times(mee_events_file,adjusted_mee_events_file):
    
    accumulated_execution_time = 0.0
    
    with open(mee_events_file, 'r') as infile, open(adjusted_mee_events_file, 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        
        header = next(reader)
        writer.writerow(header)  # Write header back
        
        for row in reader:
            id, omega, tau, winAction, k = int(row[0]), float(row[1]), int(row[2]), row[3], row[4] if row[4] != 'None' else None
            
            accumulated_execution_time = max(omega, accumulated_execution_time)

            writer.writerow([id, int(accumulated_execution_time), tau, winAction, k])

            if winAction == "CREATE":
                accumulated_execution_time += CREATE_duration()
            elif winAction == "UPDATE":
                accumulated_execution_time += UPDATE_duration()
            elif winAction == "OUTPUT":
                accumulated_execution_time += OUTPUT_duration()
            elif winAction == "DELETE":
                accumulated_execution_time += DELETE_duration()
            
            
# Produce the adjusted execution-events file
update_mee_execution_times(
    mee_events,
    adjusted_mee_events_file,
)