<img src="http://wandb.me/logo-im-png" width="400" alt="Weights & Biases" />

<br />
<br />

<a target="_blank" href="https://colab.research.google.com/github/wandb/weave/blob/master/examples/prompts/trace_debugging/dev/synthetic_trace_data.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

# Generate Synthetic Trace Data

Generate some synthetic trace data we can explore in a Trace Debugging Board.

# Step 0: Setup

In [None]:
!pip install -qqq weave

In [None]:
import weave
from weave.legacy.monitoring import StreamTable
from weave.stream_data_interfaces import TraceSpanDict

# Step 1: Configure data streaming

In [None]:
# Destination of the data stream. The three values are joined below as
# "<WB_ENTITY>/<WB_PROJECT>/<WB_STREAM>" when the StreamTable is created.
WB_PROJECT = "prompts"
WB_STREAM = "synthetic_traces_stream"
# Placeholder so the cell parses; replace it before running the logging step.
WB_ENTITY = ""  # set wandb username or team name

**Note**: The WB_ENTITY will match the default entity associated with your [W&B API key](https://wandb.ai/authorize). You can optionally set WB_ENTITY to a different wandb username or team name. Log in to W&B and navigate to [the Home Page](https://wandb.ai/home) to see any other valid options for your WB_ENTITY under your "Profile" and "Teams" in the left sidebar.

# Step 2: Generate some synthetic data to log

In [None]:
import random
from uuid import uuid4
import datetime
import json

# Pool of span names; create_random_span picks one at random for each span.
names = [
    "fit",
    "predict",
    "transform",
    "compile",
    "evaluate",
    "GridSearchCV",
    "train_test_split",
    "Sequential",
    "early_stopping",
    "OneHotEncoder",
    "Dense",
    "MinMaxScaler",
    "dropout",
    "confusion_matrix",
    "lstm",
]

def simple_dict():
    """Return a new sample payload with int, str, bool and nested values.

    Every call builds fresh objects, so callers may mutate the result
    without affecting other payloads.
    """
    nested = {
        "_kind": "AGENT",
        "list": [1, 2, 3],
        "dict": {"hello": "world"},
    }
    payload = {"a_1": 42, "a_2": "Test", "a_3": True}
    payload["a_4"] = nested
    return payload

def create_random_span(accumulator, trace_id=None, parent_id=None, start_time_s=None):
    """Create one random span, plus its children, and append them to a list.

    Child spans are generated recursively and laid out back to back, so a
    parent's duration is the sum of its children's durations. Children are
    appended to ``accumulator`` before their parent. If a child ends in
    ``ERROR``, the parent takes over that status and exception and stops
    generating further children.

    Args:
        accumulator: List that receives every span created by this call.
        trace_id: Trace the span belongs to; a new UUID string when None.
        parent_id: Span id of the parent; None marks a root span.
        start_time_s: Start time as a POSIX timestamp; "now" when None.

    Returns:
        The span created for this call (a TraceSpanDict with an extra
        ``timestamp`` key).
    """
    name = random.choice(names)
    kind = random.choice([None, "CHAIN", "AGENT", "TOOL", "LLM"])

    # Outcome roll: 0 -> ERROR, 1..9995 -> SUCCESS, anything above -> UNSET.
    roll = random.randint(0, 10000)
    if roll == 0:
        status_code = "ERROR"
    elif roll <= 9995:
        status_code = "SUCCESS"
    else:
        status_code = "UNSET"

    # Only spans that fail on their own may carry an exception message.
    exception = None
    if status_code == "ERROR":
        exception = random.choice([None, "Error"])

    span_id = str(uuid4())
    trace_id = str(uuid4()) if trace_id is None else trace_id

    # Special Attributes
    attributes = simple_dict()
    attributes["span_kind"] = kind
    attributes["model"] = {
        "id": "4edfghu7654edfg",
        "obj": json.dumps(simple_dict()),
    }

    if start_time_s is None:
        start_time_s = datetime.datetime.now().timestamp()

    # Root spans always get children; nested spans are mostly leaves.
    is_root = parent_id is None
    child_count_options = [1, 2, 3, 4, 5] if is_root else [0, 0, 0, 0, 0, 1, 2]
    num_children = random.choice(child_count_options)

    duration_s = 0
    if num_children != 0 and status_code != "ERROR":
        for _ in range(num_children):
            # Each child starts where the previous one ended.
            child = create_random_span(
                accumulator, trace_id, span_id, duration_s + start_time_s
            )
            duration_s += child["end_time_s"] - child["start_time_s"]
            if child["status_code"] == "ERROR":
                status_code = "ERROR"
                exception = child["exception"]
                break
    else:
        # Leaf span or self-failed span: random duration of 0.001 to 10 seconds.
        duration_s = random.randint(1, 10000) / 1000

    span = TraceSpanDict(
        name=name,
        span_id=span_id,
        trace_id=trace_id,
        status_code=status_code,
        start_time_s=start_time_s,
        end_time_s=start_time_s + duration_s,
        parent_id=parent_id,
        attributes=attributes,
        inputs=simple_dict(),
        output=simple_dict(),
        summary=simple_dict(),
        exception=exception,
    )
    # Purely a simulation hack
    span["timestamp"] = datetime.datetime.fromtimestamp(start_time_s)
    accumulator.append(span)
    return span

# Step 3: Initialize data stream and log synthetic data

In [None]:
# Open the stream addressed as "<entity>/<project>/<stream>".
st = StreamTable(f"{WB_ENTITY}/{WB_PROJECT}/{WB_STREAM}")

# Generate and log ten traces, one batch of spans per trace.
start_time_s = datetime.datetime.now().timestamp()
for _ in range(10):
    trace_spans = []
    create_random_span(trace_spans, start_time_s=start_time_s)
    st.log(trace_spans)
    # The next trace begins at the latest span start time of this one.
    start_time_s = max(span["timestamp"].timestamp() for span in trace_spans)

st.finish()

# Next: View and explore a Trace Debugging Board

Click on the "View data at:" link above to view your data stream in Weave. 

You can click on "+ New board from template" on the right to create a Trace Debug Board, which enables:
* key LLM tuning metrics at a glance: latency and success vs failure, for each call and as a distribution
* complete view of call details: inputs, outputs, status, timestamp, etc&mdash;also available for downtream queries & analysis
* interactive W&B Trace view: a color-coded flow chart of every step of an LLM chain, with full metadata, model details, and similar span views updating in sync with every selected span
* monitor & analyze from the automatic Board; customize & publish the Board for streamlined collaboration with your team