In [None]:
from dataclasses import dataclass
from enum import Enum


class Stage(Enum):
    """Phase of inference a batch belongs to.

    The member chosen decides which feature formulas
    ``Predictor.predict_batch_with_features_ms`` applies to a batch.
    """

    # Member values are the capitalised phase names as plain strings.
    PREFILL = "Prefill"
    DECODE = "Decode"


@dataclass
class BatchPredictionInfo:
    """Aggregate description of a batch from which predictor features are derived."""

    # Both fields are multiplied together to build the token-count features.
    batch_size: int
    avg_context_len: int  # presumably the mean context length per request — confirm with callers


@dataclass
class PredictionInput:
    """Single record passed to ``Predictor.predict``.

    Combines the run configuration (GPU, stage, tensor-parallel size) with
    four numeric batch features, which are stored as floats.
    """

    # --- run configuration ---
    gpu: str  # or a custom GPU object/type
    stage: Stage
    tp_size: int

    # --- numeric batch features ---
    num_new_tokens: float
    prod_ext_ctx: float
    num_context_tokens: float
    batch_size: float


class Predictor:
    """Base predictor: turns a batch summary into features and delegates to ``predict``.

    ``predict`` is abstract here; a subclass supplies the actual model.
    """

    def predict(self, input_data: PredictionInput) -> float:
        """Return the prediction for ``input_data``.

        Raises:
            NotImplementedError: always, in this base class.
        """
        # No model is attached at this level; concrete predictors override this.
        raise NotImplementedError

    def predict_batch_with_features_ms(self, gpu, tp_size: int, batch: BatchPredictionInfo, stage: Stage) -> float:
        """Derive the four batch features for ``stage`` and return ``self.predict`` on them.

        Args:
            gpu: GPU identifier, forwarded unchanged into the ``PredictionInput``.
            tp_size: Tensor-parallel size, forwarded unchanged.
            batch: Batch size and average context length to derive features from.
            stage: ``Stage.PREFILL`` selects the prefill formulas; any other
                value is treated as decode.

        Returns:
            Whatever ``self.predict`` returns (presumably milliseconds, going
            by the method name — confirm against the concrete predictor).
        """
        size = batch.batch_size
        ctx_len = batch.avg_context_len
        is_prefill = stage == Stage.PREFILL

        # Prefill processes every context token as a new token; decode adds
        # one new token per batch entry.
        new_tokens = size * ctx_len if is_prefill else size
        # Prefill scales with the squared context length, decode linearly.
        ext_ctx_product = size * (ctx_len ** 2) if is_prefill else size * ctx_len
        # Stage-independent: total context tokens across the batch.
        context_tokens = ctx_len * size

        features = PredictionInput(
            gpu=gpu,
            stage=stage,
            tp_size=tp_size,
            num_new_tokens=float(new_tokens),
            prod_ext_ctx=float(ext_ctx_product),
            num_context_tokens=float(context_tokens),
            batch_size=float(size),
        )
        return self.predict(features)

In [None]:
import pandas as pd
import numpy as np

# Fixed seed so that Restart Kernel -> Run All reproduces the same mock frames
# (and therefore the same trained model in the cells that consume them).
MOCK_DATA_SEED = 42


def make_mock_decode_data(n_rows: int, rng: np.random.Generator) -> dict:
    """Draw ``n_rows`` rows of random mock feature/target columns.

    Args:
        n_rows: Number of rows to generate for every column.
        rng: NumPy random generator to draw from; its state is advanced.

    Returns:
        Dict mapping column name to a length-``n_rows`` array. Integer columns
        use half-open ranges (upper bound excluded); ``time`` is the target.
    """
    return {
        'num_new_tokens': rng.integers(1, 10, size=n_rows),
        'prod_ext_ctx': rng.uniform(0.0, 1.0, size=n_rows),
        'num_context_tokens': rng.integers(50, 200, size=n_rows),
        'batch_size': rng.integers(1, 16, size=n_rows),
        'time': rng.uniform(0.5, 5.0, size=n_rows),  # target variable
    }


# One generator feeds both splits, so train and test rows are distinct draws.
mock_rng = np.random.default_rng(MOCK_DATA_SEED)

# Generate mock training data
train_data = make_mock_decode_data(100, mock_rng)
train_df_decode = pd.DataFrame(train_data)

# Generate mock test data ('time' is the true target for evaluation)
test_data = make_mock_decode_data(20, mock_rng)
test_df_decode = pd.DataFrame(test_data)

# Display some sample data
print("Training Data Sample:")
print(train_df_decode.head())

print("\nTest Data Sample:")
print(test_df_decode.head())



Training Data Sample:
   num_new_tokens  prod_ext_ctx  num_context_tokens  batch_size      time
0               7      0.751715                 116           3  1.584784
1               6      0.805424                 109           8  4.373438
2               9      0.931937                 111          13  3.340471
3               4      0.358918                 192          11  0.624205
4               6      0.924514                  78          14  2.207557

Test Data Sample:
   num_new_tokens  prod_ext_ctx  num_context_tokens  batch_size      time
0               4      0.385448                 185           6  0.531763
1               2      0.337863                 137          12  4.128742
2               2      0.883983                 127          10  4.720189
3               9      0.661420                 125           8  2.442146
4               5      0.541860                  65           4  4.681470


In [None]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# Feature columns, named once so the training matrix and the ONNX input
# width are derived from the same list.
feature_columns = ['num_new_tokens', 'prod_ext_ctx', 'num_context_tokens', 'batch_size']

# Build float32 training arrays from train_df_decode (defined in an earlier cell).
X_train = train_df_decode[feature_columns].to_numpy(dtype=np.float32)
y_train = train_df_decode['time'].to_numpy(dtype=np.float32)

# Small random forest; random_state pins the fit for reproducibility.
rf_model_decode = RandomForestRegressor(
    n_estimators=10,
    max_depth=12,
    min_samples_leaf=2,
    random_state=42,
)
rf_model_decode.fit(X_train, y_train)

# ONNX input signature: a float tensor with a free batch dimension and one
# column per feature (4 features).
initial_type = [('float_input', FloatTensorType([None, len(feature_columns)]))]

# Convert with an explicit opset (12 here, or 11, depending on your ONNX Runtime version).
onnx_model = convert_sklearn(rf_model_decode, initial_types=initial_type, target_opset=12)

# Persist the serialized ONNX model next to the notebook.
with open("regressor_model.onnx", "wb") as f:
    f.write(onnx_model.SerializeToString())

print("ONNX regression model saved as regressor_model.onnx")


ONNX regression model saved as regressor_model.onnx


In [10]:
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import pickle
# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load model pickles that come from a trusted source.
with open("trace_datasets/A100_TP2_Llama3p1_8B_extend.pkl", "rb") as f:
    rf_model_decode = pickle.load(f)

# Convert to ONNX with explicit opset version
initial_type = [('float_input', FloatTensorType([None, 4]))]  # 4 features
onnx_model = convert_sklearn(
    rf_model_decode,
    initial_types=initial_type,
    target_opset=12  # or 11, depending on your ONNX Runtime version
)

# NOTE(review): the input pickle is named "TP2" while the output file says
# "tp12" — confirm which tensor-parallel size this model belongs to.
onnx_output_path = "a100_tp12_predictor.onnx"

# Save the ONNX model
with open(onnx_output_path, "wb") as f:
    f.write(onnx_model.SerializeToString())

# Report the file that was actually written (the message previously named
# regressor_model.onnx, a leftover from the cell this one was copied from).
print(f"ONNX regression model saved as {onnx_output_path}")


ONNX regression model saved as regressor_model.onnx


In [11]:
import onnx
# Location of the exported predictor to validate.
onnx_model_path = "model_predictor_decision_trees/model_predictors_onyx/A100_TP1_Llama3p1_8B_extend.onnx"
model = onnx.load(onnx_model_path)

# check_model raises if the graph is missing or invalid.
# NOTE(review): the saved output of this cell shows a ValidationError about
# ir_version — presumably the file at this path is not a valid serialized
# ONNX model; confirm how it was produced.
onnx.checker.check_model(model)

# Inspect the graph protobuf.
print(model.graph)


ValidationError: The model does not have an ir_version set properly.