# Converter

In [None]:
import numpy as np
from starknet_py.hash.utils import pedersen_hash

def float_to_fixed_point(value, integer_bits, fractional_bits):
    """Render ``value`` as a Cairo fixed-point struct literal.

    The magnitude is ``value * 2**fractional_bits`` truncated toward zero and
    made non-negative; the sign is emitted separately as ``true``/``false``.

    Args:
        value: Real number to convert (Python or NumPy scalar).
        integer_bits: Width of the integer part; only used in the type name.
        fractional_bits: Number of fractional bits (sets the scale factor).

    Returns:
        A string such as ``"FP16x16 { mag: 98304, sign: true }"``.
    """
    magnitude = abs(int(value * 2**fractional_bits))
    # A small negative value can truncate to magnitude 0; emitting it with
    # ``sign: true`` would encode a negative zero, so the sign is only set
    # when the magnitude is non-zero.
    is_negative = bool(value < 0) and magnitude != 0
    return (
        f"FP{integer_bits}x{fractional_bits} "
        f"{{ mag: {magnitude}, sign: {str(is_negative).lower()} }}"
    )

def cairo_array(arr, type_name="usize", fixed_point_type="FP16x16"):
    """Join the elements of ``arr`` into the body of a Cairo ``array![...]``.

    When ``type_name`` begins with ``"FP"`` every element is rendered as a
    fixed-point literal whose bit widths are parsed from ``fixed_point_type``
    (e.g. ``"FP16x16"`` -> 16 and 16). Otherwise elements are rendered with
    ``str``.

    Returns:
        The elements separated by ``", "`` (an empty string for empty input).
    """
    if not type_name.startswith("FP"):
        return ", ".join(str(item) for item in arr)

    # "FP16x16" -> ["16", "16"] -> (16, 16)
    widths = fixed_point_type[2:].split('x')
    integer_bits, fractional_bits = (int(width) for width in widths)
    literals = [float_to_fixed_point(item, integer_bits, fractional_bits) for item in arr]
    return ", ".join(literals)


class TreeEnsembleAttributes:
    """Attribute bag: every added value becomes an instance attribute."""

    def __init__(self):
        # Names of the added attributes, excluding "*_as_tensor" ones.
        self._names = []

    def add(self, name, value):
        """Store ``value`` on the instance under ``name``.

        Names ending in ``"_as_tensor"`` are stored but not recorded in
        ``_names``. Plain Python lists are converted to NumPy arrays; the
        float-valued attributes listed below are forced to ``float32``,
        all other lists keep NumPy's inferred dtype. Non-list values are
        stored unchanged.
        """
        is_tensor_variant = name.endswith("_as_tensor")
        if not is_tensor_variant:
            self._names.append(name)

        if isinstance(value, list):
            float32_fields = ("base_values", "class_weights", "nodes_values", "nodes_hitrates")
            if name in float32_fields:
                value = np.array(value, dtype=np.float32)
            else:
                value = np.array(value)

        setattr(self, name, value)

class TreeEnsemble:
    """Index structures of a tree ensemble and the Cairo code that declares them.

    Keyword arguments are stored through ``TreeEnsembleAttributes.add``; the
    ``nodes_treeids`` and ``nodes_nodeids`` attributes are read to build the
    indices.
    """

    def __init__(self, fixed_point_type="FP16x16", **kwargs):
        self.fixed_point_type = fixed_point_type
        self.atts = TreeEnsembleAttributes()
        for attribute_name in kwargs:
            self.atts.add(attribute_name, kwargs[attribute_name])

        unique_tree_ids = set(self.atts.nodes_treeids)
        self.tree_ids = sorted(unique_tree_ids)
        self._initialize_indices()

    def _initialize_indices(self):
        """Build ``root_index`` and ``node_index`` from the node attributes.

        ``root_index`` maps each tree id to the smallest position at which it
        occurs in ``nodes_treeids``; ``node_index`` maps each
        ``(tree id, node id)`` pair to its position.
        """
        tree_id_per_node = self.atts.nodes_treeids

        # Start every tree at "one past the end", then lower it to the first hit.
        self.root_index = dict.fromkeys(self.tree_ids, len(tree_id_per_node))
        for position, tree_id in enumerate(tree_id_per_node):
            if position < self.root_index[tree_id]:
                self.root_index[tree_id] = position

        self.node_index = {}
        node_keys = zip(tree_id_per_node, self.atts.nodes_nodeids)
        for position, node_key in enumerate(node_keys):
            self.node_index[node_key] = position

    def generate_cairo_code(self):
        """Return the Cairo declarations for ``tree_ids``, ``root_index`` and ``node_index``."""
        sections = [
            f"let tree_ids: Span<usize> = array![{cairo_array(self.tree_ids)}].span();",
            self._generate_root_index_cairo(),
            self._generate_node_index_cairo(),
        ]
        return "\n".join(sections)

    def _generate_root_index_cairo(self):
        """Return Cairo code declaring ``root_index`` with one insert per tree id."""
        declaration = "let mut root_index: Felt252Dict<usize> = Default::default();\n"
        inserts = []
        for tree_id in self.tree_ids:
            inserts.append(f"    root_index.insert({tree_id}, {self.root_index[tree_id]});")
        return declaration + "\n".join(inserts)

    def _generate_node_index_cairo(self):
        """Return Cairo code declaring ``node_index``.

        Each key is the Pedersen hash of the ``(tree id, node id)`` pair.
        """
        declaration = "let mut node_index: Felt252Dict<usize> = Default::default();\n"
        inserts = []
        for (tree_id, node_id), position in self.node_index.items():
            key = pedersen_hash(int(tree_id), int(node_id))
            inserts.append(f"    node_index.insert({key}, {position});")
        return declaration + "\n".join(inserts)

def generate_full_cairo_code(params, fixed_point_type="FP16x16"):
    """Generate the Cairo source of a ``pred`` function for a tree-ensemble regressor.

    Args:
        params: Mapping with the regressor attributes. The keys read here are
            ``aggregate_function``, ``base_values``, ``post_transform``,
            ``nodes_falsenodeids``, ``nodes_featureids``,
            ``nodes_missing_value_tracks_true``, ``nodes_modes``,
            ``nodes_nodeids``, ``nodes_treeids``, ``nodes_truenodeids``,
            ``nodes_values``, ``target_ids``, ``target_nodeids``,
            ``target_treeids`` and ``target_weights``. The whole mapping is
            also forwarded to ``TreeEnsemble`` as keyword arguments.
        fixed_point_type: Name of the fixed-point type used in the generated
            code, e.g. ``"FP16x16"``.

    Returns:
        The generated Cairo source as a single string.
    """
    ensemble = TreeEnsemble(fixed_point_type=fixed_point_type, **params)
    tree_specific_code = ensemble.generate_cairo_code()

    # Use len() rather than truthiness so that multi-element NumPy arrays
    # (whose truth value is ambiguous) are accepted as well as lists.
    if len(params['base_values']) > 0:
        base_values_cairo = f"let base_values: Option<Span<{fixed_point_type}>> = Option::Some(array![{cairo_array(params['base_values'], fixed_point_type, fixed_point_type)}].span());"
    else:
        base_values_cairo = f"let base_values: Option<Span<{fixed_point_type}>> = Option::None;"

    # Empty values fall back to a default variant so the generated enum path
    # is always well-formed.
    post_transform_cairo = params['post_transform'] or "NONE"
    aggregate_function_cairo = params['aggregate_function'] or "SUM"

    # The return type follows ``fixed_point_type`` like the rest of the
    # generated function, instead of being pinned to FP16x16.
    return f"""
use orion::numbers::{fixed_point_type};
use orion::operators::tensor::{{Tensor, TensorTrait, {fixed_point_type}Tensor, U32Tensor}};
use orion::operators::ml::tree_ensemble::core::{{NODE_MODES, TreeEnsembleAttributes, TreeEnsemble}};
use orion::operators::ml::tree_ensemble::tree_ensemble_regressor::{{TreeEnsembleRegressor, POST_TRANSFORM, TreeEnsembleRegressorTrait, AGGREGATE_FUNCTION}};
use orion::operators::matrix::{{MutMatrix, MutMatrixImpl}};

fn pred(X: Tensor<{fixed_point_type}>) -> MutMatrix::<{fixed_point_type}> {{
    let n_targets: usize = 1;
    let aggregate_function = AGGREGATE_FUNCTION::{aggregate_function_cairo};
    let nodes_falsenodeids: Span<usize> = array![{cairo_array(params['nodes_falsenodeids'])}].span();
    let nodes_featureids: Span<usize> = array![{cairo_array(params['nodes_featureids'])}].span();
    let nodes_missing_value_tracks_true: Span<usize> = array![{cairo_array(params['nodes_missing_value_tracks_true'])}].span();
    let nodes_modes: Span<NODE_MODES> = array![{', '.join(['NODE_MODES::' + x for x in params['nodes_modes']])}].span();
    let nodes_nodeids: Span<usize> = array![{cairo_array(params['nodes_nodeids'])}].span();
    let nodes_treeids: Span<usize> = array![{cairo_array(params['nodes_treeids'])}].span();
    let nodes_truenodeids: Span<usize> = array![{cairo_array(params['nodes_truenodeids'])}].span();
    let nodes_values: Span<{fixed_point_type}> = array![{cairo_array(params['nodes_values'], fixed_point_type, fixed_point_type)}].span();
    let target_ids: Span<usize> = array![{cairo_array(params['target_ids'])}].span();
    let target_nodeids: Span<usize> = array![{cairo_array(params['target_nodeids'])}].span();
    let target_treeids: Span<usize> = array![{cairo_array(params['target_treeids'])}].span();
    let target_weights: Span<{fixed_point_type}> = array![{cairo_array(params['target_weights'], fixed_point_type, fixed_point_type)}].span();

    {base_values_cairo}
    let post_transform = POST_TRANSFORM::{post_transform_cairo};


    {tree_specific_code}

    let atts = TreeEnsembleAttributes {{
        nodes_falsenodeids,
        nodes_featureids,
        nodes_missing_value_tracks_true,
        nodes_modes,
        nodes_nodeids,
        nodes_treeids,
        nodes_truenodeids,
        nodes_values
    }};

    let mut ensemble: TreeEnsemble<{fixed_point_type}> = TreeEnsemble {{
        atts, tree_ids, root_index, node_index
    }};

    let mut regressor: TreeEnsembleRegressor<{fixed_point_type}> = TreeEnsembleRegressor {{
        ensemble,
        target_ids,
        target_nodeids,
        target_treeids,
        target_weights,
        base_values,
        n_targets,
        aggregate_function,
        post_transform
    }};

    let mut scores = TreeEnsembleRegressorTrait::predict(ref regressor, X);
    scores
}}
    """

# Usage

In [None]:
import os
import torch
import ezkl
import json


import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Toy dataset: seven two-feature samples with an exactly linear target
X = np.array([[1, 1], [1, 2], [2, 2], [2, 3], [3, 3], [4, 4], [6, 8]])
y = X @ np.array([1, 2]) + 3  # y = 1 * x_0 + 2 * x_1 + 3

# Build a two-tree random forest with a fixed seed and fit it in one step
reg = RandomForestRegressor(n_estimators=2, random_state=42).fit(X, y)

# Score the fitted model on the data it was trained on
score = reg.score(X, y)
print(f"Model Score: {score}")

In [None]:
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType

# The ONNX input signature needs the feature count (second axis of X)
number_of_features = X.shape[1]
print("Number of features:", number_of_features)

# Single float input named 'float_input' with shape [None, number_of_features]
input_tensor_type = FloatTensorType([None, number_of_features])
initial_type = [('float_input', input_tensor_type)]

# Translate the fitted scikit-learn regressor into an ONNX model
onnx_model = convert_sklearn(reg, initial_types=initial_type)

# Persist the serialized model next to the notebook
with open("network.onnx", "wb") as onnx_file:
    onnx_file.write(onnx_model.SerializeToString())

print("Model exported to ONNX format as network.onnx")

### **Generating Cairo files**

Now let's generate Cairo files for each tensor in the object.

In [None]:
def decimal_to_fp16x16(num):
    """Convert ``num`` to a signed integer in FP16x16 scale (value * 2**16).

    The integer part is obtained by truncation toward zero and the
    fractional part is scaled and truncated separately; the result keeps the
    sign of ``num``.
    """
    one = 1 << 16  # 65536, the FP16x16 representation of 1.0
    integer_part = int(num)
    remainder = num - integer_part
    return integer_part * one + int(remainder * one)

In [None]:
import os

# Names of the tensors to expose as Cairo modules
tensor_name = ["X_test"]

# Cairo source directory, relative to the notebook location
base_path = "../../src"

def generate_cairo_files(data, name):
    """Write ``data`` as a Cairo function ``name()`` returning a ``Tensor<FP16x16>``.

    The file is written to ``<base_path>/generated/<name>.cairo`` (the
    directory is created if needed), where ``base_path`` is the module-level
    variable.

    Args:
        data: NumPy array; its shape and flattened values are emitted.
        name: Name of both the generated file and the generated function.
    """
    generated_path = os.path.join(base_path, 'generated')
    os.makedirs(generated_path, exist_ok=True)

    with open(os.path.join(generated_path, f"{name}.cairo"), "w") as f:
        f.write(
            "use array::ArrayTrait;\n" +
            "use orion::operators::tensor::{Tensor, TensorTrait, FP16x16Tensor};\n" +
            "use orion::numbers::{FixedTrait, FP16x16, FP16x16Impl};\n" +
            "\n" + f"fn {name}() -> Tensor<FP16x16>" + "{\n\n" +
            "let mut shape = ArrayTrait::new();\n"
        )
        for dim in data.shape:
            f.write(f"shape.append({dim});\n")

        # Terminate the declaration with a newline so the first append does
        # not end up on the same line.
        f.write("let mut data = ArrayTrait::new();\n")
        # Iterate the flattened array directly: unlike np.nditer this also
        # accepts zero-size input.
        for val in data.flatten():
            magnitude = abs(int(decimal_to_fp16x16(val)))
            # Never emit a negative sign together with a zero magnitude.
            is_negative = bool(val < 0) and magnitude != 0
            f.write(f"data.append(FixedTrait::new({magnitude}, {str(is_negative).lower()}));\n")
        f.write(
            "let tensor = TensorTrait::<FP16x16>::new(shape.span(), data.span());\n" +
            "return tensor;\n}"
        )

# Declare one Cairo module per generated tensor in generated.cairo
with open(os.path.join(base_path, 'generated.cairo'), 'w') as module_file:
    module_file.writelines([f"mod {module_name};\n" for module_name in tensor_name])

# Emit the first sample as a (1, n_features) float32 tensor named X_test
first_sample = np.array([X[0]], dtype=np.float32)
generate_cairo_files(first_sample, "X_test")

Now we extract the parameters from the ONNX model.

In [None]:
import onnx
import numpy as np

def _onnx_attribute_value(attribute):
    """Return the payload of an ONNX attribute according to its declared type."""
    kinds = onnx.AttributeProto
    if attribute.type == kinds.STRINGS:
        return [item.decode('utf-8') for item in attribute.strings]
    if attribute.type == kinds.STRING:
        return attribute.s.decode('utf-8')
    if attribute.type == kinds.INT:
        return attribute.i
    if attribute.type == kinds.FLOAT:
        return attribute.f
    if attribute.type == kinds.INTS:
        return attribute.ints
    if attribute.type == kinds.FLOATS:
        return attribute.floats
    # Type not set or not handled above: fall back to the list fields.
    return attribute.ints if attribute.ints else attribute.floats


def extract_parameters_from_onnx(model_path):
    """Extract the ``TreeEnsembleRegressor`` attributes from an ONNX model file.

    Args:
        model_path: Path of the ONNX model to load.

    Returns:
        A dict pre-populated with defaults (``n_targets`` 1,
        ``aggregate_function`` "SUM", ``post_transform`` "NONE", empty lists
        otherwise) in which every attribute found on a
        ``TreeEnsembleRegressor`` node replaces the default of the same name.
        If several such nodes exist, later nodes overwrite earlier ones.
    """
    # Load the ONNX model
    model = onnx.load(model_path)

    print(model.graph.node)

    # Initialize parameters dictionary
    params = {
            'n_targets': 1,
            'aggregate_function': "SUM",
            'base_values':[],
            'nodes_falsenodeids':[],
            'nodes_featureids':[],
            'nodes_hitrates':[],
            'nodes_missing_value_tracks_true':[],
            'nodes_modes':[],
            'nodes_nodeids':[],
            'nodes_treeids':[],
            'nodes_truenodeids':[],
            'nodes_values':[],
            'post_transform':"NONE",
            'target_ids':[],
            'target_nodeids':[],
            'target_treeids':[],
            'target_weights':[],
    }

    # Traverse the ONNX model graph to extract parameters
    for node in model.graph.node:
        if node.op_type != 'TreeEnsembleRegressor':
            continue
        for attribute in node.attribute:
            # Only attributes that have a slot in ``params`` are kept.
            if attribute.name not in params:
                continue
            if attribute.name == 'nodes_modes':
                params[attribute.name] = [mode.decode('utf-8') for mode in attribute.strings]
            else:
                # Scalar attributes (single int / float / string) live in
                # their own fields; reading only ``ints``/``floats`` would
                # replace their defaults with an empty container.
                params[attribute.name] = _onnx_attribute_value(attribute)

    return params

# Path of the ONNX model written by the export cell ("network.onnx")
onnx_model_path = 'network.onnx'

# Read the TreeEnsembleRegressor attributes into a dict; `parameters` is
# consumed later when generating the inference code
parameters = extract_parameters_from_onnx(onnx_model_path)

# Print the extracted parameters for a visual check
print(parameters)


Create a main function that calls the predict function (this is needed; otherwise we get a stack overflow).

In [None]:
! touch ../../src/main.cairo

In [None]:
%%writefile ../../src/main.cairo

use traits::TryInto;
use array::{ArrayTrait, SpanTrait};
use orion::operators::tensor::{
    Tensor, TensorTrait, FP16x16Tensor, FP16x16TensorAdd, FP16x16TensorMul, FP16x16TensorSub,
    FP16x16TensorDiv
};
use orion::numbers::{FixedTrait, FP16x16, FP16x16Impl};
use orion::numbers::fixed_point::implementations::fp16x16::core::{
    HALF, ONE, FP16x16Mul, FP16x16Div, FP16x16IntoI32, FP16x16PartialOrd,
    FP16x16PartialEq
};

use giza::{
    generated::{X_test::X_test}
};
use orion::operators::matrix::{MutMatrix};

use giza::{inference::pred};


// Entry point: evaluate the generated `pred` function on the generated
// `X_test` tensor and return the resulting matrix.
fn main() -> MutMatrix::<FP16x16> {
    pred(X_test())
}

Implement the ONNX model in Cairo

In [None]:
! touch ../../src/inference.cairo

In [None]:
fixed_point_type = "FP16x16"  # fixed-point type used throughout the generated Cairo code
full_cairo_code = generate_full_cairo_code(parameters, fixed_point_type)
# Write the generated code to <base_path>/inference.cairo (../../src/inference.cairo)
with open(os.path.join(base_path, 'inference.cairo'), 'w') as f:
    f.write(full_cairo_code)


In [None]:
! touch ../../src/lib.cairo

In [None]:
%%writefile ../../src/lib.cairo

mod generated;
mod inference;
mod main;