# 目标代码生成

## 目标代码静态初始化与回调

In [None]:
import tvm
from tvm import te
import ctypes
import numpy as np

静态初始化：

In [None]:
# Build a TIR function that increments every element of an int64 buffer and
# check how often the increment is applied when the kernel is called twice.
dtype = "int64"
n = te.size_var("n")
Ab = tvm.tir.decl_buffer((n,), dtype)
ib = tvm.tir.ir_builder.create()
A = ib.buffer_ptr(Ab)
# Tag the current scope with the "coproc_uop_scope" attribute whose value
# names the runtime function "TVMBackendRunOnce".
cp = te.thread_axis((0, 1), "cop")
finit = tvm.tir.StringImm("TVMBackendRunOnce")
ib.scope_attr(cp, "coproc_uop_scope", finit)
# The loop variable is created by for_range itself, so no separate
# declaration of `i` is required beforehand.
with ib.for_range(0, n, "i", kind="parallel") as i:
    A[i] = A[i] + 1
stmt = ib.get()

mod = tvm.IRModule.from_expr(tvm.tir.PrimFunc([Ab], stmt).with_attr("global_symbol", "ramp"))
f = tvm.driver.build(mod, target="llvm")
a = tvm.nd.array(np.zeros(10, dtype=dtype))
# The kernel is invoked twice on purpose: the assertion below requires every
# element to be exactly 1, i.e. the increment must take effect only once.
f(a)
f(a)
np.testing.assert_equal(a.numpy(), np.ones(a.shape[0]))

静态回调：

In [None]:
# Build a TIR body that makes a packed call to "test_static_callback",
# passing a static handle and the buffer as arguments.
dtype = "int64"
n = te.size_var("n")
Ab = tvm.tir.decl_buffer((n,), dtype)
ib = tvm.tir.ir_builder.create()
# The handle is obtained from the "tir.tvm_static_handle" intrinsic.
handle = tvm.tir.call_intrin("handle", "tir.tvm_static_handle")
ib.emit(tvm.tir.call_packed("test_static_callback", handle, Ab))

# override=True lets this notebook cell be re-executed without failing on an
# already-registered global function name.
@tvm.register_func("test_static_callback", override=True)
def test_cb(sh, A):
    """Callback registered under the global name ``test_static_callback``.

    Parameters
    ----------
    sh : ctypes.c_void_p
        First packed argument; asserted to be a raw ``ctypes.c_void_p``.
    A : object
        Second packed argument; not used by the callback.

    Returns
    -------
    ctypes.c_void_p
        ``sh``, returned unchanged.
    """
    assert isinstance(sh, ctypes.c_void_p)
    return sh

# Wrap the emitted statements in a PrimFunc exported under the symbol "ramp",
# compile it for LLVM and run it once on a zero-filled array of 10 elements.
stmt = ib.get()
prim_func = tvm.tir.PrimFunc([Ab], stmt)
prim_func = prim_func.with_attr("global_symbol", "ramp")
mod = tvm.IRModule.from_expr(prim_func)
f = tvm.driver.build(mod, target="llvm")
zeros = np.zeros(10, dtype=dtype)
a = tvm.nd.array(zeros)
f(a)

## 简单的 pack

In [1]:
import tvm
from tvm import te
import numpy as np

In [2]:
def extern_generator(ins, outs):
    """Hand-write the IR for the extern op.

    Returns a packed call to ``my_extern_array_func1`` that receives the
    first input and the first output as its two arguments.
    """
    src = ins[0]
    dst = outs[0]
    return tvm.tir.call_packed("my_extern_array_func1", src, dst)

# The name is given explicitly so that override=True can be passed; this lets
# the notebook cell be re-executed without an "already registered" failure.
@tvm.register_func("my_extern_array_func1", override=True)
def my_extern_array_func1(aa, bb):
    """Copy the contents of ``aa`` into ``bb`` via ``aa.copyto``.

    Parameters
    ----------
    aa : object
        Source array; must provide ``copyto``.
    bb : object
        Destination passed to ``aa.copyto``.
    """
    aa.copyto(bb)

In [3]:
# Describe C as an extern op over A whose body comes from extern_generator.
nn = 1024
n = tvm.runtime.convert(nn)
A = te.placeholder(shape=(n,), name="A")
C = te.extern(shape=A.shape, inputs=[A], fcompute=extern_generator, name="C")
s = te.create_schedule(C.op)

# Build the kernel for the LLVM target.
f = tvm.build(s, args=[A, C], target="llvm")
dev = tvm.cpu(0)
# Prepare the buffers and launch the kernel; from here on `n` is the plain
# Python integer again, as NumPy needs it for sizes.
n = nn
host_input = np.random.uniform(size=n).astype(A.dtype)
a = tvm.nd.array(host_input, dev)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)

f(a, c)
# The output must match the input.
np.testing.assert_allclose(c.numpy(), a.numpy())

In [5]:
def extern_generator(ins, outs):
    """Hand-write the IR for the extern op.

    Returns a packed call to ``my_extern_array_func2`` that receives the
    first input and the first output as its two arguments.
    """
    src = ins[0]
    dst = outs[0]
    return tvm.tir.call_packed("my_extern_array_func2", src, dst)

# The name is given explicitly so that override=True can be passed; this lets
# the notebook cell be re-executed without an "already registered" failure.
@tvm.register_func("my_extern_array_func2", override=True)
def my_extern_array_func2(aa, bb):
    """Check ``aa`` against the module-level array ``a`` and copy it to ``bb``.

    The function reads the global ``a`` at call time, so ``a`` must be
    defined before the kernel that triggers this callback is run.

    Parameters
    ----------
    aa : object
        Incoming array; must have the same shape as ``a`` and be
        element-wise close to ``a + 1``.
    bb : object
        Destination passed to ``aa.copyto``.

    Raises
    ------
    AssertionError
        If the shape or the values of ``aa`` do not match the expectation.
    """
    assert aa.shape == a.shape
    np.testing.assert_allclose(aa.numpy(), a.numpy() + 1)
    aa.copyto(bb)

In [6]:
# Two-stage graph: B = A + 1 is computed first, then the extern op C
# consumes B through the IR produced by extern_generator.
nn = 1024
n = tvm.runtime.convert(nn)
A = te.placeholder(shape=(n,), name="A")
B = te.compute(shape=(n,), fcompute=lambda i: A[i] + 1, name="B")
C = te.extern(shape=B.shape, inputs=[B], fcompute=extern_generator, name="C")
s = te.create_schedule(C.op)

In [8]:
# Build the kernel for the LLVM target.
f = tvm.build(s, args=[A, C], target="llvm")
dev = tvm.cpu(0)
# Prepare the buffers and launch the kernel. The input must stay bound to the
# module-level name `a`: the registered callback my_extern_array_func2 reads
# it when the kernel runs.
n = nn
host_input = np.random.uniform(size=n).astype(A.dtype)
a = tvm.nd.array(host_input, dev)
c = tvm.nd.array(np.zeros(n, dtype=C.dtype), dev)

f(a, c)
# The output must equal the input plus one.
np.testing.assert_allclose(c.numpy(), a.numpy() + 1)