Commit 7c02fe6

avijit-chakroborty and Avijit authored
Add LoRA adapters for the SLM Engine (#1481)
This PR addresses the following:

- Modified APIs for creating the engine and running generation, plus more KPI reporting
- New APIs supporting LoRA adapters; see slm_engine.h to learn about the new APIs
- Build script updates for building with Qualcomm QNN dependencies
- More unit tests, especially as examples of the LoRA APIs

Co-authored-by: Avijit <avijitc@microsoft.com>
1 parent 63af5fb · commit 7c02fe6
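The headline API change is the renamed factory method, SLMEngine::CreateEngine() becoming SLMEngine::Create(), shown in the README diff below. Here is a minimal sketch of the new entry point; the include path is an assumption, and the adapter APIs are only referenced in comments because their exact signatures live in slm_engine.h, which this page does not show:

```cpp
#include "slm_engine.h"  // assumed include; the PR points to slm_engine.h for the new APIs

int main() {
  // Renamed factory (was CreateEngine in v1.x): model directory, model family, verbose flag
  auto slm_engine = microsoft::slm_engine::SLMEngine::Create(
      "path to ONNX Model Directory", "phi", true);
  if (!slm_engine) {
    return -1;  // engine creation failed
  }
  // Per-request LoRA adapter selection rides on the OpenAI-style "model"
  // field (see input_decoder.cpp below); engine-level adapter APIs are
  // documented in slm_engine.h.
  return 0;
}
```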

File tree

14 files changed: +722 -115 lines

examples/slm_engine/README.md

Lines changed: 1 addition & 1 deletion

@@ -374,7 +374,7 @@ target_link_libraries(inference_server slm_engine ort ort_genai)
 
 int main(int argc, char **argv) {
 
-    auto slm_engine = microsoft::slm_engine::SLMEngine::CreateEngine(
+    auto slm_engine = microsoft::slm_engine::SLMEngine::Create(
         "path to ONNX Model Directory", "phi", true);
 
     if (!slm_engine) {

examples/slm_engine/build_scripts/build_android.sh

Lines changed: 32 additions & 6 deletions

@@ -22,14 +22,40 @@ set -u
 # Build the docker image
 docker build -t slm-engine-builder -f Dockerfile .
 
-# Run the docker to build dependencies
-docker run --rm -v \
-    `pwd`/../../../:`pwd`/../../../ \
-    -u $(id -u):$(id -g) -w `pwd` \
-    slm-engine-builder python3 build_deps.py \
+# Define base build_deps command
+BUILD_DEPS_CMD="python3 build_deps.py \
     --build_ort_from_source \
     --android_sdk_path /opt/android-sdk/ \
-    --android_ndk_path /opt/android-sdk/ndk/27.2.12479018/
+    --android_ndk_path /opt/android-sdk/ndk/27.2.12479018/"
+
+# Docker volume mount options
+VOLUME_MOUNTS="-v `pwd`/../../../:`pwd`/../../../"
+
+# Check if USE_ORT_VERSION is defined
+if [ ! -z "${USE_ORT_VERSION:-}" ]; then
+    BUILD_DEPS_CMD="$BUILD_DEPS_CMD --ort_version_to_use $USE_ORT_VERSION"
+    echo "Using ONNX Runtime version: $USE_ORT_VERSION"
+fi
+
+# Check if QNN_SDK_HOME is defined
+if [ ! -z "${QNN_SDK_HOME:-}" ]; then
+    # Create Docker mount point for QNN SDK
+    QNN_SDK_DOCKER_PATH="/opt/qnn_sdk"
+
+    # Add mount for QNN SDK
+    VOLUME_MOUNTS="$VOLUME_MOUNTS -v $QNN_SDK_HOME:$QNN_SDK_DOCKER_PATH"
+
+    # Use the Docker path in build command
+    BUILD_DEPS_CMD="$BUILD_DEPS_CMD --qnn_sdk_path $QNN_SDK_DOCKER_PATH"
+
+    echo "QNN SDK path detected, building with QNN support"
+    echo "Mounting $QNN_SDK_HOME to $QNN_SDK_DOCKER_PATH in container"
+fi
+
+# Run the docker to build dependencies
+docker run --rm $VOLUME_MOUNTS \
+    -u $(id -u):$(id -g) -w `pwd` \
+    slm-engine-builder $BUILD_DEPS_CMD
 
 # Next build the slm_engine
 docker run --rm -v \
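With this change the script is driven entirely by environment variables: running it bare reproduces the old build, while something like `QNN_SDK_HOME=/path/to/qnn-sdk USE_ORT_VERSION=v1.22.0 ./build_android.sh` (paths illustrative) pins the ONNX Runtime version and mounts the QNN SDK into the container for a QNN-enabled build.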

examples/slm_engine/build_scripts/build_deps.py

Lines changed: 8 additions & 4 deletions

@@ -228,7 +228,11 @@ def build_ort(args, build_dir, artifacts_dir):
         ]
     )
     if args.qnn_sdk_path:
-        cmd_args.extend(["--use_qnn", "--qnn_home", args.qnn_sdk_path])
+        cmd_args.extend(
+            ["--use_qnn", "static_lib", "--qnn_home", args.qnn_sdk_path]
+        )
+
+    cmd_args.extend(["--cmake_extra_defines", "onnxruntime_BUILD_UNIT_TESTS=OFF"])
 
     # now build the ORT library
     print(f"{MAGENTA}Building ONNX Runtime{CLEAR}")

@@ -587,9 +591,9 @@ def main():
     ort_home = None
     if args.build_ort_from_source:
         if args.ort_version_to_use is None:
-            # If not Windows then use 1.20.1
+            # If not Windows then use 1.22.0
             if platform.system() != "Windows":
-                args.ort_version_to_use = "v1.20.1"
+                args.ort_version_to_use = "v1.22.0"
             else:
                 args.ort_version_to_use = "main"
         ort_home = build_ort(args, dep_src_dir, artifacts_dir)

@@ -600,7 +604,7 @@
     # The ORT binaries are available as they were downloaded during the GenAI build
     # This is the supported version for most platforms
     if args.ort_version_to_use is None:
-        ORT_VERSION = "1.20.1"
+        ORT_VERSION = "1.22.0"
     else:
         ORT_VERSION = args.ort_version_to_use
     # Copy the ORT artifacts to the artifacts directory.
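Two behavioral changes here: `--use_qnn` now passes the extra value `static_lib`, asking the ONNX Runtime build to link the QNN execution provider statically rather than as a separate shared library, and the default ONNX Runtime version moves from 1.20.1 to 1.22.0.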
Lines changed: 59 additions & 0 deletions

@@ -0,0 +1,59 @@
+#!/bin/sh
+
+# This script builds the slm_engine for Android using docker.
+# It uses the Dockerfile in the current directory to build a docker image
+# that contains all the necessary dependencies for building the slm_engine.
+# The script then runs the docker image to build the slm_engine.
+# The script assumes that the Dockerfile is in the same directory as this script.
+# The script also assumes that the android-sdk and android-ndk are installed
+# in the /opt/android-sdk directory.
+#
+
+# Check the architecture
+if [ "$(uname -m)" != "x86_64" ]; then
+    echo "This script is intended to run on x86_64 architecture only."
+    exit 1
+fi
+
+set -e
+set -x
+set -u
+
+# Build the docker image
+docker build -t slm-engine-builder -f Dockerfile .
+
+# Define base build_deps command
+BUILD_DEPS_CMD="python3 build_deps.py \
+    --build_ort_from_source \
+    --android_sdk_path /opt/android-sdk/ \
+    --android_ndk_path /opt/android-sdk/ndk/27.2.12479018/"
+
+# Docker volume mount options
+VOLUME_MOUNTS="-v `pwd`/../../../:`pwd`/../../../"
+
+# Check if USE_ORT_VERSION is defined
+if [ ! -z "${USE_ORT_VERSION:-}" ]; then
+    BUILD_DEPS_CMD="$BUILD_DEPS_CMD --ort_version_to_use $USE_ORT_VERSION"
+    echo "Using ONNX Runtime version: $USE_ORT_VERSION"
+fi
+
+# Check if QNN_SDK_HOME is defined
+if [ ! -z "${QNN_SDK_HOME:-}" ]; then
+    # Create Docker mount point for QNN SDK
+    QNN_SDK_DOCKER_PATH="/opt/qnn_sdk"
+
+    # Add mount for QNN SDK
+    VOLUME_MOUNTS="$VOLUME_MOUNTS -v $QNN_SDK_HOME:$QNN_SDK_DOCKER_PATH"
+
+    # Use the Docker path in build command
+    BUILD_DEPS_CMD="$BUILD_DEPS_CMD --qnn_sdk_path $QNN_SDK_DOCKER_PATH"
+
+    echo "QNN SDK path detected, building with QNN support"
+    echo "Mounting $QNN_SDK_HOME to $QNN_SDK_DOCKER_PATH in container"
+fi
+
+# Run the docker to build dependencies
+docker run --rm $VOLUME_MOUNTS \
+    -u $(id -u):$(id -g) -w `pwd` \
+    -w $HOME \
+    -it slm-engine-builder bash
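Unlike build_android.sh above, this new script ends by dropping into an interactive bash shell in the builder container rather than running the dependency build; note that of the two `-w` flags, Docker honors the last one (`$HOME`).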

examples/slm_engine/src/VERSION.txt

Lines changed: 1 addition & 1 deletion

@@ -1 +1 @@
-1.0.0
+2.0.0

examples/slm_engine/src/cpp/gtest_main.cpp

Lines changed: 38 additions & 0 deletions

@@ -1,10 +1,48 @@
+
 #include "gtest/gtest.h"
+#include <argparse/argparse.hpp>
+
 #include "httplib.h"
 #include "ort_genai.h"
 
+using namespace std;
+
+extern const char* MODEL_FILE_PATH;
+extern const char* ADAPTER_ROOT_DIR;
+
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
 
+  argparse::ArgumentParser program("slm_engine_test", "1.0",
+                                   argparse::default_arguments::none);
+  string model_path;
+  program.add_argument("-m", "--model_path")
+      .help("Path to the model file")
+      .store_into(model_path);
+
+  string adapter_root_path;
+  program.add_argument("-a", "--adapter_root_path")
+      .help("Path to the LoRA adapter root directory")
+      .store_into(adapter_root_path);
+
+  try {
+    program.parse_args(argc, argv);
+  } catch (const std::exception& err) {
+    std::cerr << err.what() << std::endl;
+    std::cerr << program;
+    std::exit(-1);
+  }
+
+  if (!model_path.empty()) {
+    cout << "Setting Model path: " << model_path << endl;
+    MODEL_FILE_PATH = model_path.c_str();
+  }
+
+  if (!adapter_root_path.empty()) {
+    cout << "Setting Adapter path: " << adapter_root_path << endl;
+    ADAPTER_ROOT_DIR = adapter_root_path.c_str();
+  }
+
   auto status = RUN_ALL_TESTS();
 
   OgaShutdown();
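The test binary can then be pointed at local assets from the command line, e.g. `./slm_engine_test -m <model-dir> -a <adapter-root>` (long forms `--model_path` and `--adapter_root_path`; the `-a` short flag is an assumed correction, since the captured diff registers `-m` for both arguments, which argparse would reject as a duplicate).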

examples/slm_engine/src/cpp/input_decoder.cpp

Lines changed: 4 additions & 0 deletions

@@ -16,6 +16,7 @@ namespace slm_engine {
 // clang-format off
 // OpenAI API example
 // {
+//   "model": "name-of-the-adapter (optional)",
 //   "messages": [
 //     {
 //       "role": "system",

@@ -59,6 +60,9 @@ class OpenAIInputDecoder : public InputDecoder {
       return false;
     }
   }
+  if (json_msg.contains("model")) {
+    decoded_params.LoRAAdapterName = json_msg["model"].get<string>();
+  }
   if (json_msg.contains("temperature")) {
     decoded_params.Temperature =
         json_msg["temperature"].get<float_t>();

examples/slm_engine/src/cpp/input_decoder.h

Lines changed: 3 additions & 0 deletions

@@ -42,6 +42,9 @@ class InputDecoder {
   std::vector<std::pair<Role, std::string>> Messages;
   // The user prompt is the last message in the sequence
   std::string UserPrompt;
+  // The LoRAAdapterName is sent by the client as "model" in the
+  // OpenAI API. In our implementation, this is the name of the adapter that will be used.
+  std::string LoRAAdapterName;
   uint32_t MaxGeneratedTokens;
   std::vector<std::string> StopTokens;
   float Temperature;
