From adc3fcbf8fbece2ae3b06a64541fd1a9670cea3d Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 18:23:15 +0300 Subject: [PATCH 1/9] fix(Vulkan): include integrated GPU memory --- docs/guide/embedding.md | 2 +- llama/addon/globals/getGpuInfo.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md index fa4f4167..8d712ea3 100644 --- a/docs/guide/embedding.md +++ b/docs/guide/embedding.md @@ -204,7 +204,7 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue"; #### Embedded databases {#databases-embedded} -* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://lancedb.github.io/lancedb/basic/#__tabbed_1_2)) - Serverless vector database you can embed inside your application. No server required. +* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://www.npmjs.com/package/@lancedb/lancedb#usage)) - Serverless vector database you can embed inside your application. No server required.
* **Vectra** ([GitHub](https://github.com/Stevenic/vectra) | [npm](https://www.npmjs.com/package/vectra)) - local vector database using local files diff --git a/llama/addon/globals/getGpuInfo.cpp b/llama/addon/globals/getGpuInfo.cpp index 1bcde7a9..0e6550d9 100644 --- a/llama/addon/globals/getGpuInfo.cpp +++ b/llama/addon/globals/getGpuInfo.cpp @@ -27,7 +27,8 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { device = ggml_backend_dev_get(i); - if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { + auto deviceType = ggml_backend_dev_type(device); + if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) { deviceTotal = 0; deviceFree = 0; ggml_backend_dev_memory(device, &deviceFree, &deviceTotal); @@ -76,8 +77,8 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t device = ggml_backend_dev_get(i); - if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { - + auto deviceType = ggml_backend_dev_type(device); + if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) { deviceNames.push_back(std::string(ggml_backend_dev_description(device))); } } From 6ae52b0c7c2072e4df57313a75edf456d740743e Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:49:42 +0300 Subject: [PATCH 2/9] fix: adapt Llama chat wrappers to breaking `llama.cpp` changes --- src/chatWrappers/FunctionaryChatWrapper.ts | 28 ++++---- src/chatWrappers/Llama3ChatWrapper.ts | 14 ++-- src/chatWrappers/Llama3_1ChatWrapper.ts | 8 +-- .../Llama3_2LightweightChatWrapper.ts | 12 ++-- .../llama3.2/promptCompletion.test.ts | 6 +- .../FunctionaryChatWrapper.test.ts | 66 +++++++------------ .../chatWrappers/Llama3ChatWrapper.test.ts | 30 +++------ .../chatWrappers/Llama3_1ChatWrapper.test.ts | 33 ++++------ .../utils/resolveChatWrapper.test.ts | 16 ++--- 9 files changed, 85 insertions(+), 128 deletions(-) diff --git a/src/chatWrappers/FunctionaryChatWrapper.ts b/src/chatWrappers/FunctionaryChatWrapper.ts index 402939b1..8afac88f 100644 --- a/src/chatWrappers/FunctionaryChatWrapper.ts +++ b/src/chatWrappers/FunctionaryChatWrapper.ts @@ -39,13 +39,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { prefix: LlamaText([ new SpecialTokensText("<|start_header_id|>tool<|end_header_id|>\n\n") ]), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -72,13 +72,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { "{{functionName}}", new SpecialTokensText("\n") ]), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -155,13 +155,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { return LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), LlamaText.fromJSON(item.text), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "user") { return LlamaText([ new 
SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.text, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "model") { if (isLastItem && item.response.length === 0) @@ -178,7 +178,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { return; res.push(LlamaText(pendingFunctionCalls)); - res.push(LlamaText(new SpecialToken("EOT"))); + res.push(LlamaText(new SpecialTokensText("<|eot_id|>"))); res.push(LlamaText(pendingFunctionResults)); pendingFunctionResults.length = 0; @@ -206,7 +206,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response, (!isLastResponse || isLastItem) ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ]) ); @@ -232,7 +232,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response.result === undefined ? "" // "void" : jsonDumps(response.result), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } else @@ -320,13 +320,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { return LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), LlamaText.fromJSON(item.text), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "user") { return LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.text, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "model") { if (isLastItem && item.response.length === 0) @@ -343,7 +343,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { return; res.push(LlamaText(pendingFunctionCalls)); - res.push(LlamaText(new SpecialToken("EOT"))); + res.push(LlamaText(new SpecialTokensText("<|eot_id|>"))); res.push(LlamaText(pendingFunctionResults)); pendingFunctionResults.length = 0; @@ -365,7 +365,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response, (isLastItem && isLastResponse) ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } else if (isChatModelResponseFunctionCall(response)) { @@ -392,7 +392,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response.result === undefined ? 
"" // "void" : jsonDumps(response.result), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } else diff --git a/src/chatWrappers/Llama3ChatWrapper.ts b/src/chatWrappers/Llama3ChatWrapper.ts index a095a4f8..070f1343 100644 --- a/src/chatWrappers/Llama3ChatWrapper.ts +++ b/src/chatWrappers/Llama3ChatWrapper.ts @@ -34,13 +34,13 @@ export class Llama3ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText(new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "\n", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -62,11 +62,11 @@ export class Llama3ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText([ - LlamaText(new SpecialToken("EOT")), + LlamaText(new SpecialTokensText("<|eot_id|>")), new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n") ]), suffix: LlamaText([ - new SpecialToken("EOT"), + new SpecialTokensText("<|eot_id|>"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n") ]) } @@ -147,7 +147,7 @@ export class Llama3ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -157,7 +157,7 @@ export class Llama3ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -169,7 +169,7 @@ export class Llama3ChatWrapper extends ChatWrapper { item.model, isLastItem ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/src/chatWrappers/Llama3_1ChatWrapper.ts b/src/chatWrappers/Llama3_1ChatWrapper.ts index 36290259..c2bdd3e5 100644 --- a/src/chatWrappers/Llama3_1ChatWrapper.ts +++ b/src/chatWrappers/Llama3_1ChatWrapper.ts @@ -29,7 +29,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText(new SpecialTokensText("\n<|start_header_id|>ipython<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")) } } }; @@ -189,7 +189,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -199,7 +199,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -211,7 +211,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { item.model, isLastItem ? 
LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts index a9f6bb89..e849b28d 100644 --- a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts +++ b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts @@ -24,12 +24,12 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { optionalPrefixSpace: true, prefix: '{"name": "', paramsPrefix: '", "parameters": ', - suffix: LlamaText("}", new SpecialToken("EOT")), + suffix: LlamaText("}", new SpecialTokensText("<|eot_id|>")), emptyCallParamsPlaceholder: {} }, result: { - prefix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>ipython<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + prefix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\n")), + suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")) } } }; @@ -192,7 +192,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -202,7 +202,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -214,7 +214,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { item.model, isLastItem ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/test/modelDependent/llama3.2/promptCompletion.test.ts b/test/modelDependent/llama3.2/promptCompletion.test.ts index a0db42f2..8c76051d 100644 --- a/test/modelDependent/llama3.2/promptCompletion.test.ts +++ b/test/modelDependent/llama3.2/promptCompletion.test.ts @@ -58,7 +58,7 @@ describe("llama 3.2", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "user", new SpecialTokensText("<|end_header_id|>"), @@ -86,14 +86,14 @@ describe("llama 3.2", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "user", new SpecialTokensText("<|end_header_id|>"), " Hi there!", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "assistant", new SpecialTokensText("<|end_header_id|>"), diff --git a/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts b/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts index 81c4b686..a330babe 100644 --- a/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts +++ b/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts @@ -199,23 +199,19 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -272,34 +268,28 @@ describe("FunctionaryChatWrapper", () => { }) => any; } // namespace functions", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>system<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>system<|end_header_id|> "), "The assistant calls functions with appropriate input when necessary. The assistant writes <|stop|> when finished answering.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>system<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>system<|end_header_id|> "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Role a dice twice and tell me the total result", - new SpecialToken("EOT"), - new SpecialTokensText("<|reserved_special_token_249|>"), + new SpecialTokensText("<|eot_id|><|reserved_special_token_249|>"), "getRandomNumber", new SpecialTokensText(" "), @@ -309,24 +299,21 @@ describe("FunctionaryChatWrapper", () => { new SpecialTokensText(" "), "{"min": 1, "max": 6}", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>tool<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>tool<|end_header_id|> name="), "getRandomNumber", new SpecialTokensText(" "), "3", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>tool<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>tool<|end_header_id|> name="), "getRandomNumber", new SpecialTokensText(" "), "4", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "The total result of rolling the dice twice is 3 + 4 = 7.", @@ -353,23 +340,19 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -384,28 +367,23 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), ]) diff --git a/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts b/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts index 55cc5d24..d8c9dee2 100644 --- a/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts +++ b/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts @@ -43,13 +43,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -67,23 +65,19 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -110,13 +104,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -131,13 +123,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello! diff --git a/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts b/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts index fe8530da..fab30cc9 100644 --- a/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts +++ b/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts @@ -75,13 +75,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -143,23 +141,19 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "What is the time?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> { "), ""22:00:00"", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -206,13 +199,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -235,13 +226,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello! 
diff --git a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts index 745eb4a4..0523e7e2 100644 --- a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts +++ b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts @@ -63,8 +63,8 @@ const falconJinjaTemplate = ` `.slice(1, -1); const funcationaryJinjaTemplateV2 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|from|>' + message['role'] + '\n<|recipient|>all\n<|content|>' + message['content'] + '\n' }}{% elif message['role'] == 'tool' %}\n{{ '<|from|>' + message['name'] + '\n<|recipient|>all\n<|content|>' + message['content'] + '\n' }}{% else %}\n{% set contain_content='no'%}\n{% if message['content'] is not none %}\n{{ '<|from|>assistant\n<|recipient|>all\n<|content|>' + message['content'] }}{% set contain_content='yes'%}\n{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{% set prompt='<|from|>assistant\n<|recipient|>' + tool_call['function']['name'] + '\n<|content|>' + tool_call['function']['arguments'] %}\n{% if loop.index == 1 and contain_content == \"no\" %}\n{{ prompt }}{% else %}\n{{ '\n' + prompt}}{% endif %}\n{% endfor %}\n{% endif %}\n{{ '<|stop|>\n' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|from|>assistant\n<|recipient|>' }}{% endif %}"; -const funcationaryJinjaTemplateV2Llama3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + eot_token }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ eot_token }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; -const funcationaryJinjaTemplateV3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ '>>>all\n' + message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ eot_token }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; +const funcationaryJinjaTemplateV2Llama3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + 
message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; +const funcationaryJinjaTemplateV3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ '>>>all\n' + message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; const gemmaJinjaTemplate = ` {%- if messages[0]['role'] == 'system' %} @@ -125,7 +125,7 @@ const llama2ChatJinjaTemplate = ` const llama3ChatJinjaTemplate = ` {%- set loop_messages = messages -%} {%- for message in loop_messages -%} - {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + eot_token -%} + {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' -%} {%- if loop.index0 == 0 -%} {%- set content = bos_token + content -%} {%- endif -%} @@ -177,7 +177,7 @@ const llama3_1ChatJinjaTemplate = ` {%- endfor %} {%- endif %} {{- system_message }} -{{- eot_token }} +{{- '<|eot_id|>' }} {#- Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message and not tools is none %} @@ -197,12 +197,12 @@ const llama3_1ChatJinjaTemplate = ` {{- t | tojson(indent=4) }} {{- "\\n\\n" }} {%- endfor %} - {{- first_user_message + eot_token}} + {{- first_user_message + '<|eot_id|>'}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + eot_token }} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }} {%- elif 'tool_calls' in message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception("This model only supports single tool-calls at once!") }} @@ -229,7 +229,7 @@ const llama3_1ChatJinjaTemplate = ` {#- This means we're in ipython mode #} {{- "<|eom_id|>" }} {%- else %} - {{- eot_token }} + {{- '<|eot_id|>' }} {%- endif %} {%- elif message.role == "tool" or 
message.role == "ipython" %} {{- "<|start_header_id|>ipython<|end_header_id|>\\n\\n" }} @@ -238,7 +238,7 @@ const llama3_1ChatJinjaTemplate = ` {%- else %} {{- message.content }} {%- endif %} - {{- eot_token }} + {{- '<|eot_id|>' }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} From e2794652118e2c166d12f4d6c70d5f11b6809c4c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:50:06 +0300 Subject: [PATCH 3/9] fix: internal log level --- src/bindings/Llama.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index cb5f042c..e65060a5 100644 --- a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -684,6 +684,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil return LlamaLogLevel.info; else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list")) return LlamaLogLevel.info; + else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified")) + return LlamaLogLevel.info; else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU")) return LlamaLogLevel.info; From 76aea27aca02818a24d52559272905c134682848 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:52:40 +0300 Subject: [PATCH 4/9] docs(Vulkan): recommend installing LLVM on Windows --- docs/guide/Vulkan.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/guide/Vulkan.md b/docs/guide/Vulkan.md index 37e65573..0a089c6c 100644 --- a/docs/guide/Vulkan.md +++ b/docs/guide/Vulkan.md @@ -65,6 +65,11 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f ``` ::: +* :::details Windows only: LLVM (optional, recommended if you have build issues) + There are a few methods to install LLVM: + * **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Window listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM. + * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download an installer for your Windows architecture. + ::: ### Building From Source When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source. 
From 8b414c136f4fceb2ffd09783326a5fa77f243711 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 19:36:28 +0300 Subject: [PATCH 5/9] fix(Vulkan): deduplicate the same device coming from different drivers --- llama/gpuInfo/vulkan-gpu-info.cpp | 95 ++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 72beb2b9..7fd4f43d 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,16 +1,107 @@
 #include
+#include
 #include
 #include
+constexpr uint32_t VK_VENDOR_ID_AMD = 0x1002;
+constexpr uint32_t VK_VENDOR_ID_APPLE = 0x106b;
+constexpr uint32_t VK_VENDOR_ID_INTEL = 0x8086;
+constexpr uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
+
 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

-static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
+static vk::Instance vulkanInstance() {
     vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
     vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
-    vk::Instance instance = vk::createInstance(createInfo);
+    return vk::createInstance(createInfo);
+}

+static std::vector<vk::PhysicalDevice> dedupedDevices() {
+    vk::Instance instance = vulkanInstance();
     auto physicalDevices = instance.enumeratePhysicalDevices();
+    std::vector<vk::PhysicalDevice> dedupedDevices;
+    dedupedDevices.reserve(physicalDevices.size());
+
+    // adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
+    for (const auto& device : physicalDevices) {
+        vk::PhysicalDeviceProperties2 newProps;
+        vk::PhysicalDeviceDriverProperties newDriver;
+        vk::PhysicalDeviceIDProperties newId;
+        newProps.pNext = &newDriver;
+        newDriver.pNext = &newId;
+        device.getProperties2(&newProps);
+
+        auto oldDevice = std::find_if(
+            dedupedDevices.begin(),
+            dedupedDevices.end(),
+            [&newId](const vk::PhysicalDevice& oldDevice) {
+                vk::PhysicalDeviceProperties2 oldProps;
+                vk::PhysicalDeviceDriverProperties oldDriver;
+                vk::PhysicalDeviceIDProperties oldId;
+                oldProps.pNext = &oldDriver;
+                oldDriver.pNext = &oldId;
+                oldDevice.getProperties2(&oldProps);
+
+                bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
+                equals |= oldId.deviceLUIDValid && newId.deviceLUIDValid &&
+                    std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID));
+
+                return equals;
+            }
+        );
+
+        if (oldDevice == dedupedDevices.end()) {
+            dedupedDevices.push_back(device);
+            continue;
+        }
+
+        vk::PhysicalDeviceProperties2 oldProps;
+        vk::PhysicalDeviceDriverProperties oldDriver;
+        oldProps.pNext = &oldDriver;
+        oldDevice->getProperties2(&oldProps);
+
+        std::map<vk::DriverId, int> driverPriorities {};
+        int oldPriority = std::numeric_limits<int>::max();
+        int newPriority = std::numeric_limits<int>::max();
+
+        switch (oldProps.properties.vendorID) {
+            case VK_VENDOR_ID_AMD:
+                driverPriorities[vk::DriverId::eMesaRadv] = 1;
+                driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
+                driverPriorities[vk::DriverId::eAmdProprietary] = 3;
+                break;
+            case VK_VENDOR_ID_INTEL:
+                driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
+                driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
+                break;
+            case VK_VENDOR_ID_NVIDIA:
+                driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
+#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION
>= 235
+                driverPriorities[vk::DriverId::eMesaNvk] = 2;
+#endif
+                break;
+        }
+        driverPriorities[vk::DriverId::eMesaDozen] = 4;
+
+        if (driverPriorities.count(oldDriver.driverID)) {
+            oldPriority = driverPriorities[oldDriver.driverID];
+        }
+        if (driverPriorities.count(newDriver.driverID)) {
+            newPriority = driverPriorities[newDriver.driverID];
+        }
+
+        if (newPriority < oldPriority) {
+            dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
+            dedupedDevices.push_back(device);
+        }
+    }
+
+    return dedupedDevices;
+}
+
+static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
+    auto physicalDevices = dedupedDevices();
     size_t usedMem = 0;
     size_t totalMem = 0;
From 1b4048ff3cf13f97ddbdf1e1dd871dc131148376 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 19:37:39 +0300 Subject: [PATCH 6/9] fix: more explicit type --- llama/gpuInfo/vulkan-gpu-info.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 7fd4f43d..54657e71 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -4,10 +4,10 @@
 #include

-constexpr uint32_t VK_VENDOR_ID_AMD = 0x1002;
-constexpr uint32_t VK_VENDOR_ID_APPLE = 0x106b;
-constexpr uint32_t VK_VENDOR_ID_INTEL = 0x8086;
-constexpr uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
+constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
+constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
+constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
+constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;

 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
From 41eb5103f9208012ed8cc956890f534a53905509 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 20:09:35 +0300 Subject: [PATCH 7/9] fix(Vulkan): build --- llama/gpuInfo/vulkan-gpu-info.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 54657e71..f2981b56 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -62,8 +62,8 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
         oldDevice->getProperties2(&oldProps);

         std::map<vk::DriverId, int> driverPriorities {};
-        int oldPriority = std::numeric_limits<int>::max();
-        int newPriority = std::numeric_limits<int>::max();
+        int oldPriority = 1000;
+        int newPriority = 1000;

         switch (oldProps.properties.vendorID) {
             case VK_VENDOR_ID_AMD:
From 794953b1959addf5b4823c13f007f666685dc68b Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 26 Oct 2025 16:36:42 +0200 Subject: [PATCH 8/9] fix(Vulkan): device deduplication logic --- llama/gpuInfo/vulkan-gpu-info.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index f2981b56..1cb00d78 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -44,8 +44,10 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
                 oldDevice.getProperties2(&oldProps);

                 bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
-                equals |= oldId.deviceLUIDValid && newId.deviceLUIDValid &&
-                    std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID));
+                equals = equals || (
+                    oldId.deviceLUIDValid &&
newId.deviceLUIDValid && + std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID)) + ); return equals; } From d912d236557a4ce8772570fb9dec3d41a82557b0 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 26 Oct 2025 16:42:52 +0200 Subject: [PATCH 9/9] docs: typo --- docs/guide/Vulkan.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/Vulkan.md b/docs/guide/Vulkan.md index 0a089c6c..cb66ab3d 100644 --- a/docs/guide/Vulkan.md +++ b/docs/guide/Vulkan.md @@ -68,7 +68,7 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor * :::details Windows only: LLVM (optional, recommended if you have build issues) There are a few methods to install LLVM: * **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Window listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM. - * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download an installer for your Windows architecture. + * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download the installer for your Windows architecture. ::: ### Building From Source
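Taken together, the user-visible effect of these Vulkan fixes is in the values the JavaScript API reports. A rough sketch of how that could be observed follows; `getGpuDeviceNames()` and `getVramState()` are assumed here to be the public counterparts of the addon's `getGpuDeviceInfo`/`getGpuVramInfo` functions edited above, so treat those method names as assumptions rather than part of these patches:

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

// With the deduplication fix, the same physical GPU exposed through several
// Vulkan drivers should appear in this list only once.
console.log("GPU devices:", await llama.getGpuDeviceNames());

// With the IGPU device type now included, integrated GPU memory is counted
// in these totals as well.
const vramState = await llama.getVramState();
console.log("Total VRAM:", vramState.total, "Used VRAM:", vramState.used);
```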