From adc3fcbf8fbece2ae3b06a64541fd1a9670cea3d Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 18:23:15 +0300 Subject: [PATCH 1/9] fix(Vulkan): include integrated GPU memory --- docs/guide/embedding.md | 2 +- llama/addon/globals/getGpuInfo.cpp | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/guide/embedding.md b/docs/guide/embedding.md index fa4f4167..8d712ea3 100644 --- a/docs/guide/embedding.md +++ b/docs/guide/embedding.md @@ -204,7 +204,7 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue"; #### Embedded databases {#databases-embedded} -* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://lancedb.github.io/lancedb/basic/#__tabbed_1_2)) - Serverless vector database you can embed inside your application. No server required. +* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://www.npmjs.com/package/@lancedb/lancedb#usage)) - Serverless vector database you can embed inside your application. No server required.
* **Vectra** ([GitHub](https://github.com/Stevenic/vectra) | [npm](https://www.npmjs.com/package/vectra)) - local vector database using local files diff --git a/llama/addon/globals/getGpuInfo.cpp b/llama/addon/globals/getGpuInfo.cpp index 1bcde7a9..0e6550d9 100644 --- a/llama/addon/globals/getGpuInfo.cpp +++ b/llama/addon/globals/getGpuInfo.cpp @@ -27,7 +27,8 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { device = ggml_backend_dev_get(i); - if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { + auto deviceType = ggml_backend_dev_type(device); + if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) { deviceTotal = 0; deviceFree = 0; ggml_backend_dev_memory(device, &deviceFree, &deviceTotal); @@ -76,8 +77,8 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) { for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t device = ggml_backend_dev_get(i); - if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) { - + auto deviceType = ggml_backend_dev_type(device); + if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) { deviceNames.push_back(std::string(ggml_backend_dev_description(device))); } } From 6ae52b0c7c2072e4df57313a75edf456d740743e Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:49:42 +0300 Subject: [PATCH 2/9] fix: adapt Llama chat wrappers to breaking `llama.cpp` changes --- src/chatWrappers/FunctionaryChatWrapper.ts | 28 ++++---- src/chatWrappers/Llama3ChatWrapper.ts | 14 ++-- src/chatWrappers/Llama3_1ChatWrapper.ts | 8 +-- .../Llama3_2LightweightChatWrapper.ts | 12 ++-- .../llama3.2/promptCompletion.test.ts | 6 +- .../FunctionaryChatWrapper.test.ts | 66 +++++++------------ .../chatWrappers/Llama3ChatWrapper.test.ts | 30 +++------ .../chatWrappers/Llama3_1ChatWrapper.test.ts | 33 ++++------ .../utils/resolveChatWrapper.test.ts | 16 ++--- 9 files changed, 85 insertions(+), 128 deletions(-) diff --git a/src/chatWrappers/FunctionaryChatWrapper.ts b/src/chatWrappers/FunctionaryChatWrapper.ts index 402939b1..8afac88f 100644 --- a/src/chatWrappers/FunctionaryChatWrapper.ts +++ b/src/chatWrappers/FunctionaryChatWrapper.ts @@ -39,13 +39,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { prefix: LlamaText([ new SpecialTokensText("<|start_header_id|>tool<|end_header_id|>\n\n") ]), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -72,13 +72,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { "{{functionName}}", new SpecialTokensText("\n") ]), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -155,13 +155,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { return LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), LlamaText.fromJSON(item.text), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "user") { return LlamaText([ new 
SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.text, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "model") { if (isLastItem && item.response.length === 0) @@ -178,7 +178,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { return; res.push(LlamaText(pendingFunctionCalls)); - res.push(LlamaText(new SpecialToken("EOT"))); + res.push(LlamaText(new SpecialTokensText("<|eot_id|>"))); res.push(LlamaText(pendingFunctionResults)); pendingFunctionResults.length = 0; @@ -206,7 +206,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response, (!isLastResponse || isLastItem) ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ]) ); @@ -232,7 +232,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response.result === undefined ? "" // "void" : jsonDumps(response.result), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } else @@ -320,13 +320,13 @@ export class FunctionaryChatWrapper extends ChatWrapper { return LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), LlamaText.fromJSON(item.text), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "user") { return LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.text, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]); } else if (item.type === "model") { if (isLastItem && item.response.length === 0) @@ -343,7 +343,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { return; res.push(LlamaText(pendingFunctionCalls)); - res.push(LlamaText(new SpecialToken("EOT"))); + res.push(LlamaText(new SpecialTokensText("<|eot_id|>"))); res.push(LlamaText(pendingFunctionResults)); pendingFunctionResults.length = 0; @@ -365,7 +365,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response, (isLastItem && isLastResponse) ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } else if (isChatModelResponseFunctionCall(response)) { @@ -392,7 +392,7 @@ export class FunctionaryChatWrapper extends ChatWrapper { response.result === undefined ? 
"" // "void" : jsonDumps(response.result), - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } else diff --git a/src/chatWrappers/Llama3ChatWrapper.ts b/src/chatWrappers/Llama3ChatWrapper.ts index a095a4f8..070f1343 100644 --- a/src/chatWrappers/Llama3ChatWrapper.ts +++ b/src/chatWrappers/Llama3ChatWrapper.ts @@ -34,13 +34,13 @@ export class Llama3ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText(new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, parallelism: { call: { sectionPrefix: "", betweenCalls: "\n", - sectionSuffix: LlamaText(new SpecialToken("EOT")) + sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>")) }, result: { sectionPrefix: "", @@ -62,11 +62,11 @@ export class Llama3ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText([ - LlamaText(new SpecialToken("EOT")), + LlamaText(new SpecialTokensText("<|eot_id|>")), new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n") ]), suffix: LlamaText([ - new SpecialToken("EOT"), + new SpecialTokensText("<|eot_id|>"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n") ]) } @@ -147,7 +147,7 @@ export class Llama3ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -157,7 +157,7 @@ export class Llama3ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -169,7 +169,7 @@ export class Llama3ChatWrapper extends ChatWrapper { item.model, isLastItem ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/src/chatWrappers/Llama3_1ChatWrapper.ts b/src/chatWrappers/Llama3_1ChatWrapper.ts index 36290259..c2bdd3e5 100644 --- a/src/chatWrappers/Llama3_1ChatWrapper.ts +++ b/src/chatWrappers/Llama3_1ChatWrapper.ts @@ -29,7 +29,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { }, result: { prefix: LlamaText(new SpecialTokensText("\n<|start_header_id|>ipython<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")) } } }; @@ -189,7 +189,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -199,7 +199,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -211,7 +211,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper { item.model, isLastItem ? 
LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts index a9f6bb89..e849b28d 100644 --- a/src/chatWrappers/Llama3_2LightweightChatWrapper.ts +++ b/src/chatWrappers/Llama3_2LightweightChatWrapper.ts @@ -24,12 +24,12 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { optionalPrefixSpace: true, prefix: '{"name": "', paramsPrefix: '", "parameters": ', - suffix: LlamaText("}", new SpecialToken("EOT")), + suffix: LlamaText("}", new SpecialTokensText("<|eot_id|>")), emptyCallParamsPlaceholder: {} }, result: { - prefix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>ipython<|end_header_id|>\n\n")), - suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")) + prefix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>ipython<|end_header_id|>\n\n")), + suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n")) } } }; @@ -192,7 +192,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"), item.system, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -202,7 +202,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { LlamaText([ new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"), item.user, - new SpecialToken("EOT") + new SpecialTokensText("<|eot_id|>") ]) ); } @@ -214,7 +214,7 @@ export class Llama3_2LightweightChatWrapper extends ChatWrapper { item.model, isLastItem ? LlamaText([]) - : new SpecialToken("EOT") + : new SpecialTokensText("<|eot_id|>") ]) ); } diff --git a/test/modelDependent/llama3.2/promptCompletion.test.ts b/test/modelDependent/llama3.2/promptCompletion.test.ts index a0db42f2..8c76051d 100644 --- a/test/modelDependent/llama3.2/promptCompletion.test.ts +++ b/test/modelDependent/llama3.2/promptCompletion.test.ts @@ -58,7 +58,7 @@ describe("llama 3.2", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "user", new SpecialTokensText("<|end_header_id|>"), @@ -86,14 +86,14 @@ describe("llama 3.2", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "user", new SpecialTokensText("<|end_header_id|>"), " Hi there!", - new SpecialToken("EOT"), + new SpecialToken("EOS"), new SpecialTokensText("<|start_header_id|>"), "assistant", new SpecialTokensText("<|end_header_id|>"), diff --git a/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts b/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts index 81c4b686..a330babe 100644 --- a/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts +++ b/test/standalone/chatWrappers/FunctionaryChatWrapper.test.ts @@ -199,23 +199,19 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -272,34 +268,28 @@ describe("FunctionaryChatWrapper", () => { }) => any; } // namespace functions", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>system<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>system<|end_header_id|> "), "The assistant calls functions with appropriate input when necessary. The assistant writes <|stop|> when finished answering.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>system<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>system<|end_header_id|> "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Role a dice twice and tell me the total result", - new SpecialToken("EOT"), - new SpecialTokensText("<|reserved_special_token_249|>"), + new SpecialTokensText("<|eot_id|><|reserved_special_token_249|>"), "getRandomNumber", new SpecialTokensText(" "), @@ -309,24 +299,21 @@ describe("FunctionaryChatWrapper", () => { new SpecialTokensText(" "), "{"min": 1, "max": 6}", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>tool<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>tool<|end_header_id|> name="), "getRandomNumber", new SpecialTokensText(" "), "3", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>tool<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>tool<|end_header_id|> name="), "getRandomNumber", new SpecialTokensText(" "), "4", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "The total result of rolling the dice twice is 3 + 4 = 7.", @@ -353,23 +340,19 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -384,28 +367,23 @@ describe("FunctionaryChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), ]) diff --git a/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts b/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts index 55cc5d24..d8c9dee2 100644 --- a/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts +++ b/test/standalone/chatWrappers/Llama3ChatWrapper.test.ts @@ -43,13 +43,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -67,23 +65,19 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "How are you?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -110,13 +104,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -131,13 +123,11 @@ describe("Llama3ChatWrapper", () => { "), "You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello! diff --git a/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts b/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts index fe8530da..fab30cc9 100644 --- a/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts +++ b/test/standalone/chatWrappers/Llama3_1ChatWrapper.test.ts @@ -75,13 +75,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -143,23 +141,19 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. 
If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "What is the time?", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> { "), ""22:00:00"", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "I'm good, how are you?", @@ -206,13 +199,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello!", @@ -235,13 +226,11 @@ describe("Llama3_1ChatWrapper", () => { You are a helpful, respectful and honest assistant. Always answer as helpfully as possible. If a question does not make any sense, or is not factually coherent, explain why instead of answering something incorrectly. If you don't know the answer to a question, don't share false information.", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>user<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>user<|end_header_id|> "), "Hi there!", - new SpecialToken("EOT"), - new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|> + new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|> "), "Hello! 
diff --git a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts index 745eb4a4..0523e7e2 100644 --- a/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts +++ b/test/standalone/chatWrappers/utils/resolveChatWrapper.test.ts @@ -63,8 +63,8 @@ const falconJinjaTemplate = ` `.slice(1, -1); const funcationaryJinjaTemplateV2 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|from|>' + message['role'] + '\n<|recipient|>all\n<|content|>' + message['content'] + '\n' }}{% elif message['role'] == 'tool' %}\n{{ '<|from|>' + message['name'] + '\n<|recipient|>all\n<|content|>' + message['content'] + '\n' }}{% else %}\n{% set contain_content='no'%}\n{% if message['content'] is not none %}\n{{ '<|from|>assistant\n<|recipient|>all\n<|content|>' + message['content'] }}{% set contain_content='yes'%}\n{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{% set prompt='<|from|>assistant\n<|recipient|>' + tool_call['function']['name'] + '\n<|content|>' + tool_call['function']['arguments'] %}\n{% if loop.index == 1 and contain_content == \"no\" %}\n{{ prompt }}{% else %}\n{{ '\n' + prompt}}{% endif %}\n{% endfor %}\n{% endif %}\n{{ '<|stop|>\n' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|from|>assistant\n<|recipient|>' }}{% endif %}"; -const funcationaryJinjaTemplateV2Llama3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + eot_token }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ eot_token }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; -const funcationaryJinjaTemplateV3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + eot_token }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ '>>>all\n' + message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ eot_token }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; +const funcationaryJinjaTemplateV2Llama3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + 
message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + 'name=' + message['name'] + '\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '<|reserved_special_token_249|>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; +const funcationaryJinjaTemplateV3 = "{% for message in messages %}\n{% if message['role'] == 'user' or message['role'] == 'system' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% elif message['role'] == 'tool' %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n' + message['content'] + '<|eot_id|>' }}{% else %}\n{{ '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'}}{% if message['content'] is not none %}\n{{ '>>>all\n' + message['content'] }}{% endif %}\n{% if 'tool_calls' in message and message['tool_calls'] is not none %}\n{% for tool_call in message['tool_calls'] %}\n{{ '>>>' + tool_call['function']['name'] + '\n' + tool_call['function']['arguments'] }}{% endfor %}\n{% endif %}\n{{ '<|eot_id|>' }}{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}{{ '<|start_header_id|>{role}<|end_header_id|>\n\n' }}{% endif %}"; const gemmaJinjaTemplate = ` {%- if messages[0]['role'] == 'system' %} @@ -125,7 +125,7 @@ const llama2ChatJinjaTemplate = ` const llama3ChatJinjaTemplate = ` {%- set loop_messages = messages -%} {%- for message in loop_messages -%} - {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + eot_token -%} + {%- set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' -%} {%- if loop.index0 == 0 -%} {%- set content = bos_token + content -%} {%- endif -%} @@ -177,7 +177,7 @@ const llama3_1ChatJinjaTemplate = ` {%- endfor %} {%- endif %} {{- system_message }} -{{- eot_token }} +{{- '<|eot_id|>' }} {#- Custom tools are passed in a user message with some extra guidance #} {%- if tools_in_user_message and not tools is none %} @@ -197,12 +197,12 @@ const llama3_1ChatJinjaTemplate = ` {{- t | tojson(indent=4) }} {{- "\\n\\n" }} {%- endfor %} - {{- first_user_message + eot_token}} + {{- first_user_message + '<|eot_id|>'}} {%- endif %} {%- for message in messages %} {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} - {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + eot_token }} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }} {%- elif 'tool_calls' in message %} {%- if not message.tool_calls|length == 1 %} {{- raise_exception("This model only supports single tool-calls at once!") }} @@ -229,7 +229,7 @@ const llama3_1ChatJinjaTemplate = ` {#- This means we're in ipython mode #} {{- "<|eom_id|>" }} {%- else %} - {{- eot_token }} + {{- '<|eot_id|>' }} {%- endif %} {%- elif message.role == "tool" or 
message.role == "ipython" %} {{- "<|start_header_id|>ipython<|end_header_id|>\\n\\n" }} @@ -238,7 +238,7 @@ const llama3_1ChatJinjaTemplate = ` {%- else %} {{- message.content }} {%- endif %} - {{- eot_token }} + {{- '<|eot_id|>' }} {%- endif %} {%- endfor %} {%- if add_generation_prompt %} From e2794652118e2c166d12f4d6c70d5f11b6809c4c Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:50:06 +0300 Subject: [PATCH 3/9] fix: internal log level --- src/bindings/Llama.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/bindings/Llama.ts b/src/bindings/Llama.ts index cb5f042c..e65060a5 100644 --- a/src/bindings/Llama.ts +++ b/src/bindings/Llama.ts @@ -684,6 +684,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil return LlamaLogLevel.info; else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list")) return LlamaLogLevel.info; + else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified")) + return LlamaLogLevel.info; else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU")) return LlamaLogLevel.info; From 76aea27aca02818a24d52559272905c134682848 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 19 Oct 2025 22:52:40 +0300 Subject: [PATCH 4/9] docs(Vulkan): recommend installing LLVM on Windows --- docs/guide/Vulkan.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/docs/guide/Vulkan.md b/docs/guide/Vulkan.md index 37e65573..0a089c6c 100644 --- a/docs/guide/Vulkan.md +++ b/docs/guide/Vulkan.md @@ -65,6 +65,11 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f ``` ::: +* :::details Windows only: LLVM (optional, recommended if you have build issues) + There are a few methods to install LLVM: + * **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Window listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM. + * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download an installer for your Windows architecture. + ::: ### Building From Source When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source. 
From 8b414c136f4fceb2ffd09783326a5fa77f243711 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 19:36:28 +0300 Subject: [PATCH 5/9] fix(Vulkan): deduplicate the same device coming from different drivers --- llama/gpuInfo/vulkan-gpu-info.cpp | 95 ++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 72beb2b9..7fd4f43d 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -1,16 +1,107 @@
 #include
+#include
 #include
 #include
+constexpr uint32_t VK_VENDOR_ID_AMD = 0x1002;
+constexpr uint32_t VK_VENDOR_ID_APPLE = 0x106b;
+constexpr uint32_t VK_VENDOR_ID_INTEL = 0x8086;
+constexpr uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
+
 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

-static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
+static vk::Instance vulkanInstance() {
     vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
     vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
-    vk::Instance instance = vk::createInstance(createInfo);
+    return vk::createInstance(createInfo);
+}

+static std::vector<vk::PhysicalDevice> dedupedDevices() {
+    vk::Instance instance = vulkanInstance();
     auto physicalDevices = instance.enumeratePhysicalDevices();
+    std::vector<vk::PhysicalDevice> dedupedDevices;
+    dedupedDevices.reserve(physicalDevices.size());
+
+    // adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
+    for (const auto& device : physicalDevices) {
+        vk::PhysicalDeviceProperties2 newProps;
+        vk::PhysicalDeviceDriverProperties newDriver;
+        vk::PhysicalDeviceIDProperties newId;
+        newProps.pNext = &newDriver;
+        newDriver.pNext = &newId;
+        device.getProperties2(&newProps);
+
+        auto oldDevice = std::find_if(
+            dedupedDevices.begin(),
+            dedupedDevices.end(),
+            [&newId](const vk::PhysicalDevice& oldDevice) {
+                vk::PhysicalDeviceProperties2 oldProps;
+                vk::PhysicalDeviceDriverProperties oldDriver;
+                vk::PhysicalDeviceIDProperties oldId;
+                oldProps.pNext = &oldDriver;
+                oldDriver.pNext = &oldId;
+                oldDevice.getProperties2(&oldProps);
+
+                bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
+                equals |= oldId.deviceLUIDValid && newId.deviceLUIDValid &&
+                    std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID));
+
+                return equals;
+            }
+        );
+
+        if (oldDevice == dedupedDevices.end()) {
+            dedupedDevices.push_back(device);
+            continue;
+        }
+
+        vk::PhysicalDeviceProperties2 oldProps;
+        vk::PhysicalDeviceDriverProperties oldDriver;
+        oldProps.pNext = &oldDriver;
+        oldDevice->getProperties2(&oldProps);
+
+        std::map<vk::DriverId, int> driverPriorities {};
+        int oldPriority = std::numeric_limits<int>::max();
+        int newPriority = std::numeric_limits<int>::max();
+
+        switch (oldProps.properties.vendorID) {
+            case VK_VENDOR_ID_AMD:
+                driverPriorities[vk::DriverId::eMesaRadv] = 1;
+                driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
+                driverPriorities[vk::DriverId::eAmdProprietary] = 3;
+                break;
+            case VK_VENDOR_ID_INTEL:
+                driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
+                driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
+                break;
+            case VK_VENDOR_ID_NVIDIA:
+                driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
+#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION
>= 235
+                driverPriorities[vk::DriverId::eMesaNvk] = 2;
+#endif
+                break;
+        }
+        driverPriorities[vk::DriverId::eMesaDozen] = 4;
+
+        if (driverPriorities.count(oldDriver.driverID)) {
+            oldPriority = driverPriorities[oldDriver.driverID];
+        }
+        if (driverPriorities.count(newDriver.driverID)) {
+            newPriority = driverPriorities[newDriver.driverID];
+        }
+
+        if (newPriority < oldPriority) {
+            dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
+            dedupedDevices.push_back(device);
+        }
+    }
+
+    return dedupedDevices;
+}
+
+static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
+    auto physicalDevices = dedupedDevices();
     size_t usedMem = 0;
     size_t totalMem = 0;
From 1b4048ff3cf13f97ddbdf1e1dd871dc131148376 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 19:37:39 +0300 Subject: [PATCH 6/9] fix: more explicit type --- llama/gpuInfo/vulkan-gpu-info.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 7fd4f43d..54657e71 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -4,10 +4,10 @@
 #include

-constexpr uint32_t VK_VENDOR_ID_AMD = 0x1002;
-constexpr uint32_t VK_VENDOR_ID_APPLE = 0x106b;
-constexpr uint32_t VK_VENDOR_ID_INTEL = 0x8086;
-constexpr uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
+constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
+constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
+constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
+constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;

 typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
From 41eb5103f9208012ed8cc956890f534a53905509 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Mon, 20 Oct 2025 20:09:35 +0300 Subject: [PATCH 7/9] fix(Vulkan): build --- llama/gpuInfo/vulkan-gpu-info.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index 54657e71..f2981b56 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -62,8 +62,8 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
         oldDevice->getProperties2(&oldProps);

         std::map<vk::DriverId, int> driverPriorities {};
-        int oldPriority = std::numeric_limits<int>::max();
-        int newPriority = std::numeric_limits<int>::max();
+        int oldPriority = 1000;
+        int newPriority = 1000;

         switch (oldProps.properties.vendorID) {
             case VK_VENDOR_ID_AMD:
From 794953b1959addf5b4823c13f007f666685dc68b Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 26 Oct 2025 16:36:42 +0200 Subject: [PATCH 8/9] fix(Vulkan): device deduplication logic --- llama/gpuInfo/vulkan-gpu-info.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llama/gpuInfo/vulkan-gpu-info.cpp b/llama/gpuInfo/vulkan-gpu-info.cpp index f2981b56..1cb00d78 100644 --- a/llama/gpuInfo/vulkan-gpu-info.cpp +++ b/llama/gpuInfo/vulkan-gpu-info.cpp @@ -44,8 +44,10 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
                 oldDevice.getProperties2(&oldProps);

                 bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
-                equals |= oldId.deviceLUIDValid && newId.deviceLUIDValid &&
-                    std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID));
+                equals = equals || (
+                    oldId.deviceLUIDValid &&
newId.deviceLUIDValid && + std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID)) + ); return equals; } From d912d236557a4ce8772570fb9dec3d41a82557b0 Mon Sep 17 00:00:00 2001 From: Gilad S Date: Sun, 26 Oct 2025 16:42:52 +0200 Subject: [PATCH 9/9] docs: typo --- docs/guide/Vulkan.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/Vulkan.md b/docs/guide/Vulkan.md index 0a089c6c..cb66ab3d 100644 --- a/docs/guide/Vulkan.md +++ b/docs/guide/Vulkan.md @@ -68,7 +68,7 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor * :::details Windows only: LLVM (optional, recommended if you have build issues) There are a few methods to install LLVM: * **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Window listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM. - * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download an installer for your Windows architecture. + * **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download the installer for your Windows architecture. ::: ### Building From Source
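Taken together, the user-visible effect of these Vulkan fixes is in the values the JavaScript API reports. A rough sketch of how that could be observed follows; `getGpuDeviceNames()` and `getVramState()` are assumed here to be the public counterparts of the addon's `getGpuDeviceInfo`/`getGpuVramInfo` functions edited above, so treat those method names as assumptions rather than part of these patches:

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

// With the deduplication fix, the same physical GPU exposed through several
// Vulkan drivers should appear in this list only once.
console.log("GPU devices:", await llama.getGpuDeviceNames());

// With the IGPU device type now included, integrated GPU memory is counted
// in these totals as well.
const vramState = await llama.getVramState();
console.log("Total VRAM:", vramState.total, "Used VRAM:", vramState.used);
```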