5 changes: 5 additions & 0 deletions docs/guide/Vulkan.md
@@ -65,6 +65,11 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor
reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f
```
:::
* :::details Windows only: LLVM (optional, recommended if you have build issues)
There are a few methods to install LLVM:
* **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Windows listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM.
* **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download the installer for your Windows architecture.
:::

### Building From Source
When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.
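For illustration, a minimal sketch of that flow (the exact option names, such as `gpu` and `build`, are assumptions based on the current `getLlama` options and are not part of this change):

```typescript
import {getLlama} from "node-llama-cpp";

// if no prebuilt binary matches these options, llama.cpp is built from source automatically
const llama = await getLlama({
    gpu: "vulkan", // require Vulkan rather than silently falling back to another backend
    build: "auto" // assumed default: build from source only when no matching binary exists
});

console.log("Using GPU:", llama.gpu);
```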
2 changes: 1 addition & 1 deletion docs/guide/embedding.md
@@ -204,7 +204,7 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";
</script>

#### Embedded databases {#databases-embedded}
* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://lancedb.github.io/lancedb/basic/#__tabbed_1_2)) - Serverless vector database you can embed inside your application. No server required.
* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://www.npmjs.com/package/@lancedb/lancedb#usage)) - Serverless vector database you can embed inside your application. No server required.
<br/><DataBadge title="Written in" content="Rust"/><DataBadge title="License" content="Apache-2.0"/>

* **Vectra** ([GitHub](https://github.com/Stevenic/vectra) | [npm](https://www.npmjs.com/package/vectra)) - local vector database using local files
7 changes: 4 additions & 3 deletions llama/addon/globals/getGpuInfo.cpp
@@ -27,7 +27,8 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {

for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
device = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
auto deviceType = ggml_backend_dev_type(device);
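// count integrated GPUs (IGPU) as well as discrete GPUs when summing VRAM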
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceTotal = 0;
deviceFree = 0;
ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
@@ -76,8 +77,8 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {

for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {

auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
}
}
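For orientation, a short usage sketch of how these values typically surface on the JavaScript side (the method names `getVramState()` and `getGpuDeviceNames()` are assumptions about the public bindings, not part of this diff):

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

// VRAM totals aggregated across the GPU (and now also IGPU) devices enumerated above
const {total, used, free} = await llama.getVramState();
console.log({total, used, free});

// device descriptions collected by getGpuDeviceInfo()
console.log(await llama.getGpuDeviceNames());
```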
97 changes: 95 additions & 2 deletions llama/gpuInfo/vulkan-gpu-info.cpp
@@ -1,16 +1,109 @@
#include <stddef.h>
#include <map>
#include <vector>

#include <vulkan/vulkan.hpp>

constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;

typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
static vk::Instance vulkanInstance() {
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
vk::Instance instance = vk::createInstance(createInfo);
return vk::createInstance(createInfo);
}

static std::vector<vk::PhysicalDevice> dedupedDevices() {
vk::Instance instance = vulkanInstance();
auto physicalDevices = instance.enumeratePhysicalDevices();
std::vector<vk::PhysicalDevice> dedupedDevices;
dedupedDevices.reserve(physicalDevices.size());

// adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
for (const auto& device : physicalDevices) {
vk::PhysicalDeviceProperties2 newProps;
vk::PhysicalDeviceDriverProperties newDriver;
vk::PhysicalDeviceIDProperties newId;
newProps.pNext = &newDriver;
newDriver.pNext = &newId;
device.getProperties2(&newProps);

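// check whether a device with the same UUID (or a matching valid LUID) was already added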
auto oldDevice = std::find_if(
dedupedDevices.begin(),
dedupedDevices.end(),
[&newId](const vk::PhysicalDevice& oldDevice) {
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
vk::PhysicalDeviceIDProperties oldId;
oldProps.pNext = &oldDriver;
oldDriver.pNext = &oldId;
oldDevice.getProperties2(&oldProps);

bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
equals = equals || (
oldId.deviceLUIDValid && newId.deviceLUIDValid &&
std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID))
);

return equals;
}
);

if (oldDevice == dedupedDevices.end()) {
dedupedDevices.push_back(device);
continue;
}

vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
oldProps.pNext = &oldDriver;
oldDevice->getProperties2(&oldProps);

std::map<vk::DriverId, int> driverPriorities {};
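// lower value = more preferred driver for this vendor (adapted, like the rest of this dedup logic, from llama.cpp's ggml-vulkan)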
int oldPriority = 1000;
int newPriority = 1000;

switch (oldProps.properties.vendorID) {
case VK_VENDOR_ID_AMD:
driverPriorities[vk::DriverId::eMesaRadv] = 1;
driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
driverPriorities[vk::DriverId::eAmdProprietary] = 3;
break;
case VK_VENDOR_ID_INTEL:
driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
break;
case VK_VENDOR_ID_NVIDIA:
driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235
driverPriorities[vk::DriverId::eMesaNvk] = 2;
#endif
break;
}
driverPriorities[vk::DriverId::eMesaDozen] = 4;

if (driverPriorities.count(oldDriver.driverID)) {
oldPriority = driverPriorities[oldDriver.driverID];
}
if (driverPriorities.count(newDriver.driverID)) {
newPriority = driverPriorities[newDriver.driverID];
}

if (newPriority < oldPriority) {
dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
dedupedDevices.push_back(device);
}
}

return dedupedDevices;
}

static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
auto physicalDevices = dedupedDevices();

size_t usedMem = 0;
size_t totalMem = 0;
2 changes: 2 additions & 0 deletions src/bindings/Llama.ts
@@ -684,6 +684,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
return LlamaLogLevel.info;
else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list"))
return LlamaLogLevel.info;
else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
return LlamaLogLevel.info;
else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
return LlamaLogLevel.info;

28 changes: 14 additions & 14 deletions src/chatWrappers/FunctionaryChatWrapper.ts
@@ -39,13 +39,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
prefix: LlamaText([
new SpecialTokensText("<|start_header_id|>tool<|end_header_id|>\n\n")
]),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -72,13 +72,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
"{{functionName}}",
new SpecialTokensText("\n")
]),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -155,13 +155,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
return LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
LlamaText.fromJSON(item.text),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "user") {
return LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.text,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "model") {
if (isLastItem && item.response.length === 0)
@@ -178,7 +178,7 @@
return;

res.push(LlamaText(pendingFunctionCalls));
res.push(LlamaText(new SpecialToken("EOT")));
res.push(LlamaText(new SpecialTokensText("<|eot_id|>")));
res.push(LlamaText(pendingFunctionResults));

pendingFunctionResults.length = 0;
@@ -206,7 +206,7 @@
response,
(!isLastResponse || isLastItem)
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
])
);
@@ -232,7 +232,7 @@
response.result === undefined
? "" // "void"
: jsonDumps(response.result),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
} else
@@ -320,13 +320,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
return LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
LlamaText.fromJSON(item.text),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "user") {
return LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.text,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "model") {
if (isLastItem && item.response.length === 0)
@@ -343,7 +343,7 @@
return;

res.push(LlamaText(pendingFunctionCalls));
res.push(LlamaText(new SpecialToken("EOT")));
res.push(LlamaText(new SpecialTokensText("<|eot_id|>")));
res.push(LlamaText(pendingFunctionResults));

pendingFunctionResults.length = 0;
@@ -365,7 +365,7 @@
response,
(isLastItem && isLastResponse)
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
} else if (isChatModelResponseFunctionCall(response)) {
@@ -392,7 +392,7 @@
response.result === undefined
? "" // "void"
: jsonDumps(response.result),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
} else
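For context, a brief sketch of the distinction the changes in this file rely on (assuming the public `LlamaText`, `SpecialToken`, and `SpecialTokensText` exports): `SpecialToken("EOT")` refers to the model's generic end-of-turn token, while `SpecialTokensText("<|eot_id|>")` tokenizes that exact special-token text.

```typescript
import {LlamaText, SpecialToken, SpecialTokensText} from "node-llama-cpp";

// resolves to whatever token the loaded model defines as its end-of-turn (EOT) token
const genericEot = LlamaText(new SpecialToken("EOT"));

// tokenizes this exact text, treating "<|eot_id|>" as a special token when the model defines it
const explicitEot = LlamaText(new SpecialTokensText("<|eot_id|>"));
```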
14 changes: 7 additions & 7 deletions src/chatWrappers/Llama3ChatWrapper.ts
@@ -34,13 +34,13 @@ export class Llama3ChatWrapper extends ChatWrapper {
},
result: {
prefix: LlamaText(new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "\n",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -62,11 +62,11 @@
},
result: {
prefix: LlamaText([
LlamaText(new SpecialToken("EOT")),
LlamaText(new SpecialTokensText("<|eot_id|>")),
new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")
]),
suffix: LlamaText([
new SpecialToken("EOT"),
new SpecialTokensText("<|eot_id|>"),
new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")
])
}
@@ -147,7 +147,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
item.system,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -157,7 +157,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.user,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -169,7 +169,7 @@
item.model,
isLastItem
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
}
8 changes: 4 additions & 4 deletions src/chatWrappers/Llama3_1ChatWrapper.ts
@@ -29,7 +29,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper {
},
result: {
prefix: LlamaText(new SpecialTokensText("\n<|start_header_id|>ipython<|end_header_id|>\n\n")),
suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"))
}
}
};
@@ -189,7 +189,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
item.system,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -199,7 +199,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.user,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -211,7 +211,7 @@
item.model,
isLastItem
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
}