5 changes: 5 additions & 0 deletions docs/guide/Vulkan.md
@@ -65,6 +65,11 @@ If you see `Vulkan used VRAM` in the output, it means that Vulkan support is wor
reg add "HKEY_LOCAL_MACHINE\SYSTEM\CurrentControlSet\Control\FileSystem" /v "LongPathsEnabled" /t REG_DWORD /d "1" /f
```
:::
* :::details Windows only: LLVM (optional, recommended if you have build issues)
There are a few methods to install LLVM:
* **As part of Microsoft Visual C++ Build Tools (Recommended):** the dependencies for Windows listed under [Downloading a Release](./building-from-source.md#downloading-a-release) will also install LLVM.
* **Independently:** visit the [latest LLVM release page](https://github.com/llvm/llvm-project/releases/latest) and download the installer for your Windows architecture.
:::

### Building From Source
When you use the [`getLlama`](../api/functions/getLlama) method, if there's no binary that matches the provided options, it'll automatically build `llama.cpp` from source.
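For illustration, a minimal sketch of that flow (the exact option names, such as `gpu` and `build`, are assumptions based on the current `getLlama` options and are not part of this change):

```typescript
import {getLlama} from "node-llama-cpp";

// if no prebuilt binary matches these options, llama.cpp is built from source automatically
const llama = await getLlama({
    gpu: "vulkan", // require Vulkan rather than silently falling back to another backend
    build: "auto" // assumed default: build from source only when no matching binary exists
});

console.log("Using GPU:", llama.gpu);
```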
2 changes: 1 addition & 1 deletion docs/guide/embedding.md
@@ -204,7 +204,7 @@ import DataBadge from "../../.vitepress/components/DataBadge/DataBadge.vue";
</script>

#### Embedded databases {#databases-embedded}
* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://lancedb.github.io/lancedb/basic/#__tabbed_1_2)) - Serverless vector database you can embed inside your application. No server required.
* **[LanceDB](https://lancedb.com/)** ([GitHub](https://github.com/lancedb/lancedb) | [npm](https://www.npmjs.com/package/@lancedb/lancedb) | [Quick start](https://www.npmjs.com/package/@lancedb/lancedb#usage)) - Serverless vector database you can embed inside your application. No server required.
<br/><DataBadge title="Written in" content="Rust"/><DataBadge title="License" content="Apache-2.0"/>

* **Vectra** ([GitHub](https://github.com/Stevenic/vectra) | [npm](https://www.npmjs.com/package/vectra)) - local vector database using local files
7 changes: 4 additions & 3 deletions llama/addon/globals/getGpuInfo.cpp
@@ -27,7 +27,8 @@ Napi::Value getGpuVramInfo(const Napi::CallbackInfo& info) {

for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
device = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {
auto deviceType = ggml_backend_dev_type(device);
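// count integrated GPUs (IGPU) as well as discrete GPUs when summing VRAM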
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceTotal = 0;
deviceFree = 0;
ggml_backend_dev_memory(device, &deviceFree, &deviceTotal);
@@ -76,8 +77,8 @@ Napi::Value getGpuDeviceInfo(const Napi::CallbackInfo& info) {

for (size_t i = 0; i < ggml_backend_dev_count(); i++) {
ggml_backend_dev_t device = ggml_backend_dev_get(i);
if (ggml_backend_dev_type(device) == GGML_BACKEND_DEVICE_TYPE_GPU) {

auto deviceType = ggml_backend_dev_type(device);
if (deviceType == GGML_BACKEND_DEVICE_TYPE_GPU || deviceType == GGML_BACKEND_DEVICE_TYPE_IGPU) {
deviceNames.push_back(std::string(ggml_backend_dev_description(device)));
}
}
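For orientation, a short usage sketch of how these values typically surface on the JavaScript side (the method names `getVramState()` and `getGpuDeviceNames()` are assumptions about the public bindings, not part of this diff):

```typescript
import {getLlama} from "node-llama-cpp";

const llama = await getLlama({gpu: "vulkan"});

// VRAM totals aggregated across the GPU (and now also IGPU) devices enumerated above
const {total, used, free} = await llama.getVramState();
console.log({total, used, free});

// device descriptions collected by getGpuDeviceInfo()
console.log(await llama.getGpuDeviceNames());
```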
97 changes: 95 additions & 2 deletions llama/gpuInfo/vulkan-gpu-info.cpp
@@ -1,16 +1,109 @@
#include <stddef.h>
#include <map>
#include <vector>

#include <vulkan/vulkan.hpp>

constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;

typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);

static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
static vk::Instance vulkanInstance() {
vk::ApplicationInfo appInfo("node-llama-cpp GPU info", 1, "llama.cpp", 1, VK_API_VERSION_1_2);
vk::InstanceCreateInfo createInfo(vk::InstanceCreateFlags(), &appInfo, {}, {});
vk::Instance instance = vk::createInstance(createInfo);
return vk::createInstance(createInfo);
}

static std::vector<vk::PhysicalDevice> dedupedDevices() {
vk::Instance instance = vulkanInstance();
auto physicalDevices = instance.enumeratePhysicalDevices();
std::vector<vk::PhysicalDevice> dedupedDevices;
dedupedDevices.reserve(physicalDevices.size());

// adapted from `ggml_vk_instance_init` in `ggml-vulkan.cpp`
for (const auto& device : physicalDevices) {
vk::PhysicalDeviceProperties2 newProps;
vk::PhysicalDeviceDriverProperties newDriver;
vk::PhysicalDeviceIDProperties newId;
newProps.pNext = &newDriver;
newDriver.pNext = &newId;
device.getProperties2(&newProps);

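// check whether a device with the same UUID (or a matching valid LUID) was already added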
auto oldDevice = std::find_if(
dedupedDevices.begin(),
dedupedDevices.end(),
[&newId](const vk::PhysicalDevice& oldDevice) {
vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
vk::PhysicalDeviceIDProperties oldId;
oldProps.pNext = &oldDriver;
oldDriver.pNext = &oldId;
oldDevice.getProperties2(&oldProps);

bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
equals = equals || (
oldId.deviceLUIDValid && newId.deviceLUIDValid &&
std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID))
);

return equals;
}
);

if (oldDevice == dedupedDevices.end()) {
dedupedDevices.push_back(device);
continue;
}

vk::PhysicalDeviceProperties2 oldProps;
vk::PhysicalDeviceDriverProperties oldDriver;
oldProps.pNext = &oldDriver;
oldDevice->getProperties2(&oldProps);

std::map<vk::DriverId, int> driverPriorities {};
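// lower value = more preferred driver for this vendor (adapted, like the rest of this dedup logic, from llama.cpp's ggml-vulkan)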
int oldPriority = 1000;
int newPriority = 1000;

switch (oldProps.properties.vendorID) {
case VK_VENDOR_ID_AMD:
driverPriorities[vk::DriverId::eMesaRadv] = 1;
driverPriorities[vk::DriverId::eAmdOpenSource] = 2;
driverPriorities[vk::DriverId::eAmdProprietary] = 3;
break;
case VK_VENDOR_ID_INTEL:
driverPriorities[vk::DriverId::eIntelOpenSourceMESA] = 1;
driverPriorities[vk::DriverId::eIntelProprietaryWindows] = 2;
break;
case VK_VENDOR_ID_NVIDIA:
driverPriorities[vk::DriverId::eNvidiaProprietary] = 1;
#if defined(VK_API_VERSION_1_3) && VK_HEADER_VERSION >= 235
driverPriorities[vk::DriverId::eMesaNvk] = 2;
#endif
break;
}
driverPriorities[vk::DriverId::eMesaDozen] = 4;

if (driverPriorities.count(oldDriver.driverID)) {
oldPriority = driverPriorities[oldDriver.driverID];
}
if (driverPriorities.count(newDriver.driverID)) {
newPriority = driverPriorities[newDriver.driverID];
}

if (newPriority < oldPriority) {
dedupedDevices.erase(std::remove(dedupedDevices.begin(), dedupedDevices.end(), *oldDevice), dedupedDevices.end());
dedupedDevices.push_back(device);
}
}

return dedupedDevices;
}

static bool enumerateVulkanDevices(size_t* total, size_t* used, size_t* unifiedMemorySize, bool addDeviceNames, std::vector<std::string> * deviceNames, gpuInfoVulkanWarningLogCallback_t warningLogCallback, bool * checkSupported) {
auto physicalDevices = dedupedDevices();

size_t usedMem = 0;
size_t totalMem = 0;
2 changes: 2 additions & 0 deletions src/bindings/Llama.ts
@@ -684,6 +684,8 @@ function getTransformedLogLevel(level: LlamaLogLevel, message: string, gpu: Buil
return LlamaLogLevel.info;
else if (level === LlamaLogLevel.warn && message.startsWith("load: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list"))
return LlamaLogLevel.info;
else if (level === LlamaLogLevel.warn && message.startsWith("llama_init_from_model: model default pooling_type is [0], but [-1] was specified"))
return LlamaLogLevel.info;
else if (gpu === false && level === LlamaLogLevel.warn && message.startsWith("llama_adapter_lora_init_impl: lora for '") && message.endsWith("' cannot use buft 'CPU_REPACK', fallback to CPU"))
return LlamaLogLevel.info;

28 changes: 14 additions & 14 deletions src/chatWrappers/FunctionaryChatWrapper.ts
@@ -39,13 +39,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
prefix: LlamaText([
new SpecialTokensText("<|start_header_id|>tool<|end_header_id|>\n\n")
]),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -72,13 +72,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
"{{functionName}}",
new SpecialTokensText("\n")
]),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -155,13 +155,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
return LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
LlamaText.fromJSON(item.text),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "user") {
return LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.text,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "model") {
if (isLastItem && item.response.length === 0)
@@ -178,7 +178,7 @@
return;

res.push(LlamaText(pendingFunctionCalls));
res.push(LlamaText(new SpecialToken("EOT")));
res.push(LlamaText(new SpecialTokensText("<|eot_id|>")));
res.push(LlamaText(pendingFunctionResults));

pendingFunctionResults.length = 0;
@@ -206,7 +206,7 @@
response,
(!isLastResponse || isLastItem)
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
])
);
@@ -232,7 +232,7 @@
response.result === undefined
? "" // "void"
: jsonDumps(response.result),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
} else
@@ -320,13 +320,13 @@ export class FunctionaryChatWrapper extends ChatWrapper {
return LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
LlamaText.fromJSON(item.text),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "user") {
return LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.text,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
]);
} else if (item.type === "model") {
if (isLastItem && item.response.length === 0)
@@ -343,7 +343,7 @@
return;

res.push(LlamaText(pendingFunctionCalls));
res.push(LlamaText(new SpecialToken("EOT")));
res.push(LlamaText(new SpecialTokensText("<|eot_id|>")));
res.push(LlamaText(pendingFunctionResults));

pendingFunctionResults.length = 0;
@@ -365,7 +365,7 @@
response,
(isLastItem && isLastResponse)
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
} else if (isChatModelResponseFunctionCall(response)) {
@@ -392,7 +392,7 @@
response.result === undefined
? "" // "void"
: jsonDumps(response.result),
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
} else
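For context, a brief sketch of the distinction the changes in this file rely on (assuming the public `LlamaText`, `SpecialToken`, and `SpecialTokensText` exports): `SpecialToken("EOT")` refers to the model's generic end-of-turn token, while `SpecialTokensText("<|eot_id|>")` tokenizes that exact special-token text.

```typescript
import {LlamaText, SpecialToken, SpecialTokensText} from "node-llama-cpp";

// resolves to whatever token the loaded model defines as its end-of-turn (EOT) token
const genericEot = LlamaText(new SpecialToken("EOT"));

// tokenizes this exact text, treating "<|eot_id|>" as a special token when the model defines it
const explicitEot = LlamaText(new SpecialTokensText("<|eot_id|>"));
```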
14 changes: 7 additions & 7 deletions src/chatWrappers/Llama3ChatWrapper.ts
@@ -34,13 +34,13 @@ export class Llama3ChatWrapper extends ChatWrapper {
},
result: {
prefix: LlamaText(new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")),
suffix: LlamaText(new SpecialToken("EOT"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
parallelism: {
call: {
sectionPrefix: "",
betweenCalls: "\n",
sectionSuffix: LlamaText(new SpecialToken("EOT"))
sectionSuffix: LlamaText(new SpecialTokensText("<|eot_id|>"))
},
result: {
sectionPrefix: "",
@@ -62,11 +62,11 @@
},
result: {
prefix: LlamaText([
LlamaText(new SpecialToken("EOT")),
LlamaText(new SpecialTokensText("<|eot_id|>")),
new SpecialTokensText("<|start_header_id|>function_call_result<|end_header_id|>\n\n")
]),
suffix: LlamaText([
new SpecialToken("EOT"),
new SpecialTokensText("<|eot_id|>"),
new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n")
])
}
@@ -147,7 +147,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
item.system,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -157,7 +157,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.user,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -169,7 +169,7 @@
item.model,
isLastItem
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
}
8 changes: 4 additions & 4 deletions src/chatWrappers/Llama3_1ChatWrapper.ts
@@ -29,7 +29,7 @@ export class Llama3_1ChatWrapper extends ChatWrapper {
},
result: {
prefix: LlamaText(new SpecialTokensText("\n<|start_header_id|>ipython<|end_header_id|>\n\n")),
suffix: LlamaText(new SpecialToken("EOT"), new SpecialTokensText("<|start_header_id|>assistant<|end_header_id|>\n\n"))
suffix: LlamaText(new SpecialTokensText("<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"))
}
}
};
@@ -189,7 +189,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>system<|end_header_id|>\n\n"),
item.system,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -199,7 +199,7 @@
LlamaText([
new SpecialTokensText("<|start_header_id|>user<|end_header_id|>\n\n"),
item.user,
new SpecialToken("EOT")
new SpecialTokensText("<|eot_id|>")
])
);
}
@@ -211,7 +211,7 @@
item.model,
isLastItem
? LlamaText([])
: new SpecialToken("EOT")
: new SpecialTokensText("<|eot_id|>")
])
);
}