vllm-project · WoosukKwon · Mar 27, 2024 · Mar 26, 2024 · Mar 26, 2024 · Mar 27, 2024
diff --git a/README.md b/README.md
@@ -67,6 +67,7 @@ vLLM seamlessly supports many Hugging Face models, including the following archi
 - Baichuan & Baichuan2 (`baichuan-inc/Baichuan2-13B-Chat`, `baichuan-inc/Baichuan-7B`, etc.)
 - BLOOM (`bigscience/bloom`, `bigscience/bloomz`, etc.)
 - ChatGLM (`THUDM/chatglm2-6b`, `THUDM/chatglm3-6b`, etc.)
+- DBRX (`databricks/dbrx-base`, `databricks/dbrx-instruct`, etc.)
 - DeciLM (`Deci/DeciLM-7B`, `Deci/DeciLM-7B-instruct`, etc.)
 - Falcon (`tiiuae/falcon-7b`, `tiiuae/falcon-40b`, `tiiuae/falcon-rw-7b`, etc.)
 - Gemma (`google/gemma-2b`, `google/gemma-7b`, etc.)

diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
@@ -27,6 +27,10 @@ Alongside each architecture, we include some popular models that use it.
     - ChatGLM
     - :code:`THUDM/chatglm2-6b`, :code:`THUDM/chatglm3-6b`, etc.
     - ✅︎
+  * - :code:`DbrxForCausalLM`
+    - DBRX
+    - :code:`databricks/dbrx-base`, :code:`databricks/dbrx-instruct`, etc.
+    - 
   * - :code:`DeciLMForCausalLM`
     - DeciLM
     - :code:`Deci/DeciLM-7B`, :code:`Deci/DeciLM-7B-instruct`, etc.

diff --git a/requirements.txt b/requirements.txt
@@ -14,3 +14,4 @@ prometheus_client >= 0.18.0
 pynvml == 11.5.0
 triton >= 2.1.0
 outlines == 0.0.34
+tiktoken == 0.6.0 # Required for DBRX tokenizer
diff --git a/vllm/config.py b/vllm/config.py
@@ -277,6 +277,11 @@ def get_total_num_kv_heads(self) -> int:
             # Currently, tensor parallelism is not supported in this case.
             return 1
 
+        # For DBRX and MPT: read kv_n_heads from the nested attn_config.
+        if self.hf_config.model_type in ["dbrx", "mpt"]:
+            return getattr(self.hf_config.attn_config, "kv_n_heads",
+                           self.hf_config.num_attention_heads)
+
         attributes = [
             # For Falcon:
             "n_head_kv",

diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py
@@ -17,6 +17,7 @@
     "BloomForCausalLM": ("bloom", "BloomForCausalLM"),
     "ChatGLMModel": ("chatglm", "ChatGLMForCausalLM"),
     "ChatGLMForConditionalGeneration": ("chatglm", "ChatGLMForCausalLM"),
+    "DbrxForCausalLM": ("dbrx", "DbrxForCausalLM"),
     "DeciLMForCausalLM": ("decilm", "DeciLMForCausalLM"),
     "DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"),
     "FalconForCausalLM": ("falcon", "FalconForCausalLM"),