-
-
Notifications
You must be signed in to change notification settings - Fork 120
/
Copy pathrecommendedModels.ts
346 lines (318 loc) · 17.6 KB
/
recommendedModels.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
import {ModelRecommendation} from "./utils/resolveModelRecommendationFileOptions.js";
/**
 * Curated list of recommended models, in display order.
 *
 * Each entry describes a model shown to users (name, capabilities, prose description)
 * together with an ordered list of downloadable GGUF file options
 * (`hf:<user>/<repo>:<quant>` or `hf:<user>/<repo>/<file>` URIs), listed from the
 * highest quality quantization to the lowest — the first option compatible with the
 * user's machine is expected to be picked.
 */
export const recommendedModels: ModelRecommendation[] = [{
    name: "DeepSeek R1 Distill Qwen 7B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "DeepSeek R1 model was created by DeepSeek and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is censored, but its response quality on many topics is extremely high.\n" +
        "This is the 7 billion parameters version of the model - a fine tuned Qwen 2.5 7B base model with distillation from the 671B DeepSeek R1 version.",
    fileOptions: [
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q8_0",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q6_K",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_M",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q5_K_S",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-7B-GGUF:Q4_K_M"
    ]
}, {
    name: "DeepSeek R1 Distill Qwen 14B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "DeepSeek R1 model was created by DeepSeek and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is censored, but its response quality on many topics is extremely high.\n" +
        "This is the 14 billion parameters version of the model - a fine tuned Qwen 2.5 14B base model with distillation from the 671B DeepSeek R1 version.",
    fileOptions: [
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q8_0",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q6_K",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_M",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q5_K_S",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-14B-GGUF:Q4_K_M"
    ]
}, {
    name: "DeepSeek R1 Distill Qwen 32B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "DeepSeek R1 model was created by DeepSeek and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is censored, but its response quality on many topics is extremely high.\n" +
        "This is the 32 billion parameters version of the model - a fine tuned Qwen 2.5 32B base model with distillation from the 671B DeepSeek R1 version.",
    fileOptions: [
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q8_0",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q6_K",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_M",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q5_K_S",
        "hf:mradermacher/DeepSeek-R1-Distill-Qwen-32B-GGUF:Q4_K_M"
    ]
}, {
    name: "DeepSeek R1 Distill Llama 8B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "DeepSeek R1 model was created by DeepSeek and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is censored, even though it's based on Llama 3.1.\n" +
        "This is the 8 billion parameters version of the model - a fine tuned Llama 3.1 8B base model with distillation from the 671B DeepSeek R1 version.",
    fileOptions: [
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q8_0",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q6_K",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_M",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q5_K_S",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-8B-GGUF:Q4_K_M"
    ]
}, {
    name: "DeepSeek R1 Distill Llama 70B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "DeepSeek R1 model was created by DeepSeek and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is censored, even though it's based on Llama 3.3.\n" +
        "This is the 70 billion parameters version of the model - a fine tuned Llama 3.3 70B base model with distillation from the 671B DeepSeek R1 version.",
    fileOptions: [
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q8_0.gguf.part1of2",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF/DeepSeek-R1-Distill-Llama-70B.Q6_K.gguf.part1of2",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_M",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q5_K_S",
        "hf:mradermacher/DeepSeek-R1-Distill-Llama-70B-GGUF:Q4_K_M"
    ]
}, {
    name: "QwQ 32B",
    abilities: ["chat", "complete", "functionCalling", "reasoning"],
    description: "QwQ model was created by Alibaba and is using chain of thought (CoT) to reason across a wide variety of topics.\n" +
        "It's optimized for assistant-like chat use cases, with native support for function calling.\n" +
        "This model is censored, but its response quality on many topics is extremely high.\n" +
        "Its performance is comparable to DeepSeek R1 671B.",
    fileOptions: [
        "hf:Qwen/QwQ-32B-GGUF:Q8_0",
        "hf:Qwen/QwQ-32B-GGUF:Q6_K",
        "hf:Qwen/QwQ-32B-GGUF:Q5_K_M",
        "hf:Qwen/QwQ-32B-GGUF:Q4_K_M"
    ]
}, {
    name: "Llama 3.1 8B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Llama 3.1 model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This is the 8 billion parameters version of the model.",
    fileOptions: [
        "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q8_0",
        "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q6_K",
        "hf:mradermacher/Meta-Llama-3.1-8B-Instruct-GGUF:Q4_K_M"
    ]
}, {
    name: "Llama 3.1 70B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Llama 3.1 model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This is the 70 billion parameters version of the model. " +
        "You need a GPU with a lot of VRAM to use this version.",
    fileOptions: [
        "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q8_0.gguf.part1of2",
        "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF/Meta-Llama-3.1-70B-Instruct.Q6_K.gguf.part1of2",
        "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_M",
        "hf:mradermacher/Meta-Llama-3.1-70B-Instruct-GGUF:Q4_K_S"
    ]
}, {
    name: "Llama 3.1 405B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Llama 3.1 model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This is the 405 billion parameters version of the model, and its capabilities are comparable and sometimes even surpass GPT-4o and Claude 3.5 Sonnet.\n" +
        "You need a GPU with a lot of VRAM to use this version of Llama 3.1.",
    fileOptions: [
        "hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_L.gguf.part1of5",
        "hf:mradermacher/Meta-Llama-3.1-405B-Instruct-GGUF/Meta-Llama-3.1-405B-Instruct.Q3_K_M.gguf.part1of4"
    ]
}, {
    name: "Phi 4 14B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Phi 4 model was created by Microsoft and is optimized for complex reasoning in areas such as math.",
    fileOptions: [
        "hf:mradermacher/phi-4-GGUF:Q8_0",
        "hf:mradermacher/phi-4-GGUF:Q6_K",
        "hf:mradermacher/phi-4-GGUF:Q4_K_M",
        "hf:mradermacher/phi-4-GGUF:Q4_K_S"
    ]
}, {
    name: "Mistral Nemo 12B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Mistral Nemo model was created by Mistral AI and was trained on a large proportion of multilingual and code data, with support for function calling.\n" +
        "It was trained jointly by Mistral AI and NVIDIA.\n" +
        "This is a 12 billion parameters model.",
    fileOptions: [
        "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q8_0",
        "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q6_K",
        "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_M",
        "hf:mradermacher/Mistral-Nemo-Instruct-2407-GGUF:Q4_K_S"
    ]
}, {
    name: "Llama 3.2 3B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Llama 3.2 3B model was created by Meta and is optimized for assistant-like chat use cases, with support for function calling.\n" +
        "This model is smarter than the 1B model, but is still relatively small and can run on less capable machines.",
    fileOptions: [
        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q8_0",
        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q6_K",
        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_M",
        "hf:mradermacher/Llama-3.2-3B-Instruct-GGUF:Q4_K_S"
    ]
}, {
    name: "Phi 3 3.8B",
    abilities: ["chat", "complete", "functionCalling"],
    description: "Phi 3 model was created by Microsoft and is optimized for strong reasoning (especially math and logic).\n" +
        "This is the small version of the model.",
    fileOptions: [
        "hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q8_0",
        "hf:bartowski/Phi-3.1-mini-4k-instruct-GGUF:Q4_K_M"
    ]
}, {
    name: "OLMoE 1B 7B MoE",
    abilities: ["chat"],
    description: "OLMoE models were created by AllenAI, and are fully open source models that utilize a Mixture of Experts architecture.\n" +
        "Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n" +
        "This model includes 64 expert models, with a total of 7 billion parameters.\n" +
        "This model generates output extremely fast.",
    fileOptions: [
        "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q8_0.gguf",
        "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q6_k.gguf",
        "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q5_k_m.gguf",
        "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_s.gguf",
        "hf:allenai/OLMoE-1B-7B-0924-Instruct-GGUF/olmoe-1b-7b-0924-instruct-q4_k_m.gguf"
    ]
}, {
    name: "Mixtral 8x7B MoE",
    abilities: ["chat", "complete"],
    description: "Mixtral models were created by Mistral AI and are general purpose models that utilize a Mixture of Experts architecture.\n" +
        "Mixtures of Experts (MoE) is a technique where different models, each skilled in solving a particular kind of problem, work together to improve the overall performance on complex tasks.\n" +
        "This model includes 8 expert models, each with 7 billion parameters.",
    fileOptions: [
        "hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q5_K_M",
        "hf:TheBloke/Mixtral-8x7B-v0.1-GGUF:Q4_K_M"
    ]
}, {
    name: "Mistral 7B Instruct v0.2",
    abilities: ["chat", "complete"],
    description: "Mistral models were created by Mistral AI and are general purpose models.\n" +
        "This is the 7 billion parameters version of the model.",
    fileOptions: [
        "hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q5_K_M",
        "hf:TheBloke/Mistral-7B-Instruct-v0.2-GGUF:Q4_K_M"
    ]
}, {
    name: "Dolphin 2.5 Mixtral 8x7B MoE",
    abilities: ["chat", "complete"],
    description: "This Dolphin Mixtral model was created by Eric Hartford and is an uncensored model based on Mixtral, with really good coding skills.\n" +
        "See the Mixtral model above for more information about Mixtral models.\n" +
        "This model includes 8 expert models, each with 7 billion parameters.",
    fileOptions: [
        "hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q5_K_M",
        "hf:TheBloke/dolphin-2.5-mixtral-8x7b-GGUF:Q4_K_M"
    ]
}, {
    name: "Gemma 2 9B",
    abilities: ["chat", "complete"],
    description: "Gemma models were created by Google and are well suited for a variety of text generation tasks, " +
        "including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
        "This is the 9 billion parameters version of the model.",
    fileOptions: [
        "hf:bartowski/gemma-2-9b-it-GGUF:Q6_K_L",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q6_K",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_L",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_M",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q5_K_S",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q4_K_L",
        "hf:bartowski/gemma-2-9b-it-GGUF:Q4_K_M"
    ]
}, {
    name: "Gemma 2 2B",
    abilities: ["chat", "complete"],
    description: "Gemma models were created by Google and are well suited for a variety of text generation tasks, " +
        "including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
        "This is the 2 billion parameters version of the model and is significantly less powerful than the 9B version.",
    fileOptions: [
        "hf:bartowski/gemma-2-2b-it-GGUF:Q6_K_L",
        "hf:bartowski/gemma-2-2b-it-GGUF:Q6_K",
        "hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_M",
        "hf:bartowski/gemma-2-2b-it-GGUF:Q5_K_S",
        "hf:bartowski/gemma-2-2b-it-GGUF:Q4_K_M"
    ]
}, {
    name: "Gemma 2 27B",
    abilities: ["chat", "complete"],
    description: "Gemma models were created by Google and are well suited for a variety of text generation tasks, " +
        "including question answering, summarization, and reasoning, with a focus on responsible responses.\n" +
        "This is the 27 billion parameters version of the model.\n" +
        "Since the model is relatively big, it may not run well on your machine.",
    fileOptions: [
        "hf:bartowski/gemma-2-27b-it-GGUF:Q6_K_L",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q6_K",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_L",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_M",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q5_K_S",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q4_K_L",
        "hf:bartowski/gemma-2-27b-it-GGUF:Q4_K_M"
    ]
}, {
    name: "Orca 2 13B",
    abilities: ["chat", "complete"],
    description: "Orca 2 model was created by Microsoft and is optimized for reasoning over given data, reading comprehensions, math problem solving and text summarization.\n" +
        "This is the 13 billion parameters version of the model.",
    fileOptions: [
        "hf:TheBloke/Orca-2-13B-GGUF:Q5_K_M",
        "hf:TheBloke/Orca-2-13B-GGUF:Q4_K_M"
    ]
}, {
    name: "Code Llama 7B",
    abilities: ["chat", "complete", "infill"],
    description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
        "This is the 7 billion parameters version of the model.",
    fileOptions: [
        "hf:TheBloke/CodeLlama-7B-GGUF:Q5_K_M",
        "hf:TheBloke/CodeLlama-7B-GGUF:Q4_K_M"
    ]
}, {
    name: "Code Llama 13B",
    abilities: ["chat", "complete", "infill"],
    description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
        "This is the 13 billion parameters version of the model.",
    fileOptions: [
        "hf:TheBloke/CodeLlama-13B-GGUF:Q5_K_M",
        "hf:TheBloke/CodeLlama-13B-GGUF:Q4_K_M"
    ]
}, {
    name: "Code Llama 34B",
    abilities: ["chat", "complete", "infill"],
    description: "Code Llama model was created by Meta based on Llama 2 and is optimized for coding tasks.\n" +
        "This is the 34 billion parameters version of the model.\n" +
        "You need a GPU with a handful of VRAM to use this version.",
    fileOptions: [
        "hf:TheBloke/CodeLlama-34B-GGUF:Q5_K_M",
        "hf:TheBloke/CodeLlama-34B-GGUF:Q4_K_M"
    ]
}, {
    name: "CodeGemma 2B",
    abilities: ["code", "complete", "infill"],
    description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
        "natural language understanding, mathematical reasoning, and instruction following.\n" +
        "This model is not suited for chat.\n" +
        "This is the 2 billion parameters version of the model.\n",
    fileOptions: [
        "hf:bartowski/codegemma-2b-GGUF:Q8_0",
        "hf:bartowski/codegemma-2b-GGUF:Q6_K",
        "hf:bartowski/codegemma-2b-GGUF:Q5_K_M",
        "hf:bartowski/codegemma-2b-GGUF:Q5_K_S",
        "hf:bartowski/codegemma-2b-GGUF:Q4_K_M"
    ]
}, {
    name: "CodeGemma 7B",
    abilities: ["code", "complete", "infill"],
    description: "CodeGemma models were created by Google and are optimized for code completion, code generation, " +
        "natural language understanding, mathematical reasoning, and instruction following.\n" +
        "This model is not suited for chat.\n" +
        "This is the 7 billion parameters version of the model.\n",
    fileOptions: [
        "hf:bartowski/codegemma-1.1-7b-it-GGUF:Q6_K",
        "hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_M",
        "hf:bartowski/codegemma-1.1-7b-it-GGUF:Q5_K_S",
        "hf:bartowski/codegemma-1.1-7b-it-GGUF:Q4_K_M"
    ]
}, {
    name: "Stable Code Instruct 3B",
    abilities: ["chat", "complete", "infill"],
    description: "Stable Code models were created by Stability AI and are optimized for code completion.",
    fileOptions: [
        "hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q5_k_m.gguf",
        "hf:stabilityai/stable-code-instruct-3b/stable-code-3b-q4_k_m.gguf"
    ]
}];