
Commit ce7ca66

Authored Sep 23, 2024
Merge pull request #14 from copilot-extensions/sgoedecke/support-non-streaming-models
Support non-streaming models
2 parents: 8e46482 + d1b69de

File tree: 2 files changed (+29, -14 lines)

 

src/functions/execute-model.ts (+1, -1)
@@ -70,7 +70,7 @@ Example Queries (IMPORTANT: Phrasing doesn't have to match):
       model: args.model,
       messages: [
         {
-          role: "system",
+          role: ["o1-mini", "o1-preview"].includes(args.model) ? "assistant" : "system",
           content: content.join("\n"),
         },
         { role: "user", content: args.instruction },

src/index.ts (+28, -13)
@@ -165,13 +165,13 @@ const server = createServer(async (request, response) => {
   }
   console.timeEnd("function-exec");

-  // Now that we have a tool result, let's use it to call the model. Note that we're calling the model
-  // via the Models API, instead of the Copilot Chat API, so that if we're in the execute-model tool we
-  // can switch out the default model name for the requested model. We could change this in the future
-  // if we want to handle rate-limited users more gracefully or the model difference becomes a problem.
+  // Now that we have a tool result, let's use it to call the model.
   try {
+    let stream: AsyncIterable<any>;
+
     if (functionToCall.name === executeModel.definition.name) {
-      // fetch the model data from the index (already in-memory) so we have all the information we need
+      // First, let's write a reference with the model we're executing.
+      // Fetch the model data from the index (already in-memory) so we have all the information we need
       // to build out the reference URLs
       const modelData = await modelsAPI.getModelFromIndex(functionCallRes.model);
       const sseData = {
@@ -189,15 +189,30 @@ const server = createServer(async (request, response) => {
       };
       const event = createReferencesEvent([sseData]);
       response.write(event);
-    }

-    // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-    // just run the execute-model tool).
-    const stream = await modelsAPI.inference.chat.completions.create({
-      model: functionCallRes.model,
-      messages: functionCallRes.messages,
-      stream: true,
-    });
+      if (["o1-mini", "o1-preview"].includes(args.model)) {
+        // for non-streaming models, we need to still stream the response back, so we build the stream ourselves
+        stream = (async function*() {
+          const result = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages
+          });
+          yield result;
+        })();
+      } else {
+        stream = await modelsAPI.inference.chat.completions.create({
+          model: functionCallRes.model,
+          messages: functionCallRes.messages,
+          stream: true
+        });
+      }
+    } else {
+      stream = await capiClient.chat.completions.create({
+        stream: true,
+        model: "gpt-4o",
+        messages: functionCallRes.messages,
+      });
+    }

     console.time("streaming");
     for await (const chunk of stream) {
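
The core trick in this change is to hide the streaming/non-streaming difference behind a single AsyncIterable, so the downstream `for await` loop never has to branch. Below is a minimal standalone sketch of that pattern under assumed names: the Chunk type, fetchCompletion, fetchCompletionStream, and getStream are illustrative stand-ins, not this repo's Models API client.

// Sketch: normalize streaming and non-streaming model calls behind one AsyncIterable.
type Chunk = { content: string };

const NON_STREAMING_MODELS = ["o1-mini", "o1-preview"];

// Stand-in for a non-streaming completions call: one full response.
async function fetchCompletion(model: string): Promise<Chunk> {
  return { content: `full response from ${model}` };
}

// Stand-in for a streaming completions call: many partial chunks.
async function* fetchCompletionStream(model: string): AsyncIterable<Chunk> {
  yield { content: `partial response from ${model} ` };
  yield { content: "(more tokens...)" };
}

function getStream(model: string): AsyncIterable<Chunk> {
  if (NON_STREAMING_MODELS.includes(model)) {
    // Wrap the single non-streaming result in an async generator so it can
    // still be consumed with `for await`, exactly like a real stream.
    return (async function* () {
      yield await fetchCompletion(model);
    })();
  }
  return fetchCompletionStream(model);
}

// Usage: the consumer loop is identical for both kinds of models.
async function demo() {
  for await (const chunk of getStream("o1-mini")) {
    console.log(chunk.content);
  }
}
demo();

One trade-off of this approach: a non-streaming model delivers its whole answer as a single chunk, so the client sees one large write instead of incremental tokens, but the surrounding server code stays unchanged.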
