@@ -165,13 +165,13 @@ const server = createServer(async (request, response) => {
     }
     console.timeEnd("function-exec");
 
-    // Now that we have a tool result, let's use it to call the model. Note that we're calling the model
-    // via the Models API, instead of the Copilot Chat API, so that if we're in the execute-model tool we
-    // can switch out the default model name for the requested model. We could change this in the future
-    // if we want to handle rate-limited users more gracefully or the model difference becomes a problem.
+    // Now that we have a tool result, let's use it to call the model.
     try {
+      let stream: AsyncIterable<any>;
+
       if (functionToCall.name === executeModel.definition.name) {
-        // fetch the model data from the index (already in-memory) so we have all the information we need
+        // First, let's write a reference with the model we're executing.
+        // Fetch the model data from the index (already in-memory) so we have all the information we need
         // to build out the reference URLs
         const modelData = await modelsAPI.getModelFromIndex(functionCallRes.model);
         const sseData = {
@@ -189,15 +189,30 @@ const server = createServer(async (request, response) => {
         };
         const event = createReferencesEvent([sseData]);
         response.write(event);
-      }
 
-      // We should keep all optional parameters out of this call, so it can work for any model (in case we've
-      // just run the execute-model tool).
-      const stream = await modelsAPI.inference.chat.completions.create({
-        model: functionCallRes.model,
-        messages: functionCallRes.messages,
-        stream: true,
-      });
+        if (["o1-mini", "o1-preview"].includes(args.model)) {
+          // for non-streaming models, we need to still stream the response back, so we build the stream ourselves
+          stream = (async function* () {
+            const result = await modelsAPI.inference.chat.completions.create({
+              model: functionCallRes.model,
+              messages: functionCallRes.messages
+            });
+            yield result;
+          })();
+        } else {
+          stream = await modelsAPI.inference.chat.completions.create({
+            model: functionCallRes.model,
+            messages: functionCallRes.messages,
+            stream: true
+          });
+        }
+      } else {
+        stream = await capiClient.chat.completions.create({
+          stream: true,
+          model: "gpt-4o",
+          messages: functionCallRes.messages,
+        });
+      }
 
       console.time("streaming");
       for await (const chunk of stream) {
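
For reference, the key technique in this change is wrapping a single, non-streaming completion in an immediately-invoked async generator, so the downstream `for await` loop can consume streaming and non-streaming models the same way. The following is a minimal, self-contained TypeScript sketch of that pattern; `Completion`, `fetchCompletion`, and `asStream` are hypothetical names used only for illustration and are not part of the extension's code.

// Hypothetical minimal shape of a chat completion result (illustration only).
type Completion = { choices: Array<{ message: { content: string } }> };

// Stand-in for a non-streaming API call (illustration only).
async function fetchCompletion(): Promise<Completion> {
  return { choices: [{ message: { content: "hello" } }] };
}

// Wrap a single pending result in an immediately-invoked async generator,
// producing an AsyncIterable that yields exactly one "chunk".
function asStream(pending: Promise<Completion>): AsyncIterable<Completion> {
  return (async function* () {
    yield await pending;
  })();
}

async function main() {
  // The same consuming loop works whether the source streamed or not.
  for await (const chunk of asStream(fetchCompletion())) {
    console.log(chunk.choices[0].message.content);
  }
}

main().catch(console.error);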