Skip to content

Commit cba79e3

Browse files
jameszyaoSimsonW
authored and committed
feat: add chunk CRUD ops
1 parent 0bc5456 commit cba79e3

File tree

4 files changed

+412
-11
lines changed

4 files changed

+412
-11
lines changed

examples/assistant/chat_with_assistant.ipynb

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -127,12 +127,18 @@
127127
" \"You know the meaning of various numbers.\",\n",
128128
" \"No matter what the user's language is, you will use the {{langugae}} to explain.\"\n",
129129
" ],\n",
130-
" tools=[AssistantTool(\n",
131-
" type=AssistantToolType.ACTION,\n",
132-
" id=action.action_id,\n",
133-
" )],\n",
130+
" tools=[\n",
131+
" AssistantTool(\n",
132+
" type=AssistantToolType.ACTION,\n",
133+
" id=action.action_id,\n",
134+
" ), \n",
135+
" AssistantTool(\n",
136+
" type=AssistantToolType.ACTION,\n",
137+
" id=action.action_id,\n",
138+
" )\n",
139+
" ],\n",
134140
" retrievals=[],\n",
135-
" metadata={\"foo\": \"bar\"},\n",
141+
" metadata={\"k\": \"v\"},\n",
136142
")\n",
137143
"print(f\"created assistant: {assistant}\\n\")"
138144
],

examples/crud/retrieval_crud.ipynb

Lines changed: 170 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@
2828
"execution_count": null,
2929
"outputs": [],
3030
"source": [
31-
"from taskingai.retrieval import Collection, Record, TokenTextSplitter\n",
31+
"from taskingai.retrieval import Collection, Record, Chunk, TokenTextSplitter\n",
3232
"\n",
3333
"# choose an available text_embedding model from your project\n",
3434
"embedding_model_id = \"YOUR_EMBEDDING_MODEL_ID\""
@@ -212,11 +212,11 @@
212212
"execution_count": null,
213213
"outputs": [],
214214
"source": [
215-
"# update record\n",
215+
"# update record - metadata\n",
216216
"record = taskingai.retrieval.update_record(\n",
217217
" collection_id=collection.collection_id,\n",
218218
" record_id=record.record_id,\n",
219-
" metadata={\"foo\": \"bar\"}, # currently only metadata update is supported\n",
219+
" metadata={\"foo\": \"bar\"},\n",
220220
")\n",
221221
"print(f\"updated record: {record}\")"
222222
],
@@ -225,6 +225,25 @@
225225
},
226226
"id": "65d833b22e1e657"
227227
},
228+
{
229+
"cell_type": "code",
230+
"execution_count": null,
231+
"outputs": [],
232+
"source": [
233+
"# update record - content\n",
234+
"record = taskingai.retrieval.update_record(\n",
235+
" collection_id=collection.collection_id,\n",
236+
" record_id=record.record_id,\n",
237+
" content=\"New content\",\n",
238+
" text_splitter=TokenTextSplitter(chunk_size=100, chunk_overlap=20),\n",
239+
")\n",
240+
"print(f\"updated record: {record}\")"
241+
],
242+
"metadata": {
243+
"collapsed": false
244+
},
245+
"id": "4369989d2bd1a777"
246+
},
228247
{
229248
"cell_type": "code",
230249
"execution_count": null,
@@ -257,6 +276,154 @@
257276
"collapsed": false
258277
},
259278
"id": "accf6d883fcffaa8"
279+
},
280+
{
281+
"cell_type": "markdown",
282+
"source": [
283+
"## Chunk Object"
284+
],
285+
"metadata": {
286+
"collapsed": false
287+
},
288+
"id": "b0e4c12fb7509fea"
289+
},
290+
{
291+
"cell_type": "code",
292+
"execution_count": null,
293+
"outputs": [],
294+
"source": [
295+
"# create a new chunk\n",
296+
"chunk: Chunk = taskingai.retrieval.create_chunk(\n",
297+
" collection_id=collection.collection_id,\n",
298+
" content=\"The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.\",\n",
299+
")\n",
300+
"print(f\"created chunk: {chunk.chunk_id} for collection: {collection.collection_id}\\n\")"
301+
],
302+
"metadata": {
303+
"collapsed": false
304+
},
305+
"id": "a395337f136500fc"
306+
},
307+
{
308+
"cell_type": "code",
309+
"execution_count": null,
310+
"outputs": [],
311+
"source": [
312+
"# update chunk metadata\n",
313+
"chunk = taskingai.retrieval.update_chunk(\n",
314+
" collection_id=collection.collection_id,\n",
315+
" chunk_id=chunk.chunk_id,\n",
316+
" metadata={\"k\": \"v\"},\n",
317+
")\n",
318+
"print(f\"updated chunk: {chunk}\")"
319+
],
320+
"metadata": {
321+
"collapsed": false
322+
},
323+
"id": "309e1771251bb079"
324+
},
325+
{
326+
"cell_type": "code",
327+
"execution_count": null,
328+
"outputs": [],
329+
"source": [
330+
"# update chunk content\n",
331+
"chunk = taskingai.retrieval.update_chunk(\n",
332+
" collection_id=collection.collection_id,\n",
333+
" chunk_id=chunk.chunk_id,\n",
334+
" content=\"New content\",\n",
335+
")\n",
336+
"print(f\"updated chunk: {chunk}\")"
337+
],
338+
"metadata": {
339+
"collapsed": false
340+
},
341+
"id": "a9d68db12329b558"
342+
},
343+
{
344+
"cell_type": "code",
345+
"execution_count": null,
346+
"outputs": [],
347+
"source": [
348+
"# get chunk\n",
349+
"chunk = taskingai.retrieval.get_chunk(\n",
350+
" collection_id=collection.collection_id,\n",
351+
" chunk_id=chunk.chunk_id\n",
352+
")\n",
353+
"print(f\"got chunk: {chunk}\\n\")"
354+
],
355+
"metadata": {
356+
"collapsed": false
357+
},
358+
"id": "d3899097cd6d0cf2"
359+
},
360+
{
361+
"cell_type": "code",
362+
"execution_count": null,
363+
"outputs": [],
364+
"source": [
365+
"# delete chunk\n",
366+
"taskingai.retrieval.delete_chunk(\n",
367+
" collection_id=collection.collection_id,\n",
368+
" chunk_id=chunk.chunk_id,\n",
369+
")\n",
370+
"print(f\"deleted chunk {chunk.chunk_id} from collection {collection.collection_id}\\n\")"
371+
],
372+
"metadata": {
373+
"collapsed": false
374+
},
375+
"id": "27e643ad8e8636ed"
376+
},
377+
{
378+
"cell_type": "code",
379+
"execution_count": null,
380+
"outputs": [],
381+
"source": [
382+
"# create a new text record and a new chunk\n",
383+
"taskingai.retrieval.create_record(\n",
384+
" collection_id=collection.collection_id,\n",
385+
" content=\"Machine learning is a subfield of artificial intelligence (AI) that involves the development of algorithms that allow computers to learn from and make decisions or predictions based on data. The term \\\"machine learning\\\" was coined by Arthur Samuel in 1959. In other words, machine learning enables a system to automatically learn and improve from experience without being explicitly programmed. This is achieved by feeding the system massive amounts of data, which it uses to learn patterns and make inferences. There are three main types of machine learning: 1. Supervised Learning: This is where the model is given labeled training data and the goal of learning is to generalize from the training data to unseen situations in a principled way. 2. Unsupervised Learning: This involves training on a dataset without explicit labels. The goal might be to discover inherent groupings or patterns within the data. 3. Reinforcement Learning: In this type, an agent learns to perform actions based on reward/penalty feedback to achieve a goal. It's commonly used in robotics, gaming, and navigation. Deep learning, a subset of machine learning, uses neural networks with many layers (\\\"deep\\\" structures) and has been responsible for many recent breakthroughs in AI, including speech recognition, image recognition, and natural language processing. It's important to note that machine learning is a rapidly developing field, with new techniques and applications emerging regularly.\",\n",
386+
" text_splitter=TokenTextSplitter(chunk_size=200, chunk_overlap=20)\n",
387+
")\n",
388+
"\n",
389+
"taskingai.retrieval.create_chunk(\n",
390+
" collection_id=collection.collection_id,\n",
391+
" content=\"The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.\",\n",
392+
")"
393+
],
394+
"metadata": {
395+
"collapsed": false
396+
},
397+
"id": "a74dd7615ec28528"
398+
},
399+
{
400+
"cell_type": "code",
401+
"execution_count": null,
402+
"outputs": [],
403+
"source": [
404+
"# list chunks\n",
405+
"chunks = taskingai.retrieval.list_chunks(collection_id=collection.collection_id)\n",
406+
"for chunk in chunks:\n",
407+
" print(chunk)\n",
408+
" print(\"-\" * 50)"
409+
],
410+
"metadata": {
411+
"collapsed": false
412+
},
413+
"id": "55e9645ac41f8ca"
414+
},
415+
{
416+
"cell_type": "code",
417+
"execution_count": null,
418+
"outputs": [],
419+
"source": [
420+
"# delete collection\n",
421+
"taskingai.retrieval.delete_collection(collection_id=collection.collection_id)"
422+
],
423+
"metadata": {
424+
"collapsed": false
425+
},
426+
"id": "b97aaa156f586e34"
260427
}
261428
],
262429
"metadata": {

0 commit comments

Comments
 (0)