|
28 | 28 | "execution_count": null,
|
29 | 29 | "outputs": [],
|
30 | 30 | "source": [
|
31 |
| - "from taskingai.retrieval import Collection, Record, TokenTextSplitter\n", |
| 31 | + "from taskingai.retrieval import Collection, Record, Chunk, TokenTextSplitter\n", |
32 | 32 | "\n",
|
33 | 33 | "# choose an available text_embedding model from your project\n",
|
34 | 34 | "embedding_model_id = \"YOUR_EMBEDDING_MODEL_ID\""
|
|
212 | 212 | "execution_count": null,
|
213 | 213 | "outputs": [],
|
214 | 214 | "source": [
|
215 |
| - "# update record\n", |
| 215 | + "# update record - metadata\n", |
216 | 216 | "record = taskingai.retrieval.update_record(\n",
|
217 | 217 | " collection_id=collection.collection_id,\n",
|
218 | 218 | " record_id=record.record_id,\n",
|
219 |
| - " metadata={\"foo\": \"bar\"}, # currently only metadata update is supported\n", |
| 219 | + " metadata={\"foo\": \"bar\"},\n", |
220 | 220 | ")\n",
|
221 | 221 | "print(f\"updated record: {record}\")"
|
222 | 222 | ],
|
|
225 | 225 | },
|
226 | 226 | "id": "65d833b22e1e657"
|
227 | 227 | },
|
| 228 | + { |
| 229 | + "cell_type": "code", |
| 230 | + "execution_count": null, |
| 231 | + "outputs": [], |
| 232 | + "source": [ |
| 233 | + "# update record - content\n", |
| 234 | + "record = taskingai.retrieval.update_record(\n", |
| 235 | + " collection_id=collection.collection_id,\n", |
| 236 | + " record_id=record.record_id,\n", |
| 237 | + " content=\"New content\",\n", |
| 238 | + " text_splitter=TokenTextSplitter(chunk_size=100, chunk_overlap=20),\n", |
| 239 | + ")\n", |
| 240 | + "print(f\"updated record: {record}\")" |
| 241 | + ], |
| 242 | + "metadata": { |
| 243 | + "collapsed": false |
| 244 | + }, |
| 245 | + "id": "4369989d2bd1a777" |
| 246 | + }, |
228 | 247 | {
|
229 | 248 | "cell_type": "code",
|
230 | 249 | "execution_count": null,
|
|
257 | 276 | "collapsed": false
|
258 | 277 | },
|
259 | 278 | "id": "accf6d883fcffaa8"
|
| 279 | + }, |
| 280 | + { |
| 281 | + "cell_type": "markdown", |
| 282 | + "source": [ |
| 283 | + "## Chunk Object" |
| 284 | + ], |
| 285 | + "metadata": { |
| 286 | + "collapsed": false |
| 287 | + }, |
| 288 | + "id": "b0e4c12fb7509fea" |
| 289 | + }, |
| 290 | + { |
| 291 | + "cell_type": "code", |
| 292 | + "execution_count": null, |
| 293 | + "outputs": [], |
| 294 | + "source": [ |
| 295 | + "# create a new chunk\n", |
| 296 | + "chunk: Chunk = taskingai.retrieval.create_chunk(\n", |
| 297 | + " collection_id=collection.collection_id,\n", |
| 298 | + " content=\"The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.\",\n", |
| 299 | + ")\n", |
| 300 | + "print(f\"created chunk: {chunk.chunk_id} for collection: {collection.collection_id}\\n\")" |
| 301 | + ], |
| 302 | + "metadata": { |
| 303 | + "collapsed": false |
| 304 | + }, |
| 305 | + "id": "a395337f136500fc" |
| 306 | + }, |
| 307 | + { |
| 308 | + "cell_type": "code", |
| 309 | + "execution_count": null, |
| 310 | + "outputs": [], |
| 311 | + "source": [ |
| 312 | + "# update chunk metadata\n", |
| 313 | + "chunk = taskingai.retrieval.update_chunk(\n", |
| 314 | + " collection_id=collection.collection_id,\n", |
| 315 | + " chunk_id=chunk.chunk_id,\n", |
| 316 | + " metadata={\"k\": \"v\"},\n", |
| 317 | + ")\n", |
| 318 | + "print(f\"updated chunk: {chunk}\")" |
| 319 | + ], |
| 320 | + "metadata": { |
| 321 | + "collapsed": false |
| 322 | + }, |
| 323 | + "id": "309e1771251bb079" |
| 324 | + }, |
| 325 | + { |
| 326 | + "cell_type": "code", |
| 327 | + "execution_count": null, |
| 328 | + "outputs": [], |
| 329 | + "source": [ |
| 330 | + "# update chunk content\n", |
| 331 | + "chunk = taskingai.retrieval.update_chunk(\n", |
| 332 | + " collection_id=collection.collection_id,\n", |
| 333 | + " chunk_id=chunk.chunk_id,\n", |
| 334 | + " content=\"New content\",\n", |
| 335 | + ")\n", |
| 336 | + "print(f\"updated chunk: {chunk}\")" |
| 337 | + ], |
| 338 | + "metadata": { |
| 339 | + "collapsed": false |
| 340 | + }, |
| 341 | + "id": "a9d68db12329b558" |
| 342 | + }, |
| 343 | + { |
| 344 | + "cell_type": "code", |
| 345 | + "execution_count": null, |
| 346 | + "outputs": [], |
| 347 | + "source": [ |
| 348 | + "# get chunk\n", |
| 349 | + "chunk = taskingai.retrieval.get_chunk(\n", |
| 350 | + " collection_id=collection.collection_id,\n", |
| 351 | + " chunk_id=chunk.chunk_id\n", |
| 352 | + ")\n", |
| 353 | + "print(f\"got chunk: {chunk}\\n\")" |
| 354 | + ], |
| 355 | + "metadata": { |
| 356 | + "collapsed": false |
| 357 | + }, |
| 358 | + "id": "d3899097cd6d0cf2" |
| 359 | + }, |
| 360 | + { |
| 361 | + "cell_type": "code", |
| 362 | + "execution_count": null, |
| 363 | + "outputs": [], |
| 364 | + "source": [ |
| 365 | + "# delete chunk\n", |
| 366 | + "taskingai.retrieval.delete_chunk(\n", |
| 367 | + " collection_id=collection.collection_id,\n", |
| 368 | + " chunk_id=chunk.chunk_id,\n", |
| 369 | + ")\n", |
| 370 | + "print(f\"deleted chunk {chunk.chunk_id} from collection {collection.collection_id}\\n\")" |
| 371 | + ], |
| 372 | + "metadata": { |
| 373 | + "collapsed": false |
| 374 | + }, |
| 375 | + "id": "27e643ad8e8636ed" |
| 376 | + }, |
| 377 | + { |
| 378 | + "cell_type": "code", |
| 379 | + "execution_count": null, |
| 380 | + "outputs": [], |
| 381 | + "source": [ |
| 382 | + "# create a new text record and a new chunk\n", |
| 383 | + "taskingai.retrieval.create_record(\n", |
| 384 | + " collection_id=collection.collection_id,\n", |
| 385 | + " content=\"Machine learning is a subfield of artificial intelligence (AI) that involves the development of algorithms that allow computers to learn from and make decisions or predictions based on data. The term \\\"machine learning\\\" was coined by Arthur Samuel in 1959. In other words, machine learning enables a system to automatically learn and improve from experience without being explicitly programmed. This is achieved by feeding the system massive amounts of data, which it uses to learn patterns and make inferences. There are three main types of machine learning: 1. Supervised Learning: This is where the model is given labeled training data and the goal of learning is to generalize from the training data to unseen situations in a principled way. 2. Unsupervised Learning: This involves training on a dataset without explicit labels. The goal might be to discover inherent groupings or patterns within the data. 3. Reinforcement Learning: In this type, an agent learns to perform actions based on reward/penalty feedback to achieve a goal. It's commonly used in robotics, gaming, and navigation. Deep learning, a subset of machine learning, uses neural networks with many layers (\\\"deep\\\" structures) and has been responsible for many recent breakthroughs in AI, including speech recognition, image recognition, and natural language processing. It's important to note that machine learning is a rapidly developing field, with new techniques and applications emerging regularly.\",\n", |
| 386 | + " text_splitter=TokenTextSplitter(chunk_size=200, chunk_overlap=20)\n", |
| 387 | + ")\n", |
| 388 | + "\n", |
| 389 | + "taskingai.retrieval.create_chunk(\n", |
| 390 | + " collection_id=collection.collection_id,\n", |
| 391 | + " content=\"The dog is a domesticated descendant of the wolf. Also called the domestic dog, it is derived from extinct gray wolves, and the gray wolf is the dog's closest living relative. The dog was the first species to be domesticated by humans.\",\n", |
| 392 | + ")" |
| 393 | + ], |
| 394 | + "metadata": { |
| 395 | + "collapsed": false |
| 396 | + }, |
| 397 | + "id": "a74dd7615ec28528" |
| 398 | + }, |
| 399 | + { |
| 400 | + "cell_type": "code", |
| 401 | + "execution_count": null, |
| 402 | + "outputs": [], |
| 403 | + "source": [ |
| 404 | + "# list chunks\n", |
| 405 | + "chunks = taskingai.retrieval.list_chunks(collection_id=collection.collection_id)\n", |
| 406 | + "for chunk in chunks:\n", |
| 407 | + " print(chunk)\n", |
| 408 | + " print(\"-\" * 50)" |
| 409 | + ], |
| 410 | + "metadata": { |
| 411 | + "collapsed": false |
| 412 | + }, |
| 413 | + "id": "55e9645ac41f8ca" |
| 414 | + }, |
| 415 | + { |
| 416 | + "cell_type": "code", |
| 417 | + "execution_count": null, |
| 418 | + "outputs": [], |
| 419 | + "source": [ |
| 420 | + "# delete collection\n", |
| 421 | + "taskingai.retrieval.delete_collection(collection_id=collection.collection_id)" |
| 422 | + ], |
| 423 | + "metadata": { |
| 424 | + "collapsed": false |
| 425 | + }, |
| 426 | + "id": "b97aaa156f586e34" |
260 | 427 | }
|
261 | 428 | ],
|
262 | 429 | "metadata": {
|
|
0 commit comments