Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/examples/notebooks/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.pdf
186 changes: 106 additions & 80 deletions src/examples/notebooks/vectorize.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
"display_name": "Python 3 (ipykernel)",
"language": "python"
},
"language_info": {
"name": "python"
Expand All @@ -34,25 +35,32 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "06daf601-2ed5-45f2-b3cb-9c5d8ba85654"
"outputId": "06daf601-2ed5-45f2-b3cb-9c5d8ba85654",
"ExecuteTime": {
"end_time": "2025-03-06T17:01:18.767327Z",
"start_time": "2025-03-06T17:01:18.083827Z"
}
},
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: vectorize-client in /usr/local/lib/python3.11/dist-packages (0.1.2)\n",
"Requirement already satisfied: pydantic>=2 in /usr/local/lib/python3.11/dist-packages (from vectorize-client) (2.10.6)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from vectorize-client) (2.8.2)\n",
"Requirement already satisfied: typing-extensions>=4.7.1 in /usr/local/lib/python3.11/dist-packages (from vectorize-client) (4.12.2)\n",
"Requirement already satisfied: urllib3<3.0.0,>=1.25.3 in /usr/local/lib/python3.11/dist-packages (from vectorize-client) (2.3.0)\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->vectorize-client) (0.7.0)\n",
"Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=2->vectorize-client) (2.27.2)\n",
"Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->vectorize-client) (1.17.0)\n"
"Requirement already satisfied: vectorize-client in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (1.0.0)\r\n",
"Requirement already satisfied: urllib3<3.0.0,>=1.25.3 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from vectorize-client) (2.3.0)\r\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from vectorize-client) (2.9.0.post0)\r\n",
"Requirement already satisfied: pydantic>=2 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from vectorize-client) (2.10.6)\r\n",
"Requirement already satisfied: typing-extensions>=4.7.1 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from vectorize-client) (4.12.2)\r\n",
"Requirement already satisfied: annotated-types>=0.6.0 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from pydantic>=2->vectorize-client) (0.7.0)\r\n",
"Requirement already satisfied: pydantic-core==2.27.2 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from pydantic>=2->vectorize-client) (2.27.2)\r\n",
"Requirement already satisfied: six>=1.5 in /Users/nicoloboschi/dev/vectorize-client-generator/tests/python/.venv/lib/python3.11/site-packages (from python-dateutil>=2.8.2->vectorize-client) (1.17.0)\r\n",
"\r\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.3.1\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m25.0.1\u001B[0m\r\n",
"\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n"
]
}
]
],
"execution_count": 2
},
{
"cell_type": "code",
Expand All @@ -66,27 +74,22 @@
"base_uri": "https://localhost:8080/"
},
"id": "Wtp1hi4Reh8q",
"outputId": "39503ec7-e8ca-4ff3-bf3b-ba80fff2af0b"
},
"execution_count": 6,
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Vectorize Organization ID:582893ba-291c-4ec7-a6ee-e85c26888817\n",
"Vectorize Token:··········\n"
]
"outputId": "39503ec7-e8ca-4ff3-bf3b-ba80fff2af0b",
"ExecuteTime": {
"end_time": "2025-03-06T17:02:02.626216Z",
"start_time": "2025-03-06T17:01:29.119649Z"
}
]
},
"outputs": [],
"execution_count": 3
},
{
"cell_type": "code",
"source": [
"import vectorize_client as v\n",
"\n",
"\n",
"api = v.ApiClient(v.Configuration(access_token=token))\n",
"api = v.ApiClient(v.Configuration(access_token=token, host=\"http://localhost:3000/api\"), \"x-lambda-api-key\", token)\n",
"pipelines = v.PipelinesApi(api)\n",
"\n",
"response = pipelines.get_pipelines(org)\n",
Expand All @@ -98,30 +101,56 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "694382e2-27e4-42c2-9da0-304ce9b588ce"
"outputId": "694382e2-27e4-42c2-9da0-304ce9b588ce",
"ExecuteTime": {
"end_time": "2025-03-06T17:09:50.998069Z",
"start_time": "2025-03-06T17:09:32.057335Z"
}
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Pipeline: My Pipeline From API\n",
"Pipeline: from api\n",
"Pipeline: Test pipeline\n",
"Pipeline: Test pipeline\n",
"Pipeline: Test pipeline\n"
]
}
]
],
"execution_count": 9
},
{
"cell_type": "code",
"source": "!wget -O apple.pdf https://www.apple.com/newsroom/pdfs/fy2024-q1/FY24_Q1_Consolidated_Financial_Statements.pdf ",
"metadata": {
"id": "sb43XYA1mEfN"
"id": "sb43XYA1mEfN",
"ExecuteTime": {
"end_time": "2025-03-06T17:12:57.720738Z",
"start_time": "2025-03-06T17:12:52.794818Z"
}
},
"execution_count": 25,
"outputs": []
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"--2025-03-06 18:12:53-- https://www.apple.com/newsroom/pdfs/fy2024-q1/FY24_Q1_Consolidated_Financial_Statements.pdf\r\n",
"Resolving www.apple.com (www.apple.com)... 23.205.48.213\r\n",
"Connecting to www.apple.com (www.apple.com)|23.205.48.213|:443... connected.\r\n",
"HTTP request sent, awaiting response... 200 OK\r\n",
"Length: 3129308 (3.0M) [application/pdf]\r\n",
"Saving to: ‘apple.pdf’\r\n",
"\r\n",
"apple.pdf 100%[===================>] 2.98M 778KB/s in 3.8s \r\n",
"\r\n",
"2025-03-06 18:12:57 (799 KB/s) - ‘apple.pdf’ saved [3129308/3129308]\r\n",
"\r\n"
]
}
],
"execution_count": 12
},
{
"cell_type": "code",
Expand All @@ -140,24 +169,25 @@
"height": 35
},
"id": "FHuFKLufe0fi",
"outputId": "5d1b887e-1925-4ff9-d44c-3479fa394575"
"outputId": "5d1b887e-1925-4ff9-d44c-3479fa394575",
"ExecuteTime": {
"end_time": "2025-03-06T17:12:30.433445Z",
"start_time": "2025-03-06T17:12:16.696363Z"
}
},
"execution_count": 8,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'cdfa4981-c0fa-4b1d-9fba-83845cc103f0'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
"'e49a6ede-83cb-4698-b751-dd0364918c53'"
]
},
"execution_count": 10,
"metadata": {},
"execution_count": 8
"output_type": "execute_result"
}
]
],
"execution_count": 10
},
{
"cell_type": "code",
Expand Down Expand Up @@ -190,18 +220,22 @@
"base_uri": "https://localhost:8080/"
},
"id": "TZs5os0Ae4kb",
"outputId": "3f47b11e-556b-464b-e3a4-4ebca7bfba52"
"outputId": "3f47b11e-556b-464b-e3a4-4ebca7bfba52",
"ExecuteTime": {
"end_time": "2025-03-06T17:13:10.495588Z",
"start_time": "2025-03-06T17:13:00.038846Z"
}
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"output_type": "stream",
"text": [
"Upload successful\n"
]
}
]
],
"execution_count": 13
},
{
"cell_type": "code",
Expand All @@ -213,18 +247,27 @@
"builtin_vector_db = [c.id for c in vector_databases.destination_connectors if c.type == \"VECTORIZE\"][0]"
],
"metadata": {
"id": "3rqzxB3aeoif"
"id": "3rqzxB3aeoif",
"ExecuteTime": {
"end_time": "2025-03-06T17:13:33.985317Z",
"start_time": "2025-03-06T17:13:11.451383Z"
}
},
"execution_count": 11,
"outputs": []
"outputs": [],
"execution_count": 14
},
{
"cell_type": "code",
"source": [
"response = pipelines.create_pipeline(org, v.PipelineConfigurationSchema(\n",
" source_connectors=[v.SourceConnectorSchema(id=source_connector_id, type=\"FILE_UPLOAD\", config={})],\n",
" destination_connector=v.DestinationConnectorSchema(id=builtin_vector_db, type=\"VECTORIZE\", config={}),\n",
" ai_platform=v.AIPlatformSchema(id=builtin_ai_platform, type=\"VECTORIZE\", config={}),\n",
" ai_platform=v.AIPlatformSchema(id=builtin_ai_platform, type=\"VECTORIZE\", config={\n",
" \"chunkSize\": 600,\n",
" \"chunkingStrategy\": \"FIXED\",\n",
" \"embeddingModel\": \"VECTORIZE_OPEN_AI_TEXT_EMBEDDING_3_LARGE\",\n",
" #\"extractionStrategy\": \"MIXED\"\n",
" }),\n",
" pipeline_name=\"My Pipeline From API\",\n",
" schedule=v.ScheduleSchema(type=\"manual\")\n",
"))\n",
Expand All @@ -237,24 +280,25 @@
"height": 35
},
"id": "59b9VSaykDSh",
"outputId": "f43499ce-08e3-4f23-c502-cead17f2e1c0"
"outputId": "f43499ce-08e3-4f23-c502-cead17f2e1c0",
"ExecuteTime": {
"end_time": "2025-03-06T17:25:59.003091Z",
"start_time": "2025-03-06T17:25:29.022232Z"
}
},
"execution_count": 12,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'aipf9ab5-702c-40de-a2f7-8a2139129ba5'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
"'aipea5dd-bfa6-4e78-9211-019f65e106d2'"
]
},
"execution_count": 21,
"metadata": {},
"execution_count": 12
"output_type": "execute_result"
}
]
],
"execution_count": 21
},
{
"cell_type": "code",
Expand Down Expand Up @@ -786,24 +830,6 @@
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "hCLlbeZ3lprg"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "GAbtIAv8lFdB"
},
"execution_count": null,
"outputs": []
}
]
}
2 changes: 1 addition & 1 deletion src/python/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "vectorize-client"
version = "0.1.2"
version = "1.0.0"
description = "Python client for the Vectorize API"
authors = [ "Vectorize <contact@vectorize.io>" ]
license = "MIT"
Expand Down
Loading
Loading