diff --git a/.flake8 b/.flake8 index dca890b..3cadd1d 100644 --- a/.flake8 +++ b/.flake8 @@ -1,5 +1,5 @@ [flake8] - ignore = E203, E266, E501, W503, F403, F401 + ignore = F401 max-line-length = 100 max-complexity = 18 select = B,C,E,F,W,T4,B9 diff --git a/.github/workflows/development_pipeline.yml b/.github/workflows/development_pipeline.yml index b7af9fc..583ab98 100644 --- a/.github/workflows/development_pipeline.yml +++ b/.github/workflows/development_pipeline.yml @@ -39,7 +39,7 @@ jobs: run: black --check ./image-search-engine/. - name: Run flake8 - run: flake8 --ignore=E501,W503 ./image-search-engine + run: flake8 --ignore=E501,W503,F401 ./image-search-engine # - name: Run Pylint # run: pylint ./image-search-engine/*.py @@ -95,7 +95,7 @@ jobs: run: black --check ./text-search-engine/. - name: Run flake8 - run: flake8 --ignore=E501,W503 ./text-search-engine + run: flake8 --ignore=E501,W503,F401 ./text-search-engine # - name: Run Pylint # run: pylint ./image-search-engine/*.py diff --git a/.gitignore b/.gitignore index 3aa1880..9e20c1f 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,9 @@ image-search-engine/assets/uploaded_images/* # Model *.pth +*.pt +*.onnx +*.engine # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/docker-compose-prod.yaml b/docker-compose-prod.yaml new file mode 100644 index 0000000..fa78f0e --- /dev/null +++ b/docker-compose-prod.yaml @@ -0,0 +1,142 @@ +version: "3" +services: + triton-server: + container_name: triton-server + image: nvcr.io/nvidia/tritonserver:23.09-py3 + ports: + - 9000:8000 + - 9001:8001 + - 9002:8002 + command: tritonserver --model-repository=/models + volumes: + - ./image-search-engine/model_repository:/models + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + + qdrant-vector-database: + container_name: qdrant-vector-database + image: qdrant/qdrant:v1.5.1 + ports: + - 6333:6333 + - 6334:6334 + volumes: + - ./qdrant-vector-database:/qdrant/storage + + image-search-engine: + container_name: image-search-container + image: vectornguyen76/image-search-engine + build: + context: ./image-search-engine + dockerfile: Dockerfile + environment: + - QDRANT_URL=http://qdrant-vector-database:6334 + - TRITON_SERVER_URL=triton-server:8001 + ports: + - 7000:7000 + volumes: + - ./image-search-engine/logs:/app/logs + depends_on: + - qdrant-vector-database + profiles: + - dev.frontend + - prod + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 + container_name: elasticsearch + environment: + - node.name=elasticsearch + - cluster.name=es-docker-cluster + - discovery.type=single-node + - bootstrap.memory_lock=true + - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m" + ulimits: + memlock: + soft: -1 + hard: -1 + ports: + - 9200:9200 + healthcheck: + test: curl --fail http://localhost:9200/_cat/health || exit 1 + interval: 10s + timeout: 1s + retries: 10 + volumes: + - data-elastic-search:/usr/share/elasticsearch/data + + kibana: + image: docker.elastic.co/kibana/kibana:7.10.0 + container_name: kibana + ports: + - 5601:5601 + environment: + ELASTICSEARCH_URL: http://elasticsearch:9200 + ELASTICSEARCH_HOSTS: http://elasticsearch:9200 + depends_on: + elasticsearch: + condition: service_healthy + + text-search-engine: + container_name: text-search-container + image: vectornguyen76/text-search-engine + build: + context: ./text-search-engine + dockerfile: Dockerfile + ports: + - 8000:8000 + environment: + ELASTICSEARCH_HOST: http://elasticsearch:9200 + depends_on: + elasticsearch: 
+ condition: service_healthy + profiles: + - dev.frontend + - prod + + frontend_dev_service: + container_name: frontend_dev_container + image: vectornguyen76/frontend_dev_image + build: + context: ./frontend + dockerfile: Dockerfile.dev + ports: + - 3000:3000 + profiles: + - dev.backend + + frontend_service: + container_name: frontend_prod_container + image: vectornguyen76/frontend_prod_image + build: + context: ./frontend + dockerfile: Dockerfile + ports: + - 3000:3000 + profiles: + - prod + + nginx_service: + container_name: nginx_container + image: nginx:1.25.1-alpine + ports: + - 80:80 + volumes: + - ./nginx-server/default.conf:/etc/nginx/conf.d/default.conf + - ./nginx-server/log:/var/log/nginx/ + depends_on: + - frontend_service + - image-search-engine + - text-search-engine + profiles: + - prod + +volumes: + data-elastic-search: + driver: local + qdrant-vector-database: + driver: local diff --git a/docker-compose-pro.yaml b/docker-compose-test.yaml similarity index 100% rename from docker-compose-pro.yaml rename to docker-compose-test.yaml diff --git a/docker-compose.yaml b/docker-compose.yaml index f53a388..f625f1e 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -1,8 +1,8 @@ version: "3" services: - triton: - image: nvcr.io/nvidia/tritonserver:22.02-py3 - ipc: host + triton-server: + container_name: triton-server + image: nvcr.io/nvidia/tritonserver:23.09-py3 ports: - 9000:8000 - 9001:8001 @@ -10,46 +10,13 @@ services: command: tritonserver --model-repository=/models volumes: - ./image-search-engine/model_repository:/models - # deploy: - # resources: - # reservations: - # devices: - # - capabilities: [gpu] - - elasticsearch: - image: docker.elastic.co/elasticsearch/elasticsearch:7.10.0 - container_name: elasticsearch - environment: - - node.name=elasticsearch - - cluster.name=es-docker-cluster - - discovery.type=single-node - - bootstrap.memory_lock=true - - "ES_JAVA_OPTS=-Xms1024m -Xmx1024m" - ulimits: - memlock: - soft: -1 - hard: -1 - ports: - - 9200:9200 - healthcheck: - test: curl --fail http://localhost:9200/_cat/health || exit 1 - interval: 10s - timeout: 1s - retries: 10 - volumes: - - data-elastic-search:/usr/share/elasticsearch/data - - kibana: - image: docker.elastic.co/kibana/kibana:7.10.0 - container_name: kibana - ports: - - 5601:5601 - environment: - ELASTICSEARCH_URL: http://elasticsearch:9200 - ELASTICSEARCH_HOSTS: http://elasticsearch:9200 - depends_on: - elasticsearch: - condition: service_healthy + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] qdrant-vector-database: container_name: qdrant-vector-database @@ -67,73 +34,38 @@ services: context: ./image-search-engine dockerfile: Dockerfile environment: - - QDRANT_HOST=qdrant-vector-database + - QDRANT_URL=http://qdrant-vector-database:6334 + - TRITON_SERVER_URL=triton-server:8001 ports: - 7000:7000 volumes: - ./image-search-engine/logs:/app/logs depends_on: - qdrant-vector-database - profiles: - - dev.frontend - - prod - text-search-engine: - container_name: text-search-container - image: vectornguyen76/text-search-engine - build: - context: ./text-search-engine - dockerfile: Dockerfile - ports: - - 8000:8000 - environment: - ELASTICSEARCH_HOST: http://elasticsearch:9200 - depends_on: - elasticsearch: - condition: service_healthy - profiles: - - dev.frontend - - prod - - frontend_dev_service: - container_name: frontend_dev_container - image: vectornguyen76/frontend_dev_image - build: - context: ./frontend - dockerfile: Dockerfile.dev - ports: - - 
3000:3000 - profiles: - - dev.backend - - frontend_service: - container_name: frontend_prod_container - image: vectornguyen76/frontend_prod_image - build: - context: ./frontend - dockerfile: Dockerfile - ports: - - 3000:3000 - profiles: - - prod - - nginx_service: - container_name: nginx_container - image: nginx:1.25.1-alpine - ports: - - 80:80 - volumes: - - ./nginx-server/default.conf:/etc/nginx/conf.d/default.conf - - ./nginx-server/log:/var/log/nginx/ - depends_on: - - frontend_service - - image-search-engine - - text-search-engine - profiles: - - prod + # image-search-engine-gpu: + # container_name: image-search-container-gpu + # image: vectornguyen76/image-search-engine-gpu + # build: + # context: ./image-search-engine + # dockerfile: Dockerfile.gpu + # environment: + # - QDRANT_URL=http://qdrant-vector-database:6334 + # - TRITON_SERVER_URL=triton-server:8001 + # ports: + # - 7000:7000 + # volumes: + # - ./image-search-engine/logs:/app/logs + # depends_on: + # - qdrant-vector-database + # deploy: + # resources: + # reservations: + # devices: + # - driver: nvidia + # count: 1 + # capabilities: [gpu] volumes: - data-elastic-search: - driver: local qdrant-vector-database: driver: local diff --git a/image-search-engine/.dockerignore b/image-search-engine/.dockerignore index b228923..32f608d 100644 --- a/image-search-engine/.dockerignore +++ b/image-search-engine/.dockerignore @@ -1 +1,4 @@ +data +locust faiss_store +model_repository diff --git a/image-search-engine/Dockerfile b/image-search-engine/Dockerfile index a3a86b3..d83038d 100644 --- a/image-search-engine/Dockerfile +++ b/image-search-engine/Dockerfile @@ -4,6 +4,9 @@ FROM python:3.9-slim # Set the working directory inside the container to /app WORKDIR /app +# Install for OpenCV +RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y + # Copy the requirements.txt file from the host into the container's /app directory COPY requirements.txt /app @@ -31,9 +34,6 @@ RUN python -c 'from torchvision.models import efficientnet_b3, EfficientNet_B3_W # Copy all files from the host into the container's /app directory COPY . 
/app -# Ingest data to Faiss -RUN python faiss_ingest.py - # Expose port 7000 to the host machine EXPOSE 7000 diff --git a/image-search-engine/Dockerfile.gpu b/image-search-engine/Dockerfile.gpu new file mode 100644 index 0000000..8a89b67 --- /dev/null +++ b/image-search-engine/Dockerfile.gpu @@ -0,0 +1,44 @@ +# Use the Python 3.9 slim image as the base image +FROM python:3.9-slim + +# Set the working directory inside the container to /app +WORKDIR /app + +# Install for OpenCV +RUN apt-get update && apt-get install ffmpeg libsm6 libxext6 -y + +# Copy the requirements.txt file from the host into the container's /app directory +COPY requirements.txt /app + +# Upgrade pip +RUN pip install --upgrade pip + +# Download pytorch GPU +RUN pip install torch==2.0.1 torchvision==0.15.2 torchaudio==2.0.2 + +# Install the Python dependencies from requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +# Download load csv data +RUN gdown https://drive.google.com/uc?id=1I-Rv676N45aUgY7tcsazQE7fLzzb2LFr + +# Download the image features from a Google Drive link +RUN gdown https://drive.google.com/uc?id=103LTvXhmbjPOVjDVbhwjjzHrsQLpskWt + +# Move data.csv, image_features.npz to a directory named /data +RUN mkdir --parents /app/data && mv data.csv /app/data && mv image_features.npz /app/data + +# Download model +RUN python -c 'from torchvision.models import efficientnet_b3, EfficientNet_B3_Weights; efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)' + +# Copy all files from the host into the container's /app directory +COPY . /app + +# Expose port 7000 to the host machine +EXPOSE 7000 + +# Chmod to entrypoint.sh +RUN chmod +x ./entrypoint.sh + +# Run entrypoint.sh +ENTRYPOINT ["/app/entrypoint.sh"] diff --git a/image-search-engine/README.md b/image-search-engine/README.md index daea9df..de2b699 100644 --- a/image-search-engine/README.md +++ b/image-search-engine/README.md @@ -15,6 +15,11 @@ 5. [Strategies for Improving Image Retrieval](#strategies-for-improving-image-retrieval) 6. [Development Environment](#development-environment) 7. [Testing and Results](#testing-and-results) + - [Locust Tool](#locust-tool) + - [Test Faiss](#test-faiss) + - [Test Qdrant](#test-qdrant) + - [Inference without Triton](#inference-without-triton) + - [Test Triton](#test-triton) 8. [References](#references) ## About the Solution @@ -357,7 +362,17 @@ This project implements an image search engine for Shopee using qdrant as the ve ## Testing and Results -### Locust - Load Testing +- Ubuntu: 20.04 +- CUDA Version: 12.2 +- EC2: g4dn.xlarge +- CPU: Intel Xeon Family 4-vCPUs +- RAM: 16GB +- GPU: NVIDIA T4 Tensor Core +- VRAM: 16GB +- Uvicorn Workers: 4 + FastAPI, Qdrant, Faiss, Triton, Locust are executed on the same device. + +### Locust Tool 1. **Overview** @@ -380,7 +395,6 @@ This project implements an image search engine for Shopee using qdrant as the ve
Locust Load Test

-
3. **References** @@ -389,16 +403,165 @@ This project implements an image search engine for Shopee using qdrant as the ve - [Increasing Performance](http://docs.locust.io/en/stable/increase-performance.html) - [Running Distributed Tests](http://docs.locust.io/en/stable/running-distributed.html) -### Results +### Test Faiss -

-Qdrant Vector Store -
-Qdrant Vector Store -

-
-- Created and added 100,000 points in 6 minutes in qdrant.
-- p95: [Provide Performance Data]
+1. **Ingest Data Time**
+
+   Creating the Faiss index takes ~5 seconds.
+
+2. **Search Time CPU**
+

+ Faiss Search Time in CPU +
+ Faiss Search Time in CPU +

+
+
+   Faiss search time on the CPU is about 39 ms.
+
+### Test Qdrant
+
+1. **Ingest Data Time**
+

+ Point in Qdrant +
+ Point in Qdrant +

+
+ +

+ Qdrant Info +
+ Qdrant Info +

+   Creating the collection and adding 100,000 points takes 6 minutes.
+
+2. **Search Time CPU**
+

+ Qdrant Search Time in CPU +
+ Qdrant Search Time in CPU +

+   Qdrant search time on the CPU is about 3 ms.
+
+### Inference without Triton
+
+1. **Report**
+
+   - Original efficientnet_b3 model.pt
+   - Uvicorn workers = 1
+   - User spawn rate = 1
+
+   | Id | Request Concurrency | Device | p95 Latency (ms) | RPS | Max GPU Memory Usage (MB) |
+   | :-: | :-----------------: | :----: | :--------------: | :-: | :-----------------------: |
+   | 1 | 1 | CPU | 130 | 9 | 0 |
+   | 2 | 1 | GPU | 21 | 48 | 525 |
+   | 3 | 4 | CPU | 460 | 9 | 0 |
+   | 4 | 4 | GPU | 85 | 49 | 525 |
+   | 5 | 8 | CPU | 920 | 9 | 0 |
+   | 6 | 8 | GPU | 170 | 49 | 525 |
+   | 7 | 16 | CPU | 1800 | 9 | 0 |
+   | 8 | 16 | GPU | 340 | 49 | 525 |
+   | 9 | 32 | CPU | 3600 | 9 | 0 |
+   | 10 | 32 | GPU | 650 | 50 | 525 |
+
+

+ 32 Request Concurrency - GPU +
+ 32 Request Concurrency - GPU +

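+
+   For context, these numbers come from running EfficientNet-B3 in-process inside the FastAPI application (the `FeatureExtractor` in `src/feature_extraction/extractor.py`), so inference shares the same process as the API. The sketch below is a rough, self-contained way to time that local path; it assumes a 300x300 input like the Triton configs in this repository and uses random data instead of a real image, so treat it as an illustration of the measurement, not as the service code itself.
+
+   ```python
+   # Hedged sketch: time in-process EfficientNet-B3 inference (no Triton).
+   import time
+
+   import torch
+   from torchvision.models import EfficientNet_B3_Weights, efficientnet_b3
+
+   device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+   model = efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1)
+   model.eval()
+   model.to(device)
+
+   dummy = torch.randn(1, 3, 300, 300).to(device)  # stand-in for a preprocessed image
+
+   with torch.no_grad():
+       for _ in range(5):  # warm-up so CUDA kernels are initialized before timing
+           model(dummy)
+       if device.type == "cuda":
+           torch.cuda.synchronize()
+       start = time.time()
+       for _ in range(50):
+           model(dummy)
+       if device.type == "cuda":
+           torch.cuda.synchronize()
+
+   print(f"mean latency: {(time.time() - start) / 50 * 1000:.1f} ms")
+   ```
+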
+ +### Test Triton + +1. **Folder layout** + + ``` + model_repository/ + ├── efficientnet_b3 + │ ├── 1 + │ │ └── model.pt + │ └── config.pbtxt + └── efficientnet_b3_onnx + ├── 1 + │ └── model.onnx + └── config.pbtxt + ``` + +2. **Dynamic Batching** + + Dynamic batching, in reference to the Triton Inference Server, refers to the functionality which allows the combining of one or more inference requests into a single batch (which has to be created dynamically) to maximize throughput. + + Dynamic batching can be enabled and configured on per model basis by specifying selections in the model's config.pbtxt. Dynamic Batching can be enabled with its default settings by adding the following to the config.pbtxt file: + + ``` + dynamic_batching { } + ``` + + While Triton batches these incoming requests without any delay, users can choose to allocate a limited delay for the scheduler to collect more inference requests to be used by the dynamic batcher. + + ``` + dynamic_batching { + max_queue_delay_microseconds: 100 + } + ``` + +

+ Dynamic Batching +
+ Dynamic Batching +

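+
+   Dynamic batching only helps when several requests reach the server close together. For reference, a single request from a Python client looks roughly like the sketch below; it assumes the `tritonclient[grpc]` package and reuses the names from this repository's `config.pbtxt` (model `efficientnet_b3`, tensors `input`/`output`) and the compose port mapping (host port 9001 -> container port 8001).
+
+   ```python
+   # Hedged sketch of one gRPC inference request against the Triton server.
+   import numpy as np
+   import tritonclient.grpc as grpcclient
+
+   client = grpcclient.InferenceServerClient(url="localhost:9001")
+
+   # Stand-in for a preprocessed image batch: (batch, channels, height, width)
+   batch = np.random.rand(1, 3, 300, 300).astype(np.float32)
+
+   infer_input = grpcclient.InferInput("input", list(batch.shape), "FP32")
+   infer_input.set_data_from_numpy(batch)
+   infer_output = grpcclient.InferRequestedOutput("output")
+
+   response = client.infer(
+       model_name="efficientnet_b3",
+       inputs=[infer_input],
+       outputs=[infer_output],
+   )
+   feature = response.as_numpy("output")  # feature vector used for the vector search
+   print(feature.shape)
+   ```
+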
+
+3. **Concurrent Model Execution**
+
+   The Triton Inference Server can spin up multiple instances of the same model, which can process queries in parallel. Triton can spawn instances on the same device (GPU) or on a different device on the same node, as per the user's specifications. This customizability is especially useful for ensembles whose models have different throughputs: multiple copies of heavier models can be spawned on a separate GPU to allow for more parallel processing. This is enabled via the `instance_group` option in a model's configuration.
+

+ Concurrent Model Execution +
+ Concurrent Model Execution +

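+
+   On the client side, the effect of multiple instances (and of dynamic batching) only shows up under concurrent load. A minimal way to generate that load, assuming the same client setup as the sketch above, is to fan requests out over a thread pool; the Locust file in this repository generates similar load through the FastAPI endpoint at a larger scale.
+
+   ```python
+   # Hedged sketch: issue requests concurrently so several model instances
+   # (and the dynamic batcher) receive work at the same time.
+   from concurrent.futures import ThreadPoolExecutor
+
+   import numpy as np
+   import tritonclient.grpc as grpcclient
+
+   def one_request(_: int):
+       # One client per call keeps the sketch simple; a real load test
+       # would reuse connections.
+       client = grpcclient.InferenceServerClient(url="localhost:9001")
+       batch = np.random.rand(1, 3, 300, 300).astype(np.float32)
+       infer_input = grpcclient.InferInput("input", list(batch.shape), "FP32")
+       infer_input.set_data_from_numpy(batch)
+       result = client.infer(model_name="efficientnet_b3", inputs=[infer_input])
+       return result.as_numpy("output")
+
+   with ThreadPoolExecutor(max_workers=8) as pool:
+       outputs = list(pool.map(one_request, range(32)))
+
+   print(len(outputs), "responses")
+   ```
+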
+
+4. **Report**
+
+   - Tests different configurations
+   - Only the Triton inference step is measured
+   - Original efficientnet_b3 model.pt
+   - Uvicorn workers = 1
+   - User spawn rate = 1
+
+   | Id | Request Concurrency | Max Batch Size | Dynamic Batching | Instance Count | p95 Latency (ms) | RPS | Max GPU Memory Usage (MB) | Average GPU Utilization (%) |
+   | :-: | :-----------------: | :------------: | :--------------: | :------------: | :--------------: | :-: | :-----------------------: | :-------------------------: |
+   | 1 | 1 | 1 | Disabled | 1:CPU | 140 | 8 | 160 | 0 |
+   | 2 | 1 | 1 | Disabled | 1:GPU | 14 | 74 | 313 | 35 |
+   | 3 | 8 | 1 | Disabled | 1:GPU | 83 | 120 | 313 | 55 |
+   | 4 | 8 | 8 | Disabled | 1:GPU | 85 | 113 | 313 | 55 |
+   | 5 | 8 | 8 | Disabled | 2:GPU | 69 | 140 | 439 | 66 |
+   | 6 | 8 | 8 | Enabled | 2:GPU | 68 | 150 | 1019 | 69 |
+   | 7 | 16 | 1 | Disabled | 1:GPU | 170 | 111 | 313 | 58 |
+   | 8 | 16 | 16 | Disabled | 1:GPU | 170 | 116 | 313 | 55 |
+   | 9 | 16 | 16 | Disabled | 2:GPU | 130 | 143 | 439 | 66 |
+   | 10 | 16 | 16 | Enabled | 2:GPU | 130 | 155 | 1667 | 75 |
+   | 11 | 32 | 1 | Disabled | 1:GPU | 330 | 112 | 313 | 55 |
+   | 12 | 32 | 32 | Disabled | 1:GPU | 310 | 116 | 313 | 58 |
+   | 13 | 32 | 32 | Disabled | 3:GPU | 290 | 137 | 547 | 66 |
+   | 14 | 32 | 32 | Enabled | 3:GPU | 270 | 150 | 7395 | 73 |
+   | 15 | 64 | 1 | Disabled | 1:GPU | 610 | 118 | 313 | 55 |
+   | 16 | 64 | 64 | Disabled | 1:GPU | 570 | 117 | 313 | 55 |
+   | 17 | 64 | 64 | Disabled | 3:GPU | 610 | 132 | 547 | 65 |
+   | 18 | 64 | 64 | Enabled | 3:GPU | 640 | 143 | 14023 | 75 |
+   | 19 | 64 | 64 | Enabled | 5:GPU | 670 | 137 | 11183 | 70 |
+   | 20 | 64 | 64 | Enabled | 1:CPU | 13000 | 5 | 160 | 0 |
+
+

+ 32 Request Concurrency - GPU +
+ 32 Request Concurrency - Max Batch Size 32 - Dynamic Batching - 3:GPU +

+ +5. **References** + +- [Triton Conceptual Guides](https://github.com/triton-inference-server/tutorials/tree/main/Conceptual_Guide) ## References diff --git a/image-search-engine/app.py b/image-search-engine/app.py index 85ef5c9..35c58dc 100644 --- a/image-search-engine/app.py +++ b/image-search-engine/app.py @@ -1,16 +1,14 @@ -from datetime import datetime +import time from config import settings -from faiss_search.searcher import FaissSearch from fastapi import FastAPI, File, HTTPException, UploadFile from fastapi.middleware.cors import CORSMiddleware -from feature_extractor import FeatureExtractor -from log_config import configure_logging from qdrant_client.http.exceptions import UnexpectedResponse -from qdrant_search.searcher import QdrantSearch -from schemas import Product - -logger = configure_logging(__name__) +from src.faiss_search.searcher import FaissSearch +from src.feature_extraction.extractor import FeatureExtractor +from src.qdrant_search.searcher import QdrantSearch +from src.schemas import ImageBase64Request, Product +from src.utils import LOGGER, save_image_file # Initialize the feature extractor and FaissSearch instances feature_extractor = FeatureExtractor() @@ -36,30 +34,31 @@ def healthcheck() -> bool: return True -@app.post("/search-image", response_model=list[Product]) -async def search_image_qdrant(file: UploadFile = File(...)): - """ - Endpoint to upload an image, extract features, and perform a search. +@app.post("/search-image-faiss", response_model=list[Product]) +async def search_image_faiss(file: UploadFile = File(...)): + # start_time = time.time() + try: + image_path = await save_image_file(file=file) - Args: - file (UploadFile): The image file to be uploaded. + # Extract features from the uploaded image using the feature extractor + feature = feature_extractor.extract_feature(image_path=image_path) - Returns: - dict: A dictionary containing search results, including item information. - """ - try: - # Prepend the current datetime to the filename - file.filename = datetime.now().strftime("%Y%m%d-%H%M%S-") + file.filename + # Perform a search using the extracted feature vector + search_results = faiss_search.search(query_vector=feature, top_k=20) - # Construct the full image path based on the settings - image_path = settings.IMAGEDIR + file.filename + # LOGGER.info(f"Faiss search executed in {time.time() - start_time:.4f} seconds.") + return search_results + + except Exception as e: + LOGGER.error("Could not perform search: %s", e) + raise HTTPException(status_code=500, detail=e) - # Read the contents of the uploaded file asynchronously - contents = await file.read() - # Write the uploaded contents to the specified image path - with open(image_path, "wb") as f: - f.write(contents) +@app.post("/search-image-qdrant", response_model=list[Product]) +async def search_image_qdrant(file: UploadFile = File(...)): + # start_time = time.time() + try: + image_path = await save_image_file(file=file) # Extract features from the uploaded image using the feature extractor feature = feature_extractor.extract_feature(image_path=image_path) @@ -69,19 +68,20 @@ async def search_image_qdrant(file: UploadFile = File(...)): result = [Product.from_point(point) for point in search_results.result] - logger.info(f"Search image successfully, file name: {file.filename}") - + # LOGGER.info( + # f"Qdrant search executed in {time.time() - start_time:.4f} seconds." 
+ # ) return result except UnexpectedResponse as e: # Handle the case when Qdrant returns an error and convert it to an exception # that FastAPI will understand and return to the client - logger.error("Could not perform search: %s", e) + LOGGER.error("Could not perform search: %s", e) raise HTTPException(status_code=500, detail=e.reason_phrase) -@app.post("/search-image-faiss", response_model=list[Product]) -async def upload_image(file: UploadFile = File(...)): +@app.post("/search-image", response_model=list[Product]) +async def search_image_qdrant_triton(file: UploadFile = File(...)): """ Endpoint to upload an image, extract features, and perform a search. @@ -91,30 +91,27 @@ async def upload_image(file: UploadFile = File(...)): Returns: dict: A dictionary containing search results, including item information. """ - try: - # Prepend the current datetime to the filename - file.filename = datetime.now().strftime("%Y%m%d-%H%M%S-") + file.filename + image_path = await save_image_file(file=file) - # Construct the full image path based on the settings - image_path = settings.IMAGEDIR + file.filename + # Extract features from the uploaded image using the feature extractor + feature = await feature_extractor.triton_extract_feature(image_path=image_path) - # Read the contents of the uploaded file asynchronously - contents = await file.read() + # Perform a search using the extracted feature vector + search_results = await qdrant_search.search(query_vector=feature, top_k=20) - # Write the uploaded contents to the specified image path - with open(image_path, "wb") as f: - f.write(contents) + result = [Product.from_point(point) for point in search_results.result] - # Extract features from the uploaded image using the feature extractor - feature = feature_extractor.extract_feature(image_path=image_path) + return result - # Perform a search using the extracted feature vector - search_results = faiss_search.search(query_vector=feature, top_k=20) - logger.info(f"Search image use faiss successfully, file name: {file.filename}") +@app.post("/search-image-base64", response_model=list[Product]) +async def search_image_base64(data: ImageBase64Request): + # Extract features from the uploaded image using the feature extractor + feature = await feature_extractor.triton_extract_base64(image=data.image) - return search_results + # Perform a search using the extracted feature vector + search_results = await qdrant_search.search(query_vector=feature, top_k=20) - except Exception as e: - logger.error("Could not perform search: %s", e) - raise HTTPException(status_code=500, detail=e) + result = [Product.from_point(point) for point in search_results.result] + + return result diff --git a/image-search-engine/assets/documents/dynamic_batching.png b/image-search-engine/assets/documents/dynamic_batching.png new file mode 100644 index 0000000..f720bbf Binary files /dev/null and b/image-search-engine/assets/documents/dynamic_batching.png differ diff --git a/image-search-engine/assets/documents/gpu.jpg b/image-search-engine/assets/documents/gpu.jpg new file mode 100644 index 0000000..110d6e4 Binary files /dev/null and b/image-search-engine/assets/documents/gpu.jpg differ diff --git a/image-search-engine/assets/documents/info-qdrant.jpg b/image-search-engine/assets/documents/info-qdrant.jpg new file mode 100644 index 0000000..d99def5 Binary files /dev/null and b/image-search-engine/assets/documents/info-qdrant.jpg differ diff --git a/image-search-engine/assets/documents/multi_instance.png 
b/image-search-engine/assets/documents/multi_instance.png new file mode 100644 index 0000000..2698390 Binary files /dev/null and b/image-search-engine/assets/documents/multi_instance.png differ diff --git a/image-search-engine/assets/documents/no-triton-test-1.jpg b/image-search-engine/assets/documents/no-triton-test-1.jpg new file mode 100644 index 0000000..8a71106 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-1.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-10.jpg b/image-search-engine/assets/documents/no-triton-test-10.jpg new file mode 100644 index 0000000..9fc7233 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-10.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-2.jpg b/image-search-engine/assets/documents/no-triton-test-2.jpg new file mode 100644 index 0000000..a0cc823 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-2.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-3.jpg b/image-search-engine/assets/documents/no-triton-test-3.jpg new file mode 100644 index 0000000..edb0ef5 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-3.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-4.jpg b/image-search-engine/assets/documents/no-triton-test-4.jpg new file mode 100644 index 0000000..f32397a Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-4.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-5.jpg b/image-search-engine/assets/documents/no-triton-test-5.jpg new file mode 100644 index 0000000..efc5c39 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-5.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-6.jpg b/image-search-engine/assets/documents/no-triton-test-6.jpg new file mode 100644 index 0000000..ce43785 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-6.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-7.jpg b/image-search-engine/assets/documents/no-triton-test-7.jpg new file mode 100644 index 0000000..0088ba0 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-7.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-8.jpg b/image-search-engine/assets/documents/no-triton-test-8.jpg new file mode 100644 index 0000000..15403c0 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-8.jpg differ diff --git a/image-search-engine/assets/documents/no-triton-test-9.jpg b/image-search-engine/assets/documents/no-triton-test-9.jpg new file mode 100644 index 0000000..b3d4a61 Binary files /dev/null and b/image-search-engine/assets/documents/no-triton-test-9.jpg differ diff --git a/image-search-engine/assets/documents/point-qdrant.jpg b/image-search-engine/assets/documents/point-qdrant.jpg new file mode 100644 index 0000000..f5757a3 Binary files /dev/null and b/image-search-engine/assets/documents/point-qdrant.jpg differ diff --git a/image-search-engine/assets/documents/time-faiss-cpu.jpg b/image-search-engine/assets/documents/time-faiss-cpu.jpg new file mode 100644 index 0000000..a708e20 Binary files /dev/null and b/image-search-engine/assets/documents/time-faiss-cpu.jpg differ diff --git a/image-search-engine/assets/documents/time-qdrant-cpu.jpg b/image-search-engine/assets/documents/time-qdrant-cpu.jpg new file mode 
100644 index 0000000..e541d5c Binary files /dev/null and b/image-search-engine/assets/documents/time-qdrant-cpu.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-1.jpg b/image-search-engine/assets/documents/triton-test-1.jpg new file mode 100644 index 0000000..849c332 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-1.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-10.jpg b/image-search-engine/assets/documents/triton-test-10.jpg new file mode 100644 index 0000000..49a3f55 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-10.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-11.jpg b/image-search-engine/assets/documents/triton-test-11.jpg new file mode 100644 index 0000000..bb7836c Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-11.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-12.jpg b/image-search-engine/assets/documents/triton-test-12.jpg new file mode 100644 index 0000000..1b3756e Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-12.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-13.jpg b/image-search-engine/assets/documents/triton-test-13.jpg new file mode 100644 index 0000000..c4bc1c1 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-13.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-14.jpg b/image-search-engine/assets/documents/triton-test-14.jpg new file mode 100644 index 0000000..549918c Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-14.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-15.jpg b/image-search-engine/assets/documents/triton-test-15.jpg new file mode 100644 index 0000000..1c6a07f Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-15.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-16.jpg b/image-search-engine/assets/documents/triton-test-16.jpg new file mode 100644 index 0000000..a73bf10 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-16.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-17.jpg b/image-search-engine/assets/documents/triton-test-17.jpg new file mode 100644 index 0000000..1c0e6b1 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-17.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-18.jpg b/image-search-engine/assets/documents/triton-test-18.jpg new file mode 100644 index 0000000..0d013a6 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-18.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-19.jpg b/image-search-engine/assets/documents/triton-test-19.jpg new file mode 100644 index 0000000..91200e6 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-19.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-2.jpg b/image-search-engine/assets/documents/triton-test-2.jpg new file mode 100644 index 0000000..2cdb944 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-2.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-20.jpg b/image-search-engine/assets/documents/triton-test-20.jpg new file mode 100644 index 0000000..592466e Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-20.jpg differ diff --git 
a/image-search-engine/assets/documents/triton-test-3.jpg b/image-search-engine/assets/documents/triton-test-3.jpg new file mode 100644 index 0000000..fa57090 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-3.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-4.jpg b/image-search-engine/assets/documents/triton-test-4.jpg new file mode 100644 index 0000000..fba32bb Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-4.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-5.jpg b/image-search-engine/assets/documents/triton-test-5.jpg new file mode 100644 index 0000000..46e4426 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-5.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-6.jpg b/image-search-engine/assets/documents/triton-test-6.jpg new file mode 100644 index 0000000..1619f4c Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-6.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-7.jpg b/image-search-engine/assets/documents/triton-test-7.jpg new file mode 100644 index 0000000..93c9c60 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-7.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-8.jpg b/image-search-engine/assets/documents/triton-test-8.jpg new file mode 100644 index 0000000..28ced99 Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-8.jpg differ diff --git a/image-search-engine/assets/documents/triton-test-9.jpg b/image-search-engine/assets/documents/triton-test-9.jpg new file mode 100644 index 0000000..bc29b7c Binary files /dev/null and b/image-search-engine/assets/documents/triton-test-9.jpg differ diff --git a/image-search-engine/pipeline/pipeline_get_features.ipynb b/image-search-engine/baseline/baseline.ipynb similarity index 100% rename from image-search-engine/pipeline/pipeline_get_features.ipynb rename to image-search-engine/baseline/baseline.ipynb diff --git a/image-search-engine/config.py b/image-search-engine/config.py index 9eb0f90..75d7b85 100644 --- a/image-search-engine/config.py +++ b/image-search-engine/config.py @@ -23,11 +23,18 @@ class Settings(BaseSettings): TOP_K: int = 3 # Faiss configuration - INDEX_PATH: str = "./faiss_search/index.faiss" + INDEX_PATH: str = "./src/faiss_search/index.faiss" # Qdrant configuration - QDRANT_HOST: str = os.environ.get("QDRANT_HOST", "localhost") + QDRANT_URL: str = os.environ.get("QDRANT_URL", "http://localhost:6334") QDRANT_COLLECTION: str = "image-search-engine" + # Triton configuration + TRITON_SERVER_URL: str = os.environ.get("TRITON_SERVER_URL", "localhost:9001") + TORCH_MODEL_NAME: str = "efficientnet_b3" + ONNX_MODEL_NAME: str = "efficientnet_b3_onnx" + MODEL_INPUT_NAME: str = "input" + MODEL_OUTPUT_NAME: str = "output" + settings = Settings() diff --git a/image-search-engine/entrypoint.sh b/image-search-engine/entrypoint.sh index 86f26b0..07fee29 100644 --- a/image-search-engine/entrypoint.sh +++ b/image-search-engine/entrypoint.sh @@ -1,26 +1,10 @@ #!/bin/sh -echo "Start run entrypoint script..." +echo "Start ingest data to Faiss search..." +python faiss_ingest.py -echo "Waiting for qdrant..." 
- -# Define the URL to check -url="http://$QDRANT_HOST:6333/healthz" - -while true; do - response=$(curl -s -o /dev/null -w "%{http_code}" "$url") - - if [ "$response" -ne 200 ]; then - echo "URL $url is not healthy (HTTP status code: $response)" - sleep 0.5 - else - echo "URL $url is healthy (HTTP status code: $response)" - break # Exit the loop when the URL is healthy - fi -done - -echo "Start ingest data to qdrant search..." +echo "Start ingest data to Qdrant search..." python qdrant_ingest.py echo "Run app with uvicorn server..." -uvicorn app:app --port 7000 --host 0.0.0.0 +uvicorn app:app --port 7000 --host 0.0.0.0 --workers 1 diff --git a/image-search-engine/faiss_ingest.py b/image-search-engine/faiss_ingest.py index 4b4781a..a5dde37 100644 --- a/image-search-engine/faiss_ingest.py +++ b/image-search-engine/faiss_ingest.py @@ -1,46 +1,11 @@ -import faiss -import numpy as np -from config import settings +from src.faiss_search.ingest_data import FaissIngest +from src.utils import LOGGER +# Create an instance of FaissIngest +faiss_ingest = FaissIngest() -class FaissIngest: - """ - A class for ingesting data into a Faiss index. - - Attributes: - image_features (numpy.ndarray): Array of features to be indexed. - """ - - def __init__(self): - """ - Initializes a FaissIngest instance and loads array features from a file. - """ - # Load array features from the specified file - self.image_features = np.load(settings.FEATURES_PATH, allow_pickle=True) - - def create_index(self): - """ - Creates a Faiss index, adds array features to it, and saves the index to disk. - - Returns: - None - """ - # Create an index with FAISS using L2 distance metric - index_faiss = faiss.IndexFlatL2(settings.DIMENSIONS) - - # Add the array features to the Faiss index - index_faiss.add(self.image_features["image_features"]) - - # Save the index to disk - faiss.write_index(index_faiss, settings.INDEX_PATH) - - # Print a success message - print("Faiss index created successfully!") - - -if __name__ == "__main__": - # Create an instance of FaissIngest - faiss_ingest = FaissIngest() - +if faiss_ingest.check_index_exists(): + LOGGER.info("Faiss index already exists!") +else: # Create and save the Faiss index faiss_ingest.create_index() diff --git a/image-search-engine/feature_extractor.py b/image-search-engine/feature_extractor.py deleted file mode 100644 index d501cb9..0000000 --- a/image-search-engine/feature_extractor.py +++ /dev/null @@ -1,79 +0,0 @@ -import torch -from torchvision.io import read_image -from torchvision.models import EfficientNet_B3_Weights, efficientnet_b3 - - -class FeatureExtractor: - def __init__(self): - """ - Initializes the FeatureExtractor class. - - This class is used to extract features from images using the EfficientNet-B3 model. - - Attributes: - - device (torch.device): Represents the device (CPU/GPU) where the model will be loaded. - - weights (EfficientNet_B3_Weights): Specifies the pre-trained weights to be used. - - model (torch.nn.Module): The loaded EfficientNet-B3 model. - """ - self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - self.weights = EfficientNet_B3_Weights.IMAGENET1K_V1 - self.model = self.load_model() - - def load_model(self): - """ - Loads the pre-trained EfficientNet-B3 model. - - Returns: - - torch.nn.Module: The loaded model. 
- """ - # Load the pre-trained model - model = efficientnet_b3(weights=self.weights) - - # Set the model to evaluation mode - model.eval() - - # Use a GPU (if available) for inference - model = model.to(self.device) - - return model - - def preprocess_input(self, image_path): - """ - Preprocesses the input image for inference. - - Args: - - image_path (str): The path to the input image. - - Returns: - - torch.Tensor: Preprocessed image tensor. - """ - image = read_image(image_path) - - # Initialize the inference transforms - preprocess = self.weights.transforms(antialias=True) - - # Process RGBA image - image = image.narrow(0, 0, 3) - - # Apply inference preprocessing transforms - image = preprocess(image).unsqueeze(0) - - return image - - def extract_feature(self, image_path): - """ - Extracts features from the input image. - - Args: - - image_path (str): The path to the input image. - - Returns: - - numpy.ndarray: Extracted features as a numpy array. - """ - image = self.preprocess_input(image_path) - - feature = self.model(image) - - feature = feature.detach().numpy() - - return feature diff --git a/image-search-engine/locust/image_test.jpg b/image-search-engine/locust/image_test.jpg new file mode 100644 index 0000000..c70bae5 Binary files /dev/null and b/image-search-engine/locust/image_test.jpg differ diff --git a/image-search-engine/locust/locustfile.py b/image-search-engine/locust/locustfile.py index 9b80578..1997c37 100644 --- a/image-search-engine/locust/locustfile.py +++ b/image-search-engine/locust/locustfile.py @@ -1,18 +1,48 @@ -from locust import FastHttpUser, task +import base64 +from typing import Any + +import cv2 +from locust import FastHttpUser, constant, constant_throughput, task + + +def encode_img_base64(image_path): + # Load an RGB image using OpenCV + image = cv2.imread(image_path) + + # Encode the image to a JPEG format (you can choose other formats as well) + _, encoded_image = cv2.imencode(".jpg", image) + + # Convert the encoded image to a Base64 string + encoded_image_base64 = base64.b64encode(encoded_image).decode("utf-8") + + return encoded_image_base64 class SearchImageUser(FastHttpUser): - @task - def search_image(self): - """ - Simulate a user uploading an image and performing a search. 
- """ + host = "http://localhost:7000" + # wait_time = constant(1) + # wait_time = constant_throughput(1) + + def __init__(self, *args: Any, **kwargs: Any): + super().__init__(*args, **kwargs) + self.image_path = "image_test.jpg" + # self.request = {"image": encode_img_base64(self.image_path)} - # Create a random image file - image_file = open("test.jpg", "rb") + # @task + # def search_image_faiss(self): + # image_file = open(self.image_path, "rb") + # self.client.post("/search-image-faiss", files={"file": image_file}) - # Construct the request body - request_body = {"file": image_file} + # @task + # def search_image_qdrant(self): + # image_file = open(self.image_path, "rb") + # self.client.post("/search-image-qdrant", files={"file": image_file}) + + @task + def search_image(self): + image_file = open(self.image_path, "rb") + self.client.post("/search-image", files={"file": image_file}) - # Send the request to the API and get the response - self.client.post("/search-image", files=request_body) + # @task + # def search_image_base64(self): + # self.client.post("/search-image-base64", json=self.request) diff --git a/image-search-engine/log_config.py b/image-search-engine/log_config.py deleted file mode 100644 index b881ef4..0000000 --- a/image-search-engine/log_config.py +++ /dev/null @@ -1,47 +0,0 @@ -import logging -from datetime import datetime - -import pytz -from config import settings - - -def configure_logging(name): - # Create a logger instance - logger = logging.getLogger(name) - - # Set the logging level - logger.setLevel(logging.DEBUG) - - # Set the timezone to Vietnam - vietnam_timezone = pytz.timezone("Asia/Ho_Chi_Minh") - - # Configure logging with the Vietnam timezone - logging.Formatter.converter = ( - lambda *args: pytz.utc.localize(datetime.utcnow()) - .astimezone(vietnam_timezone) - .timetuple() - ) - - # Define the log format - console_log_format = "%(asctime)s - %(levelname)s - %(message)s" - file_log_format = ( - "%(asctime)s - %(levelname)s - %(message)s - (%(filename)s:%(lineno)d)" - ) - - # Create a console handler - console_handler = logging.StreamHandler() - console_handler.setLevel(logging.DEBUG) - console_handler.setFormatter( - logging.Formatter(console_log_format, datefmt=settings.DATE_FMT) - ) - logger.addHandler(console_handler) - - # Create a file handler - file_handler = logging.FileHandler(filename=settings.LOG_DIR, encoding="utf-8") - file_handler.setLevel(logging.DEBUG) - file_handler.setFormatter( - logging.Formatter(file_log_format, datefmt=settings.DATE_FMT) - ) - logger.addHandler(file_handler) - - return logger diff --git a/image-search-engine/logs/time_execute_extract_feature.html b/image-search-engine/logs/time_execute_extract_feature.html new file mode 100644 index 0000000..356d0bb --- /dev/null +++ b/image-search-engine/logs/time_execute_extract_feature.html @@ -0,0 +1,2102 @@ + + + + + + +
+ + + + + + + diff --git a/image-search-engine/faiss_search/__init__.py b/image-search-engine/logs/time_execute_triton_extract_base.html similarity index 100% rename from image-search-engine/faiss_search/__init__.py rename to image-search-engine/logs/time_execute_triton_extract_base.html diff --git a/image-search-engine/logs/time_execute_triton_extract_feature.html b/image-search-engine/logs/time_execute_triton_extract_feature.html new file mode 100644 index 0000000..0ca01ec --- /dev/null +++ b/image-search-engine/logs/time_execute_triton_extract_feature.html @@ -0,0 +1,3919 @@ + + + + + + +
+ + + + + + + diff --git a/image-search-engine/model_repository/efficientnet_b3/config.pbtxt b/image-search-engine/model_repository/efficientnet_b3/config.pbtxt index 31ec46f..665512d 100644 --- a/image-search-engine/model_repository/efficientnet_b3/config.pbtxt +++ b/image-search-engine/model_repository/efficientnet_b3/config.pbtxt @@ -1,24 +1,24 @@ name: "efficientnet_b3" platform: "pytorch_libtorch" -max_batch_size: 32 +max_batch_size : 64 dynamic_batching { } instance_group [ { - count: 1 - kind: KIND_CPU + count: 2 + kind: KIND_GPU } ] input [ { - name: "input__0" + name: "input" data_type: TYPE_FP32 - dims: [ 3, 320, 320 ] + dims: [ 3, 300, 300 ] } ] output [ { - name: "output__0" + name: "output" data_type: TYPE_FP32 - dims: [ 1000 ] + dims: [ 1, 1000 ] } ] diff --git a/image-search-engine/model_repository/efficientnet_b3_onnx/config.pbtxt b/image-search-engine/model_repository/efficientnet_b3_onnx/config.pbtxt new file mode 100644 index 0000000..3b6246f --- /dev/null +++ b/image-search-engine/model_repository/efficientnet_b3_onnx/config.pbtxt @@ -0,0 +1,24 @@ +name: "efficientnet_b3_onnx" +platform: "onnxruntime_onnx" +max_batch_size : 32 + +instance_group [ + { + count: 2 + kind: KIND_GPU + } +] +input [ + { + name: "input" + data_type: TYPE_FP32 + dims: [ 3, 300, 300 ] + } +] +output [ + { + name: "output" + data_type: TYPE_FP32 + dims: [ 1000 ] + } +] diff --git a/image-search-engine/model_repository/fetch_model.py b/image-search-engine/model_repository/fetch_model.py new file mode 100644 index 0000000..39d3466 --- /dev/null +++ b/image-search-engine/model_repository/fetch_model.py @@ -0,0 +1,28 @@ +import torch +from torchvision.models import EfficientNet_B3_Weights, efficientnet_b3 + +device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + +# Instantiate the model +model = efficientnet_b3(weights=EfficientNet_B3_Weights.IMAGENET1K_V1) + +# Set the model to evaluation mode +model.eval() + +# Use a GPU (if available) for inference +model = model.to(device) + +# Save the entire model to a file +traced_model = torch.jit.trace(model, torch.randn(1, 3, 300, 300).to(device)) +torch.jit.save(traced_model, "./efficientnet_b3/1/model.pt") + +# Export the model to ONNX with dynamic batch size +torch.onnx.export( + model, + torch.randn(1, 3, 300, 300).to(device), + "./efficientnet_b3_onnx/1/model.onnx", + input_names=["input"], + output_names=["output"], + dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}}, + verbose=True, +) diff --git a/image-search-engine/model_repository/onnx_to_tensorrt.py b/image-search-engine/model_repository/onnx_to_tensorrt.py new file mode 100644 index 0000000..3643b31 --- /dev/null +++ b/image-search-engine/model_repository/onnx_to_tensorrt.py @@ -0,0 +1,338 @@ +import argparse +import os +import time + +import numpy as np +import onnx +import onnxruntime as rt +import tensorrt as trt +import torch +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +from cuda import cuda +from torchvision.utils import save_image + +TRT_LOGGER = trt.Logger() + + +def parse_args(): + parser = argparse.ArgumentParser(description="Convert ONNX models to TensorRT") + + parser.add_argument("--device", help="cuda or not", default="cuda") + + # Sample image + parser.add_argument("--batch_size", type=int, help="data batch size", default=1) + parser.add_argument("--img_size", help="input size", default=[3, 300, 300]) + parser.add_argument( + "--sample_folder_path", help="sample 
image folder path", default="./../assets/" + ) + # parser.add_argument('--sample_image_path', help='sample image path', + # default='./sample.jpg') + + # Model path + parser.add_argument( + "--onnx_model_path", help="onnx model path", default="./onnx_output.onnx" + ) + parser.add_argument( + "--tensorrt_engine_path", + help="tensorrt engine path", + default="./tensorrt_engine.engine", + ) + + # TensorRT engine params + parser.add_argument( + "--dynamic_axes", help="dynamic batch input or output", default="True" + ) + parser.add_argument( + "--engine_precision", + help="precision of TensorRT engine", + choices=["FP32", "FP16"], + default="FP16", + ) + parser.add_argument( + "--min_engine_batch_size", + type=int, + help="set the min input data size of model for inference", + default=1, + ) + parser.add_argument( + "--opt_engine_batch_size", + type=int, + help="set the most used input data size of model for inference", + default=1, + ) + parser.add_argument( + "--max_engine_batch_size", + type=int, + help="set the max input data size of model for inference", + default=8, + ) + parser.add_argument( + "--engine_workspace", type=int, help="workspace of engine", default=1024 + ) + + args = string_to_bool(parser.parse_args()) + + return args + + +def string_to_bool(args): + if args.dynamic_axes.lower() in ("true"): + args.dynamic_axes = True + else: + args.dynamic_axes = False + + return args + + +def get_transform(img_size): + options = [] + options.append(transforms.Resize((img_size[1], img_size[2]))) + options.append(transforms.ToTensor()) + # options.append(transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5])) + transform = transforms.Compose(options) + return transform + + +""" +def load_image(img_path, size): + img_raw = io.imread(img_path) + img_raw = np.rollaxis(img_raw, 2, 0) + img_resize = resize(img_raw / 255, size, anti_aliasing=True) + img_resize = img_resize.astype(np.float32) + return img_resize, img_raw + """ + + +def load_image_folder(folder_path, img_size, batch_size): + transforming = get_transform(img_size) + dataset = datasets.ImageFolder(folder_path, transform=transforming) + data_loader = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, shuffle=True, num_workers=1 + ) + data_iter = iter(data_loader) + torch_images, class_list = next(data_iter) + print("class:", class_list) + print("torch images size:", torch_images.size()) + save_image(torch_images[0], "test.png") + + return torch_images.cpu().numpy() + + +def build_engine( + onnx_model_path, + tensorrt_engine_path, + engine_precision, + dynamic_axes, + img_size, + batch_size, + min_engine_batch_size, + opt_engine_batch_size, + max_engine_batch_size, +): + # Builder + logger = trt.Logger(trt.Logger.ERROR) + builder = trt.Builder(logger) + network = builder.create_network( + 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH) + ) + profile = builder.create_optimization_profile() + config = builder.create_builder_config() + # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 3 << 30) + # Set FP16 + if engine_precision == "FP16": + config.set_flag(trt.BuilderFlag.FP16) + + # Onnx parser + parser = trt.OnnxParser(network, logger) + if not os.path.exists(onnx_model_path): + print("Failed finding ONNX file!") + exit() + print("Succeeded finding ONNX file!") + with open(onnx_model_path, "rb") as model: + if not parser.parse(model.read()): + print("Failed parsing .onnx file!") + for error in range(parser.num_errors): + print(parser.get_error(error)) + exit() + print("Succeeded parsing .onnx 
file!") + + # Input + inputTensor = network.get_input(0) + # Dynamic batch (min, opt, max) + print("inputTensor.name:", inputTensor.name) + if dynamic_axes: + profile.set_shape( + inputTensor.name, + (min_engine_batch_size, img_size[0], img_size[1], img_size[2]), + (opt_engine_batch_size, img_size[0], img_size[1], img_size[2]), + (max_engine_batch_size, img_size[0], img_size[1], img_size[2]), + ) + print("Set dynamic") + else: + profile.set_shape( + inputTensor.name, + (batch_size, img_size[0], img_size[1], img_size[2]), + (batch_size, img_size[0], img_size[1], img_size[2]), + (batch_size, img_size[0], img_size[1], img_size[2]), + ) + config.add_optimization_profile(profile) + # network.unmark_output(network.get_output(0)) + + # Write engine + engineString = builder.build_serialized_network(network, config) + if engineString is None: + print("Failed building engine!") + exit() + print("Succeeded building engine!") + with open(tensorrt_engine_path, "wb") as f: + f.write(engineString) + + +def trt_inference(engine, context, data): + nInput = np.sum([engine.binding_is_input(i) for i in range(engine.num_bindings)]) + nOutput = engine.num_bindings - nInput + print("nInput:", nInput) + print("nOutput:", nOutput) + + for i in range(nInput): + print( + "Bind[%2d]:i[%2d]->" % (i, i), + engine.get_binding_dtype(i), + engine.get_binding_shape(i), + context.get_binding_shape(i), + engine.get_binding_name(i), + ) + for i in range(nInput, nInput + nOutput): + print( + "Bind[%2d]:o[%2d]->" % (i, i - nInput), + engine.get_binding_dtype(i), + engine.get_binding_shape(i), + context.get_binding_shape(i), + engine.get_binding_name(i), + ) + + bufferH = [] + bufferH.append(np.ascontiguousarray(data.reshape(-1))) + + for i in range(nInput, nInput + nOutput): + bufferH.append( + np.empty( + context.get_binding_shape(i), + dtype=trt.nptype(engine.get_binding_dtype(i)), + ) + ) + + bufferD = [] + for i in range(nInput + nOutput): + bufferD.append(cuda.cuMemAlloc(bufferH[i].nbytes)[1]) + + for i in range(nInput): + cuda.cuMemcpyHtoD(bufferD[i], bufferH[i].ctypes.data, bufferH[i].nbytes) + + context.execute_v2(bufferD) + + for i in range(nInput, nInput + nOutput): + cuda.cuMemcpyDtoH(bufferH[i].ctypes.data, bufferD[i], bufferH[i].nbytes) + + for b in bufferD: + cuda.cuMemFree(b) + + return bufferH + + +def main(): + args = parse_args() + + # Sample images (folder) + print(args.sample_folder_path) + img_resize = load_image_folder( + args.sample_folder_path, args.img_size, args.batch_size + ).astype(np.float32) + """ + # Sample (one image) + print(args.sample_image_path) + img_resize, img_raw = load_image(args.sample_image_path, args.img_size) + """ + + print("inference image size:", img_resize.shape) + + # Build TensorRT engine + build_engine( + args.onnx_model_path, + args.tensorrt_engine_path, + args.engine_precision, + args.dynamic_axes, + args.img_size, + args.batch_size, + args.min_engine_batch_size, + args.opt_engine_batch_size, + args.max_engine_batch_size, + ) + + # Read the engine from the file and deserialize + with open(args.tensorrt_engine_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime: + engine = runtime.deserialize_cuda_engine(f.read()) + context = engine.create_execution_context() + + # TensorRT inference + context.set_binding_shape( + 0, (args.batch_size, args.img_size[0], args.img_size[1], args.img_size[2]) + ) + + trt_start_time = time.time() + trt_outputs = trt_inference(engine, context, img_resize) + trt_outputs = np.array(trt_outputs[1]).reshape(args.batch_size, -1) + trt_end_time = 
time.time() + + # ONNX inference + onnx_model = onnx.load(args.onnx_model_path) + sess = rt.InferenceSession(args.onnx_model_path) + + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [node.name for node in onnx_model.graph.initializer] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert len(net_feed_input) == 1 + + sess_input = sess.get_inputs()[0].name + sess_output = sess.get_outputs()[0].name + + onnx_start_time = time.time() + onnx_result = sess.run([sess_output], {sess_input: img_resize})[0] + onnx_end_time = time.time() + + # Pytorch inference + efficientnet_b3 = models.efficientnet_b3(pretrained=True).to(args.device) + efficientnet_b3.eval() + + img_resize_torch = torch.Tensor(img_resize).to(args.device) + torch_start_time = time.time() + pytorch_result = efficientnet_b3(img_resize_torch) + torch_end_time = time.time() + pytorch_result = pytorch_result.detach().cpu().numpy() + + # Comparision output of TensorRT and output of onnx model + + # Time Efficiency & output + print("--pytorch--") + print(pytorch_result.shape) # (batch_size, 1000) + print(pytorch_result[0][:10]) + print(np.argmax(pytorch_result, axis=1)) + print("Time:", torch_end_time - torch_start_time) + + print("--onnx--") + print(onnx_result.shape) + print(onnx_result[0][:10]) + print(np.argmax(onnx_result, axis=1)) + print("Time: ", onnx_end_time - onnx_start_time) + + print("--tensorrt--") + print(trt_outputs.shape) + print(trt_outputs[0][:10]) + print(np.argmax(trt_outputs, axis=1)) + print("Time: ", trt_end_time - trt_start_time) + + +if __name__ == "__main__": + main() diff --git a/image-search-engine/model_repository/requirements.txt b/image-search-engine/model_repository/requirements.txt new file mode 100644 index 0000000..ae9fc6b --- /dev/null +++ b/image-search-engine/model_repository/requirements.txt @@ -0,0 +1,45 @@ +charset-normalizer==3.3.1 +cmake==3.27.7 +coloredlogs==15.0.1 +cuda-python==12.3.0 +filelock==3.13.0 +flatbuffers==23.5.26 +humanfriendly==10.0 +idna==3.4 +Jinja2==3.1.2 +lit==17.0.3 +MarkupSafe==2.1.3 +mpmath==1.3.0 +networkx==3.2.1 +numpy==1.26.1 +nvidia-cublas-cu11==11.10.3.66 +nvidia-cublas-cu12==12.3.2.9 +nvidia-cuda-cupti-cu11==11.7.101 +nvidia-cuda-nvrtc-cu11==11.7.99 +nvidia-cuda-nvrtc-cu12==12.3.52 +nvidia-cuda-runtime-cu11==11.7.99 +nvidia-cuda-runtime-cu12==12.3.52 +nvidia-cudnn-cu11==8.5.0.96 +nvidia-cudnn-cu12==8.9.4.25 +nvidia-cufft-cu11==10.9.0.58 +nvidia-curand-cu11==10.2.10.91 +nvidia-cusolver-cu11==11.4.0.1 +nvidia-cusparse-cu11==11.7.4.91 +nvidia-nccl-cu11==2.14.3 +nvidia-nvtx-cu11==11.7.91 +onnx==1.15.0 +onnxruntime==1.16.1 +packaging==23.2 +Pillow==10.1.0 +protobuf==4.24.4 +requests==2.31.0 +sympy==1.12 +tensorrt==8.6.1.post1 +tensorrt-bindings==8.6.1 +tensorrt-libs==8.6.1 +torch==1.13.0 +torchaudio==0.13.0 +torchvision==0.14.0 +triton==2.0.0 +typing_extensions==4.8.0 +urllib3==2.0.7 diff --git a/image-search-engine/model_repository/torch_to_onnx.py b/image-search-engine/model_repository/torch_to_onnx.py new file mode 100644 index 0000000..e4b9860 --- /dev/null +++ b/image-search-engine/model_repository/torch_to_onnx.py @@ -0,0 +1,226 @@ +import argparse +import time + +import numpy as np +import onnx +import onnxruntime as rt +import torch +import torchvision.datasets as datasets +import torchvision.models as models +import torchvision.transforms as transforms +from torchvision.utils import save_image + + +def parse_args(): + parser = argparse.ArgumentParser(description="Convert Pytorch models to ONNX") + + 
parser.add_argument("--device", help="cuda or not", default="cuda") + + # Sample image + parser.add_argument( + "--batch_size", type=int, help="onnx sample batch size", default=1 + ) + parser.add_argument("--img_size", help="image size", default=[3, 300, 300]) + parser.add_argument( + "--sample_folder_path", help="sample image folder path", default="./../assets/" + ) + # parser.add_argument('--sample_image_path', help='sample image path', + # default='./sample.jpg') + + parser.add_argument( + "--output_path", help="onnx model path", default="./onnx_output.onnx" + ) + + # ONNX params + parser.add_argument( + "--dynamic_axes", help="dynamic batch input or output", default="True" + ) + parser.add_argument( + "--keep_initializers_as_inputs", + help="""If True, all the initializers (typically corresponding to parameters) + in the exported graph will also be added as inputs to the graph. If False, + then initializers are not added as inputs to the graph, + and only the non-parameter inputs are added as inputs.""", + default="True", + ) + parser.add_argument( + "--export_params", + help="""If specified, all parameters will be exported. + Set this to False if you want to export an untrained model.""", + default="True", + ) + parser.add_argument("--opset_version", type=int, help="opset version", default=11) + + args = string_to_bool(parser.parse_args()) + + return args + + +def string_to_bool(args): + if args.dynamic_axes.lower() in ("true"): + args.dynamic_axes = True + else: + args.dynamic_axes = False + + if args.keep_initializers_as_inputs.lower() in ("true"): + args.keep_initializers_as_inputs = True + else: + args.keep_initializers_as_inputs = False + + if args.export_params.lower() in ("true"): + args.export_params = True + else: + args.export_params = False + + return args + + +def get_transform(img_size): + options = [] + options.append(transforms.Resize((img_size[1], img_size[2]))) + options.append(transforms.ToTensor()) + # options.append(transforms.Normalize(mean=[0.5,0.5,0.5],std=[0.5,0.5,0.5])) + transform = transforms.Compose(options) + return transform + + +"""def load_image(img_path, size): + img_raw = io.imread(img_path) + img_raw = np.rollaxis(img_raw, 2, 0) + img_resize = resize(img_raw / 255, size, anti_aliasing=True) + img_resize = img_resize.astype(np.float32) + return img_resize, img_raw""" + + +def load_image_folder(folder_path, img_size, batch_size): + transforming = get_transform(img_size) + dataset = datasets.ImageFolder(folder_path, transform=transforming) + data_loader = torch.utils.data.DataLoader( + dataset, batch_size=batch_size, shuffle=True, num_workers=1 + ) + data_iter = iter(data_loader) + torch_images, class_list = next(data_iter) + save_image(torch_images[0], "test.png") + + return torch_images.cpu().numpy() + + +if __name__ == "__main__": + args = parse_args() + + # Load pretrained model + efficientnet_b3 = models.efficientnet_b3(pretrained=True).to(args.device) + + """ + fc = nn.Sequential(OrderedDict([ + ('fc1', nn.Linear(512,1000)), + ('output',nn.Softmax(dim=1)) + ])) + efficientnet_b3.fc = fc + """ + + print(efficientnet_b3) + + efficientnet_b3.eval() + + # Sample images (folder) + print(args.sample_folder_path) + img_resize = load_image_folder( + args.sample_folder_path, args.img_size, args.batch_size + ).astype(np.float32) + """ + # Sample (one image) + print(args.sample_image_path) + img_resize, img_raw = load_image(args.sample_image_path, args.img_size) + """ + + sample_input = torch.randn( + args.batch_size, args.img_size[0], args.img_size[1], 
args.img_size[2] + ).to(args.device) + print( + "inference image size:", + img_resize.shape, + "sample input size:", + sample_input.shape, + ) + + if args.dynamic_axes: + # Dynamic input + dynamic_axes = {"input": {0: "batch_size"}, "output": {0: "batch_size"}} + + # Export onnx + torch.onnx.export( + efficientnet_b3, + sample_input, + args.output_path, + export_params=args.export_params, + keep_initializers_as_inputs=args.keep_initializers_as_inputs, + opset_version=args.opset_version, + input_names=["input"], # input vect name + output_names=["output"], # output vect name + dynamic_axes=dynamic_axes, # dynamic input + verbose=False, + ) + else: + # Export onnx + torch.onnx.export( + efficientnet_b3, + sample_input, + args.output_path, + export_params=args.export_params, + keep_initializers_as_inputs=args.keep_initializers_as_inputs, + opset_version=args.opset_version, + input_names=["input"], # input vect name + output_names=["output"], # output vect name + verbose=False, + ) + + # Load the ONNX model + onnx_model = onnx.load(args.output_path) + sess = rt.InferenceSession(args.output_path) + + # Check that the IR is well formed + onnx.checker.check_model(onnx_model) + + # Print a human readable representation of the graph + # with open("OnnxShape.txt", "w") as f: + # f.write(f"{onnx.helper.printable_graph(onnx_model.graph)}") + + # Comparision output of onnx and output of Pytorch model + # Pytorch results + img_resize_torch = torch.Tensor(img_resize).to(args.device) + torch_start_time = time.time() + pytorch_result = efficientnet_b3(img_resize_torch) + torch_end_time = time.time() + pytorch_result = pytorch_result.detach().cpu().numpy() + + # ONNX results + input_all = [node.name for node in onnx_model.graph.input] + input_initializer = [node.name for node in onnx_model.graph.initializer] + net_feed_input = list(set(input_all) - set(input_initializer)) + assert len(net_feed_input) == 1 + + sess_input = sess.get_inputs()[0].name + sess_output = sess.get_outputs()[0].name + + onnx_start_time = time.time() + onnx_result = sess.run([sess_output], {sess_input: img_resize})[0] + onnx_end_time = time.time() + + print("--pytorch--") + print(pytorch_result.shape) # (batch_size, 1000) + print(pytorch_result[0][:10]) + print(np.argmax(pytorch_result, axis=1)) + print("Time:", torch_end_time - torch_start_time) + + print("--onnx--") + print(onnx_result.shape) + print(onnx_result[0][:10]) + print(np.argmax(onnx_result, axis=1)) + print("Time:", onnx_end_time - onnx_start_time) + + # Comparision + assert np.allclose( + pytorch_result, onnx_result, atol=1.0e-2 + ), "The outputs are different (Pytorch and ONNX)" + print("The numerical values are same (Pytorch and ONNX)") diff --git a/image-search-engine/qdrant_ingest.py b/image-search-engine/qdrant_ingest.py index f7cf73f..6d7b79f 100644 --- a/image-search-engine/qdrant_ingest.py +++ b/image-search-engine/qdrant_ingest.py @@ -1,156 +1,18 @@ -import time +from src.qdrant_search.ingest_data import QdrantIngest +from src.utils import LOGGER -import numpy as np -import pandas as pd -from config import settings -from qdrant_client import QdrantClient, grpc -from tqdm import tqdm +# Instantiate the QdrantIngest class +qdrant_ingest = QdrantIngest() +try: + response = qdrant_ingest.check_collection() + if response.result.status == 1: + LOGGER.info("Collection already exists!") +except Exception as e: + LOGGER.info(f"Error checking collection: {e}") -class QdrantIngest: - """ - A class for ingesting data into Qdrant. 
+ LOGGER.info("Create collection!") + response = qdrant_ingest.create_collection() + LOGGER.info(response) - Attributes: - client_grpc (QdrantClient): A client for interacting with Qdrant. - - item_path (numpy.ndarray): Array of item URLs. - item_image (numpy.ndarray): Array of item images. - item_name (numpy.ndarray): Array of item names. - fixed_item_price (numpy.ndarray): Array of item prices. - sale_item_price (numpy.ndarray): Array of sale item prices. - sales_number (numpy.ndarray): Array of sales numbers. - shop_path (numpy.ndarray): Array of shop paths. - shop_name (numpy.ndarray): Array of shop names. - image_features (numpy.ndarray): Array of features to be ingested. - """ - - def __init__(self): - """ - Initializes a QdrantIngest instance, creates a Qdrant client, and loads data. - """ - # Create a client to interact with Qdrant - self.client_grpc = QdrantClient( - url=f"http://{settings.QDRANT_HOST}:6334", prefer_grpc=True - ) - - # Load the dataset - data = pd.read_csv(settings.DATA_PATH) - - # Extract attributes from the dataset - self.item_path = data["item_path"] - self.item_image = data["item_image"] - self.item_name = data["item_name"] - self.fixed_item_price = data["fixed_item_price"] - self.sale_item_price = data["sale_item_price"] - self.sales_number = data["sales_number"] - self.shop_path = data["shop_path"] - self.shop_name = data["shop_name"] - - # Load array features - self.image_features = np.load(settings.FEATURES_PATH, allow_pickle=True) - - def create_collection(self): - """ - Creates a collection in Qdrant. - - Returns: - grpc.CreateCollectionResponse: The response from Qdrant after creating the collection. - """ - # Create collection - response = self.client_grpc.grpc_collections.Create( - grpc.CreateCollection( - collection_name=settings.QDRANT_COLLECTION, - vectors_config=grpc.VectorsConfig( - params=grpc.VectorParams( - size=settings.DIMENSIONS, - distance=grpc.Distance.Cosine, - ) - ), - timeout=10, - ) - ) - - return response - - def check_collection(self): - """ - Checks if the collection already exists in Qdrant. - - Returns: - grpc.GetCollectionInfoResponse: The response from Qdrant containing collection information. - """ - response = self.client_grpc.grpc_collections.Get( - grpc.GetCollectionInfoRequest(collection_name=settings.QDRANT_COLLECTION) - ) - return response - - def add_points(self, batch_size=1000): - """ - Adds data points to the Qdrant collection. - - Args: - batch_size (int): Batch size for uploading data points. 
- - Returns: - None - """ - start_time = time.time() - - num_features = self.image_features["image_features"].shape[0] - # num_features = 2000 - num_batches = (num_features + batch_size - 1) // batch_size - - for i in tqdm(range(num_batches)): - # Split into batches - start_idx = i * batch_size - end_idx = min((i + 1) * batch_size, num_features) - - ids = list(range(start_idx, end_idx)) - - payloads = [ - { - "item_path": self.item_path[idx], - "item_image": self.item_image[idx], - "item_name": self.item_name[idx], - "fixed_item_price": int(self.fixed_item_price[idx]), - "sale_item_price": int(self.sale_item_price[idx]), - "sale_rate": float( - 1 - self.sale_item_price[idx] / self.fixed_item_price[idx] - ), - "sales_number": int(self.sales_number[idx]), - "shop_path": self.shop_path[idx], - "shop_name": self.shop_name[idx], - } - for idx in range(start_idx, end_idx) - ] - - vectors = self.image_features["image_features"][start_idx:end_idx] - - self.client_grpc.upload_collection( - collection_name=settings.QDRANT_COLLECTION, - vectors=vectors, - payload=payloads, - ids=ids, - ) - - print("Done adding points to the collection!") - print(f"Time: {time.time() - start_time}") - - -if __name__ == "__main__": - # Instantiate the QdrantIngest class - qdrant_ingest = QdrantIngest() - - try: - response = qdrant_ingest.check_collection() - if response.result.status == 1: - print(f"Collection {settings.QDRANT_COLLECTION} already exists!") - except Exception as e: - print(f"Error checking collection: {e}") - - print("Create collection!") - response = qdrant_ingest.create_collection() - print(response) - - qdrant_ingest.add_points() + qdrant_ingest.add_points() diff --git a/image-search-engine/requirements.txt b/image-search-engine/requirements.txt index 6333f80..d65fd76 100644 --- a/image-search-engine/requirements.txt +++ b/image-search-engine/requirements.txt @@ -6,6 +6,8 @@ black==23.9.1 charset-normalizer==3.2.0 click==8.1.7 colorama==0.4.6 +cuda-python==12.2.0 +Cython==3.0.3 dill==0.3.7 exceptiongroup==1.1.3 faiss-cpu==1.7.4 @@ -31,6 +33,8 @@ mpmath==1.3.0 mypy-extensions==1.0.0 networkx==3.1 numpy==1.25.2 +onnx==1.14.1 +opencv-python==4.8.1.78 packaging==23.1 pandas==2.0.3 pathspec==0.11.2 @@ -43,11 +47,13 @@ pydantic==2.2.0 pydantic-settings==2.0.3 pydantic_core==2.6.0 pyflakes==3.1.0 +pyinstrument==4.6.0 pylint==2.17.5 PySocks==1.7.1 python-dateutil==2.8.2 python-dotenv==1.0.0 python-multipart==0.0.6 +python-rapidjson==1.12 pytz==2023.3 qdrant-client==1.5.4 requests==2.31.0 @@ -59,6 +65,7 @@ sympy==1.12 tomli==2.0.1 tomlkit==0.12.1 tqdm==4.66.1 +tritonclient==2.38.0 typing_extensions==4.7.1 tzdata==2023.3 urllib3==1.26.14 diff --git a/image-search-engine/qdrant_search/__init__.py b/image-search-engine/src/faiss_search/__init__.py similarity index 100% rename from image-search-engine/qdrant_search/__init__.py rename to image-search-engine/src/faiss_search/__init__.py diff --git a/image-search-engine/src/faiss_search/ingest_data.py b/image-search-engine/src/faiss_search/ingest_data.py new file mode 100644 index 0000000..5e52164 --- /dev/null +++ b/image-search-engine/src/faiss_search/ingest_data.py @@ -0,0 +1,45 @@ +import os + +import faiss +import numpy as np +from config import settings +from src.utils import LOGGER, time_profiling + + +class FaissIngest: + """ + A class for ingesting data into a Faiss index. + + Attributes: + image_features (numpy.ndarray): Array of features to be indexed. 
+ """ + + def __init__(self): + """ + Initializes a FaissIngest instance and loads array features from a file. + """ + # Load array features from the specified file + self.image_features = np.load(settings.FEATURES_PATH, allow_pickle=True) + + def check_index_exists(self): + return os.path.exists(settings.INDEX_PATH) + + @time_profiling + def create_index(self): + """ + Creates a Faiss index, adds array features to it, and saves the index to disk. + + Returns: + None + """ + # Create an index with FAISS using L2 distance metric + index_faiss = faiss.IndexFlatL2(settings.DIMENSIONS) + + # Add the array features to the Faiss index + index_faiss.add(self.image_features["image_features"]) + + # Save the index to disk + faiss.write_index(index_faiss, settings.INDEX_PATH) + + # Print a success message + LOGGER.info("Faiss index created successfully!") diff --git a/image-search-engine/faiss_search/searcher.py b/image-search-engine/src/faiss_search/searcher.py similarity index 97% rename from image-search-engine/faiss_search/searcher.py rename to image-search-engine/src/faiss_search/searcher.py index 70a2ffa..018fdc1 100644 --- a/image-search-engine/faiss_search/searcher.py +++ b/image-search-engine/src/faiss_search/searcher.py @@ -2,6 +2,7 @@ import numpy as np import pandas as pd from config import settings +from src.utils import time_profiling class FaissSearch: @@ -44,13 +45,14 @@ def __init__(self): self.shop_path = data["shop_path"] self.shop_name = data["shop_name"] + # @time_profiling def search(self, query_vector, top_k=settings.TOP_K): """ Performs a similarity search using the provided query vector. Args: - query_vector (np.ndarray): The query vector for similarity search. - - top_k (int, optional): The number of nearest neighbors to retrieve. Default is specified in settings. + - top_k (int, optional): The number of nearest neighbors to retrieve. Returns: - list: A list of dictionaries containing search results, including item information. diff --git a/image-search-engine/src/feature_extraction/__init__.py b/image-search-engine/src/feature_extraction/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/image-search-engine/src/feature_extraction/extractor.py b/image-search-engine/src/feature_extraction/extractor.py new file mode 100644 index 0000000..1008ea7 --- /dev/null +++ b/image-search-engine/src/feature_extraction/extractor.py @@ -0,0 +1,151 @@ +import torch +import tritonclient.grpc.aio as grpcclient +from config import settings +from src.utils import LOGGER, decode_img, py_profiling, time_profiling +from torchvision.io import read_image +from torchvision.models import EfficientNet_B3_Weights, efficientnet_b3 + + +class FeatureExtractor: + def __init__(self): + """ + Initializes the FeatureExtractor class. + + This class is used to extract features from images using the EfficientNet-B3 model. + + Attributes: + - device (torch.device): Represents the device (CPU/GPU) where the model will be loaded. + - weights (EfficientNet_B3_Weights): Specifies the pre-trained weights to be used. + - model (torch.nn.Module): The loaded EfficientNet-B3 model. + """ + self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + LOGGER.info(f"Model run on {self.device} device") + self.weights = EfficientNet_B3_Weights.IMAGENET1K_V1 + self.model = self.load_model() + self.triton_client = grpcclient.InferenceServerClient( + url=settings.TRITON_SERVER_URL + ) + + def load_model(self): + """ + Loads the pre-trained EfficientNet-B3 model. 
+ + Returns: + - torch.nn.Module: The loaded model. + """ + # Load the pre-trained model + model = efficientnet_b3(weights=self.weights) + + # Set the model to evaluation mode + model.eval() + + # Use a GPU (if available) for inference + model = model.to(self.device) + + return model + + # @time_profiling + def preprocess_input(self, image_path): + """ + Preprocesses the input image for inference. + + Args: + - image_path (str): The path to the input image. + + Returns: + - torch.Tensor: Preprocessed image tensor. [1, 3, 300, 300] + """ + image = read_image(image_path) + + # Initialize the inference transforms + preprocess = self.weights.transforms(antialias=True) + + # Process RGBA image + image = image.narrow(0, 0, 3) + + # Apply inference preprocessing transforms + image = preprocess(image).unsqueeze(0) + + return image + + # @py_profiling + # @time_profiling + def extract_feature(self, image_path): + """ + Extracts features from the input image. + + Args: + - image_path (str): The path to the input image. + + Returns: + - numpy.ndarray: Extracted features as a numpy array. (1, 1000) + """ + image = self.preprocess_input(image_path) + + feature = self.model(image.to(self.device)) + + feature = feature.cpu().detach().numpy() + + return feature + + async def triton_inference(self, image, model_name, inputs_name, outputs_name): + inputs = [grpcclient.InferInput(inputs_name, image.shape, datatype="FP32")] + outputs = [grpcclient.InferRequestedOutput(outputs_name)] + + inputs[0].set_data_from_numpy(image.numpy()) + + results = await self.triton_client.infer( + model_name=model_name, inputs=inputs, outputs=outputs + ) + + feature = results.as_numpy(outputs_name) + + return feature + + # @async_py_profiling + # @async_time_profiling + async def triton_extract_feature_onnx(self, image_path): + image = self.preprocess_input(image_path) + + feature = await self.triton_inference( + image=image, + model_name=settings.ONNX_MODEL_NAME, + inputs_name=settings.MODEL_INPUT_NAME, + outputs_name=settings.MODEL_OUTPUT_NAME, + ) + + return feature + + # @async_py_profiling + # @async_time_profiling + async def triton_extract_feature(self, image_path): + image = self.preprocess_input(image_path) + + feature = await self.triton_inference( + image=image, + model_name=settings.TORCH_MODEL_NAME, + inputs_name=settings.MODEL_INPUT_NAME, + outputs_name=settings.MODEL_OUTPUT_NAME, + ) + + return feature + + # @async_py_profiling + # @async_time_profiling + async def triton_extract_base64(self, image): + image = decode_img(image) + + # Initialize the inference transforms + preprocess = self.weights.transforms(antialias=True) + + # Apply inference preprocessing transforms + image = preprocess(image).unsqueeze(0) + + feature = await self.triton_inference( + image=image, + model_name=settings.TORCH_MODEL_NAME, + inputs_name=settings.MODEL_INPUT_NAME, + outputs_name=settings.MODEL_OUTPUT_NAME, + ) + + return feature diff --git a/image-search-engine/src/qdrant_search/__init__.py b/image-search-engine/src/qdrant_search/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/image-search-engine/src/qdrant_search/ingest_data.py b/image-search-engine/src/qdrant_search/ingest_data.py new file mode 100644 index 0000000..0563ac8 --- /dev/null +++ b/image-search-engine/src/qdrant_search/ingest_data.py @@ -0,0 +1,121 @@ +import numpy as np +import pandas as pd +from config import settings +from qdrant_client import QdrantClient, grpc +from src.utils import LOGGER, time_profiling +from tqdm import tqdm + + +class 
QdrantIngest: + """ + A class for ingesting data into Qdrant. + + Attributes: + client_grpc (QdrantClient): A client for interacting with Qdrant. + + item_path (numpy.ndarray): Array of item URLs. + item_image (numpy.ndarray): Array of item images. + item_name (numpy.ndarray): Array of item names. + fixed_item_price (numpy.ndarray): Array of item prices. + sale_item_price (numpy.ndarray): Array of sale item prices. + sales_number (numpy.ndarray): Array of sales numbers. + shop_path (numpy.ndarray): Array of shop paths. + shop_name (numpy.ndarray): Array of shop names. + image_features (numpy.ndarray): Array of features to be ingested. + """ + + def __init__(self): + """ + Initializes a QdrantIngest instance, creates a Qdrant client, and loads data. + """ + # Create a client to interact with Qdrant + self.client_grpc = QdrantClient(url=settings.QDRANT_URL, prefer_grpc=True) + + # Load the dataset + data = pd.read_csv(settings.DATA_PATH) + + # Extract attributes from the dataset + self.item_path = data["item_path"] + self.item_image = data["item_image"] + self.item_name = data["item_name"] + self.fixed_item_price = data["fixed_item_price"] + self.sale_item_price = data["sale_item_price"] + self.sales_number = data["sales_number"] + self.shop_path = data["shop_path"] + self.shop_name = data["shop_name"] + + # Load array features + self.image_features = np.load(settings.FEATURES_PATH, allow_pickle=True) + + def create_collection(self): + """ + Creates a collection in Qdrant. + """ + # Create collection + response = self.client_grpc.grpc_collections.Create( + grpc.CreateCollection( + collection_name=settings.QDRANT_COLLECTION, + vectors_config=grpc.VectorsConfig( + params=grpc.VectorParams( + size=settings.DIMENSIONS, + distance=grpc.Distance.Cosine, + ) + ), + timeout=10, + ) + ) + + return response + + def check_collection(self): + """ + Checks if the collection already exists in Qdrant. + """ + response = self.client_grpc.grpc_collections.Get( + grpc.GetCollectionInfoRequest(collection_name=settings.QDRANT_COLLECTION) + ) + return response + + @time_profiling + def add_points(self, batch_size=1000): + """ + Adds data points to the Qdrant collection. 
+ """ + num_features = self.image_features["image_features"].shape[0] + # num_features = 2000 + num_batches = (num_features + batch_size - 1) // batch_size + + for i in tqdm(range(num_batches)): + # Split into batches + start_idx = i * batch_size + end_idx = min((i + 1) * batch_size, num_features) + + ids = list(range(start_idx, end_idx)) + + payloads = [ + { + "item_path": self.item_path[idx], + "item_image": self.item_image[idx], + "item_name": self.item_name[idx], + "fixed_item_price": int(self.fixed_item_price[idx]), + "sale_item_price": int(self.sale_item_price[idx]), + "sale_rate": float( + 1 - self.sale_item_price[idx] / self.fixed_item_price[idx] + ), + "sales_number": int(self.sales_number[idx]), + "shop_path": self.shop_path[idx], + "shop_name": self.shop_name[idx], + } + for idx in range(start_idx, end_idx) + ] + + vectors = self.image_features["image_features"][start_idx:end_idx] + + self.client_grpc.upload_collection( + collection_name=settings.QDRANT_COLLECTION, + vectors=vectors, + payload=payloads, + ids=ids, + ) + + LOGGER.info("Done adding points to the collection!") diff --git a/image-search-engine/qdrant_search/searcher.py b/image-search-engine/src/qdrant_search/searcher.py similarity index 91% rename from image-search-engine/qdrant_search/searcher.py rename to image-search-engine/src/qdrant_search/searcher.py index 14a057b..ea69d6a 100644 --- a/image-search-engine/qdrant_search/searcher.py +++ b/image-search-engine/src/qdrant_search/searcher.py @@ -3,6 +3,7 @@ import numpy as np from config import settings from qdrant_client import QdrantClient, grpc +from src.utils import async_time_profiling class QdrantSearch: @@ -18,10 +19,9 @@ def __init__(self): Initializes a QdrantSearch instance and creates a Qdrant client. """ # Create a client to interact with Qdrant - self.client_grpc = QdrantClient( - url=f"http://{settings.QDRANT_HOST}:6334", prefer_grpc=True - ) + self.client_grpc = QdrantClient(url=settings.QDRANT_URL, prefer_grpc=True) + # @async_time_profiling async def search(self, query_vector, top_k=settings.TOP_K): """ Performs a similarity search in Qdrant using a query vector. diff --git a/image-search-engine/schemas.py b/image-search-engine/src/schemas.py similarity index 80% rename from image-search-engine/schemas.py rename to image-search-engine/src/schemas.py index 338ce70..afaccc8 100644 --- a/image-search-engine/schemas.py +++ b/image-search-engine/src/schemas.py @@ -1,4 +1,12 @@ -from pydantic import BaseModel +from pydantic import BaseModel, Field + + +class ImageBase64Request(BaseModel): + image: str = Field( + ..., + title="Utf-8 string from a base64 encoded image", + example="base64encodedimage", + ) class Product(BaseModel): diff --git a/image-search-engine/src/utils.py b/image-search-engine/src/utils.py new file mode 100644 index 0000000..d563e28 --- /dev/null +++ b/image-search-engine/src/utils.py @@ -0,0 +1,144 @@ +import base64 +import functools +import logging +import time +from datetime import datetime + +import cv2 +import numpy as np +import pyinstrument +import pytz +from config import settings +from torchvision import transforms + + +def time_profiling(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + start_time = time.time() + result = func(*args, **kwargs) + LOGGER.info( + f"Function {func.__name__} executed in {time.time() - start_time:.4f} seconds." 
+ ) + return result + + return wrapper + + +def async_time_profiling(func): + @functools.wraps(func) + async def wrapper(*args, **kwargs): + start_time = time.time() + result = await func(*args, **kwargs) + LOGGER.info( + f"Function {func.__name__} executed in {time.time() - start_time:.4f} seconds." + ) + return result + + return wrapper + + +def async_py_profiling(func): + async def wrapper(*args, **kwargs): + profiler = pyinstrument.Profiler(async_mode="enabled") + profiler.start() + result = await func(*args, **kwargs) + profiler.stop() + with open(f"./logs/time_execute_{func.__name__}.html", "w") as f: + f.write(profiler.output_html()) + return result + + return wrapper + + +def py_profiling(func): + def wrapper(*args, **kwargs): + profiler = pyinstrument.Profiler() + profiler.start() + result = func(*args, **kwargs) + profiler.stop() + with open(f"./logs/time_execute_{func.__name__}.html", "w") as f: + f.write(profiler.output_html()) + return result + + return wrapper + + +# @async_time_profiling +async def save_image_file(file): + # Prepend the current datetime to the filename + file.filename = datetime.now().strftime("%Y%m%d-%H%M%S-") + file.filename + + # Construct the full image path based on the settings + image_path = settings.IMAGEDIR + file.filename + + # Read the contents of the uploaded file asynchronously + contents = await file.read() + + # Write the uploaded contents to the specified image path + with open(image_path, "wb") as f: + f.write(contents) + + return image_path + + +def decode_img(img: str) -> np.ndarray: + image_array = np.frombuffer(base64.urlsafe_b64decode(img), dtype=np.uint8) + # Decode the image using OpenCV + image = cv2.imdecode(image_array, cv2.IMREAD_COLOR) + + # Convert BGR image to RGB image + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + # Define a transform to convert + # the image to torch tensor + transform = transforms.Compose([transforms.ToTensor()]) + + # Convert the image to Torch tensor + tensor = transform(image) + return tensor + + +def initial_logger(): + # Create a logger instance + logger = logging.getLogger("app") + + # Set the logging level + logger.setLevel(logging.DEBUG) + + # Set the timezone to Vietnam + vietnam_timezone = pytz.timezone("Asia/Ho_Chi_Minh") + + # Configure logging with the Vietnam timezone + logging.Formatter.converter = ( + lambda *args: pytz.utc.localize(datetime.utcnow()) + .astimezone(vietnam_timezone) + .timetuple() + ) + + # Define the log format + console_log_format = "%(asctime)s - %(levelname)s - %(message)s" + file_log_format = ( + "%(asctime)s - %(levelname)s - %(message)s - (%(filename)s:%(lineno)d)" + ) + + # Create a console handler + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.DEBUG) + console_handler.setFormatter( + logging.Formatter(console_log_format, datefmt=settings.DATE_FMT) + ) + logger.addHandler(console_handler) + + # Create a file handler + file_handler = logging.FileHandler(filename=settings.LOG_DIR, encoding="utf-8") + file_handler.setLevel(logging.DEBUG) + file_handler.setFormatter( + logging.Formatter(file_log_format, datefmt=settings.DATE_FMT) + ) + logger.addHandler(file_handler) + + return logger + + +LOGGER = initial_logger() diff --git a/text-search-engine/elastic_ingest.py b/text-search-engine/elastic_ingest.py index f436ded..f9cf5f6 100644 --- a/text-search-engine/elastic_ingest.py +++ b/text-search-engine/elastic_ingest.py @@ -173,7 +173,7 @@ def indexing_batch_document(self): def check_index_exists(self): """Check index name exists""" - 
return not self.elastic_search.indices.exists(index=self.index_name) + return self.elastic_search.indices.exists(index=self.index_name) def main():
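The conversion scripts in this patch print the PyTorch, ONNX, and TensorRT outputs side by side, but only torch_to_onnx.py asserts numerical agreement. A minimal sketch of the equivalent check for the TensorRT path, assuming the pytorch_result, onnx_result, and trt_outputs arrays already computed in main() of the TensorRT script above (variable names taken from that script; the tolerance mirrors torch_to_onnx.py and may need loosening if the engine is built in FP16):

import numpy as np

# Sketch only: compares the three output tensors the conversion script already
# produces. pytorch_result, onnx_result, and trt_outputs are assumed to be the
# (batch_size, 1000) arrays from main() above, not new values computed here.
assert np.allclose(pytorch_result, onnx_result, atol=1.0e-2), "PyTorch and ONNX outputs differ"
assert np.allclose(onnx_result, trt_outputs, atol=1.0e-2), "ONNX and TensorRT outputs differ"
print("PyTorch, ONNX, and TensorRT outputs agree within atol=1e-2")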
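decode_img() in src/utils.py decodes a URL-safe base64 string, matching the new ImageBase64Request schema with its single "image" field. A minimal client-side sketch, assuming the image-search container exposed on port 7000 as in docker-compose-prod.yaml; the /search-base64 route name is illustrative and not taken from this patch:

import base64

import requests  # assumed to be available in the client environment

# Encode an image with the URL-safe alphabet that decode_img() expects.
with open("sample.jpg", "rb") as f:
    image_b64 = base64.urlsafe_b64encode(f.read()).decode("utf-8")

# POST it in the shape of ImageBase64Request; the endpoint path is hypothetical.
resp = requests.post(
    "http://localhost:7000/search-base64",
    json={"image": image_b64},
)
print(resp.json())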