## Build a vector store from a DuckDB table using custom code

In [1]:
import os
import sys

# Root of your local paper-parser checkout. Either export PAPER_PARSER_DIR
# before starting Jupyter or edit the fallback path below; every other path in
# this cell is derived from it, so it is the only value that needs changing.
paper_parser_dir = os.environ.get(
    "PAPER_PARSER_DIR", "/Users/jy006/Documents/Code/BioMedGPS/paper-parser"
)

# Parquet file holding the paragraph-level text chunks queried by later cells.
parquet_file = os.path.join(
    paper_parser_dir, "benchmarks", "antibody", "text_chunks", "paragraphs.parquet"
)

# Make the repository's scripts directory importable
# (presumably where the `embedding` module used by later cells lives).
script_dir = os.path.join(paper_parser_dir, "scripts")
if script_dir not in sys.path:
    # Guarded so that re-running this cell does not pile up duplicate entries.
    sys.path.append(script_dir)

print(sys.path)

['/Users/jy006/miniconda3/envs/paper-parser/lib/python310.zip', '/Users/jy006/miniconda3/envs/paper-parser/lib/python3.10', '/Users/jy006/miniconda3/envs/paper-parser/lib/python3.10/lib-dynload', '', '/Users/jy006/miniconda3/envs/paper-parser/lib/python3.10/site-packages', '/Users/jy006/miniconda3/envs/paper-parser/lib/python3.10/site-packages/setuptools/_vendor', '/Users/jy006/Documents/Code/BioMedGPS/paper-parser/scripts']


In [None]:
!pip install -qU qdrant-client duckdb

In [2]:
import duckdb

# Sanity check: count the rows in the parquet file before embedding anything.
# duckdb.connect() is called without a database path; the parquet file is
# scanned in place via read_parquet().
conn = duckdb.connect()
row_count_query = f"SELECT COUNT(*) FROM read_parquet('{parquet_file}')"
row_count_relation = conn.sql(row_count_query)
row_count_relation.show()

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│          961 │
└──────────────┘



In [3]:
from embedding import DuckDBLoader

# Load the first 100 rows of the parquet file through the project's loader.
# `text` is used as the page content; the remaining columns listed here are
# requested as metadata.
sample_query = f"SELECT * FROM read_parquet('{parquet_file}') LIMIT 100"
document_loader = DuckDBLoader(
    query=sample_query,
    page_content_column="text",
    metadata_columns=["pmid", "pmc", "doi", "pubdate"],
)
raw_documents = document_loader.load()

# Keep only the text of (at most) the first 100 loaded documents.
# NOTE(review): documents are accessed with .get("text"), so they are
# presumably dict-like — confirm against DuckDBLoader.
first_documents = list(raw_documents)[:100]
sentences = [document.get("text") for document in first_documents]

__init__ took 0.00 milliseconds
load took 0.00 milliseconds


### Run NV-Embed-v2

In [1]:
from sentence_transformers import SentenceTransformer
import torch

# Pick the best available accelerator instead of assuming Apple-silicon "mps",
# so the cell also runs on CUDA machines and CPU-only hosts.
if torch.backends.mps.is_available():
    device = "mps"
elif torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Half precision on GPU backends (as before); float32 on CPU, where float16
# kernels are not reliably available.
torch_dtype = torch.float16 if device != "cpu" else torch.float32

model = SentenceTransformer(
    "nvidia/NV-Embed-v2",
    # Allows executing the custom model code shipped in the Hub repository;
    # only enable this for sources you trust.
    trust_remote_code=True,
    model_kwargs={"torch_dtype": torch_dtype},
    device=device,
)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [4]:
import os
from embedding import build_vector_db

# Cache / vector-store location, derived from `paper_parser_dir` (defined in
# the first cell) so that changing the checkout path there is enough.
vector_db_dir = os.path.join(
    paper_parser_dir, "benchmarks", "antibody", "pmc_embedding", "test_rocksdb_no_speedup"
)
batch_size = 8
# NOTE(review): presumably caps the number of rows that get embedded — confirm
# against build_vector_db.
num_documents = 100

build_vector_db(
    cache_filepath=vector_db_dir,
    raw_document_path=parquet_file,
    # Alternative (presumably lets build_vector_db load the model by name):
    # model_name="nvidia/NV-Embed-v2",
    model=model,  # reuse the SentenceTransformer instance loaded in the cell above
    page_content_column="text",
    metadata_columns=["pmid", "pmc", "doi", "pubdate"],
    num_documents=num_documents,
    batch_size=batch_size,
    allow_batch_mode=True
)

Loading documents...
__init__ took 0.00 milliseconds
load took 0.00 milliseconds
Initializing vector database...
Loading model...
Initializing cache database...
__init__ took 68.21 milliseconds
Computing embeddings and inserting into vector database...
Starting multi-process pool...
Processing batch 1
Time taken to check: 0.0017609596252441406
Missing 0 texts, 8 cached
Processing batch 2
Time taken to check: 0.0004668235778808594
Missing 0 texts, 8 cached
Processing batch 3
Time taken to check: 0.0005509853363037109
Missing 0 texts, 8 cached
Processing batch 4
Time taken to check: 6.818771362304688e-05
Missing 8 texts, 0 cached


  self.gen = func(*args, **kwds)
  self.gen = func(*args, **kwds)
  self.gen = func(*args, **kwds)
  self.gen = func(*args, **kwds)


Time taken to encode: 481.90996074676514
Time taken to put: 0.011041879653930664
Time taken to release GPU memory: 0.00011491775512695312
Processing batch 5
Time taken to check: 6.508827209472656e-05
Missing 8 texts, 0 cached


KeyboardInterrupt: 

### Run all-MiniLM-L6-v2

In [3]:
import os
from embedding import build_vector_db

# Separate cache / vector-store location for the MiniLM run, derived from
# `paper_parser_dir` (defined in the first cell) so that changing the checkout
# path there is enough.
vector_db_dir = os.path.join(
    paper_parser_dir, "benchmarks", "antibody", "pmc_embedding", "test_rocksdb_no_speedup_1"
)
batch_size = 8
# NOTE(review): 0 presumably means "no limit, embed every row" — confirm
# against build_vector_db.
num_documents = 0

build_vector_db(
    cache_filepath=vector_db_dir,
    raw_document_path=parquet_file,
    # The model is referenced by name here rather than passed as an instance.
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    # model=model,
    page_content_column="text",
    metadata_columns=["pmid", "pmc", "doi", "pubdate"],
    num_documents=num_documents,
    batch_size=batch_size,
    allow_batch_mode=True
)

2025-01-25 09:53:37,695 - INFO - Loading documents...
INFO:embedding:Loading documents...
2025-01-25 09:53:37,696 - INFO - __init__ took 0.00 milliseconds
INFO:embedding:__init__ took 0.00 milliseconds
2025-01-25 09:53:37,696 - INFO - load took 0.00 milliseconds
INFO:embedding:load took 0.00 milliseconds
2025-01-25 09:53:37,697 - INFO - Initializing vector database...
INFO:embedding:Initializing vector database...
2025-01-25 09:53:37,698 - INFO - Loading model...
INFO:embedding:Loading model...
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: mps
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2025-01-25 09:53:38,537 - INFO - Initializing cache database...
INFO:embedding:Initializing cache database...
2025-01-25 09:53:38,592 - INFO - __init__ took 894.19 milliseconds
INFO:embedding:__init__ took 894.19 milliseconds
2025-01-25 09:53:38,593 - INFO - Computing embeddings and inserting into ve

Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,275 - INFO - Time taken to encode: 0.10180878639221191
INFO:embedding:Time taken to encode: 0.10180878639221191
2025-01-25 09:53:47,277 - INFO - Time taken to put: 0.0017070770263671875
INFO:embedding:Time taken to put: 0.0017070770263671875
2025-01-25 09:53:47,278 - INFO - Time taken to release GPU memory: 2.9087066650390625e-05
INFO:embedding:Time taken to release GPU memory: 2.9087066650390625e-05
2025-01-25 09:53:47,282 - INFO - 
Processing batch 2
INFO:embedding:
Processing batch 2
2025-01-25 09:53:47,283 - INFO - Time taken to check: 0.00023174285888671875
INFO:embedding:Time taken to check: 0.00023174285888671875
2025-01-25 09:53:47,283 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,377 - INFO - Time taken to encode: 0.09264302253723145
INFO:embedding:Time taken to encode: 0.09264302253723145
2025-01-25 09:53:47,380 - INFO - Time taken to put: 0.002841949462890625
INFO:embedding:Time taken to put: 0.002841949462890625
2025-01-25 09:53:47,381 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:47,384 - INFO - 
Processing batch 3
INFO:embedding:
Processing batch 3
2025-01-25 09:53:47,385 - INFO - Time taken to check: 0.0002639293670654297
INFO:embedding:Time taken to check: 0.0002639293670654297
2025-01-25 09:53:47,386 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,537 - INFO - Time taken to encode: 0.15041494369506836
INFO:embedding:Time taken to encode: 0.15041494369506836
2025-01-25 09:53:47,540 - INFO - Time taken to put: 0.0024480819702148438
INFO:embedding:Time taken to put: 0.0024480819702148438
2025-01-25 09:53:47,541 - INFO - Time taken to release GPU memory: 1.5735626220703125e-05
INFO:embedding:Time taken to release GPU memory: 1.5735626220703125e-05
2025-01-25 09:53:47,543 - INFO - 
Processing batch 4
INFO:embedding:
Processing batch 4
2025-01-25 09:53:47,544 - INFO - Time taken to check: 0.0007958412170410156
INFO:embedding:Time taken to check: 0.0007958412170410156
2025-01-25 09:53:47,545 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,654 - INFO - Time taken to encode: 0.10814714431762695
INFO:embedding:Time taken to encode: 0.10814714431762695
2025-01-25 09:53:47,656 - INFO - Time taken to put: 0.0020210742950439453
INFO:embedding:Time taken to put: 0.0020210742950439453
2025-01-25 09:53:47,657 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:47,659 - INFO - 
Processing batch 5
INFO:embedding:
Processing batch 5
2025-01-25 09:53:47,660 - INFO - Time taken to check: 0.00031113624572753906
INFO:embedding:Time taken to check: 0.00031113624572753906
2025-01-25 09:53:47,661 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,757 - INFO - Time taken to encode: 0.09605622291564941
INFO:embedding:Time taken to encode: 0.09605622291564941
2025-01-25 09:53:47,760 - INFO - Time taken to put: 0.002229928970336914
INFO:embedding:Time taken to put: 0.002229928970336914
2025-01-25 09:53:47,761 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:47,763 - INFO - 
Processing batch 6
INFO:embedding:
Processing batch 6
2025-01-25 09:53:47,764 - INFO - Time taken to check: 0.0002167224884033203
INFO:embedding:Time taken to check: 0.0002167224884033203
2025-01-25 09:53:47,765 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,895 - INFO - Time taken to encode: 0.12892603874206543
INFO:embedding:Time taken to encode: 0.12892603874206543
2025-01-25 09:53:47,898 - INFO - Time taken to put: 0.0025179386138916016
INFO:embedding:Time taken to put: 0.0025179386138916016
2025-01-25 09:53:47,899 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:47,901 - INFO - 
Processing batch 7
INFO:embedding:
Processing batch 7
2025-01-25 09:53:47,902 - INFO - Time taken to check: 0.00020623207092285156
INFO:embedding:Time taken to check: 0.00020623207092285156
2025-01-25 09:53:47,903 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:47,995 - INFO - Time taken to encode: 0.09137392044067383
INFO:embedding:Time taken to encode: 0.09137392044067383
2025-01-25 09:53:47,997 - INFO - Time taken to put: 0.0016429424285888672
INFO:embedding:Time taken to put: 0.0016429424285888672
2025-01-25 09:53:47,998 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:48,001 - INFO - 
Processing batch 8
INFO:embedding:
Processing batch 8
2025-01-25 09:53:48,001 - INFO - Time taken to check: 0.00015115737915039062
INFO:embedding:Time taken to check: 0.00015115737915039062
2025-01-25 09:53:48,002 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,108 - INFO - Time taken to encode: 0.1048440933227539
INFO:embedding:Time taken to encode: 0.1048440933227539
2025-01-25 09:53:48,110 - INFO - Time taken to put: 0.0014369487762451172
INFO:embedding:Time taken to put: 0.0014369487762451172
2025-01-25 09:53:48,111 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:48,113 - INFO - 
Processing batch 9
INFO:embedding:
Processing batch 9
2025-01-25 09:53:48,114 - INFO - Time taken to check: 0.0003230571746826172
INFO:embedding:Time taken to check: 0.0003230571746826172
2025-01-25 09:53:48,115 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,265 - INFO - Time taken to encode: 0.1496272087097168
INFO:embedding:Time taken to encode: 0.1496272087097168
2025-01-25 09:53:48,267 - INFO - Time taken to put: 0.0013933181762695312
INFO:embedding:Time taken to put: 0.0013933181762695312
2025-01-25 09:53:48,268 - INFO - Time taken to release GPU memory: 1.4781951904296875e-05
INFO:embedding:Time taken to release GPU memory: 1.4781951904296875e-05
2025-01-25 09:53:48,270 - INFO - 
Processing batch 10
INFO:embedding:
Processing batch 10
2025-01-25 09:53:48,271 - INFO - Time taken to check: 0.0003571510314941406
INFO:embedding:Time taken to check: 0.0003571510314941406
2025-01-25 09:53:48,271 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,364 - INFO - Time taken to encode: 0.09194707870483398
INFO:embedding:Time taken to encode: 0.09194707870483398
2025-01-25 09:53:48,366 - INFO - Time taken to put: 0.0015299320220947266
INFO:embedding:Time taken to put: 0.0015299320220947266
2025-01-25 09:53:48,367 - INFO - Time taken to release GPU memory: 2.7894973754882812e-05
INFO:embedding:Time taken to release GPU memory: 2.7894973754882812e-05
2025-01-25 09:53:48,369 - INFO - 
Processing batch 11
INFO:embedding:
Processing batch 11
2025-01-25 09:53:48,370 - INFO - Time taken to check: 0.00035500526428222656
INFO:embedding:Time taken to check: 0.00035500526428222656
2025-01-25 09:53:48,371 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,457 - INFO - Time taken to encode: 0.08608007431030273
INFO:embedding:Time taken to encode: 0.08608007431030273
2025-01-25 09:53:48,460 - INFO - Time taken to put: 0.00173187255859375
INFO:embedding:Time taken to put: 0.00173187255859375
2025-01-25 09:53:48,461 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:48,463 - INFO - 
Processing batch 12
INFO:embedding:
Processing batch 12
2025-01-25 09:53:48,463 - INFO - Time taken to check: 0.0002989768981933594
INFO:embedding:Time taken to check: 0.0002989768981933594
2025-01-25 09:53:48,464 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,614 - INFO - Time taken to encode: 0.1484518051147461
INFO:embedding:Time taken to encode: 0.1484518051147461
2025-01-25 09:53:48,617 - INFO - Time taken to put: 0.002101898193359375
INFO:embedding:Time taken to put: 0.002101898193359375
2025-01-25 09:53:48,617 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:48,619 - INFO - 
Processing batch 13
INFO:embedding:
Processing batch 13
2025-01-25 09:53:48,620 - INFO - Time taken to check: 9.608268737792969e-05
INFO:embedding:Time taken to check: 9.608268737792969e-05
2025-01-25 09:53:48,620 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,714 - INFO - Time taken to encode: 0.0925896167755127
INFO:embedding:Time taken to encode: 0.0925896167755127
2025-01-25 09:53:48,716 - INFO - Time taken to put: 0.0013971328735351562
INFO:embedding:Time taken to put: 0.0013971328735351562
2025-01-25 09:53:48,717 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:53:48,719 - INFO - 
Processing batch 14
INFO:embedding:
Processing batch 14
2025-01-25 09:53:48,719 - INFO - Time taken to check: 0.0002460479736328125
INFO:embedding:Time taken to check: 0.0002460479736328125
2025-01-25 09:53:48,720 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,814 - INFO - Time taken to encode: 0.09287381172180176
INFO:embedding:Time taken to encode: 0.09287381172180176
2025-01-25 09:53:48,816 - INFO - Time taken to put: 0.0014400482177734375
INFO:embedding:Time taken to put: 0.0014400482177734375
2025-01-25 09:53:48,816 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:48,819 - INFO - 
Processing batch 15
INFO:embedding:
Processing batch 15
2025-01-25 09:53:48,819 - INFO - Time taken to check: 0.00019884109497070312
INFO:embedding:Time taken to check: 0.00019884109497070312
2025-01-25 09:53:48,820 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:48,957 - INFO - Time taken to encode: 0.13676881790161133
INFO:embedding:Time taken to encode: 0.13676881790161133
2025-01-25 09:53:48,960 - INFO - Time taken to put: 0.0016810894012451172
INFO:embedding:Time taken to put: 0.0016810894012451172
2025-01-25 09:53:48,961 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:48,963 - INFO - 
Processing batch 16
INFO:embedding:
Processing batch 16
2025-01-25 09:53:48,964 - INFO - Time taken to check: 0.0006840229034423828
INFO:embedding:Time taken to check: 0.0006840229034423828
2025-01-25 09:53:48,965 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,080 - INFO - Time taken to encode: 0.11445403099060059
INFO:embedding:Time taken to encode: 0.11445403099060059
2025-01-25 09:53:49,084 - INFO - Time taken to put: 0.0023040771484375
INFO:embedding:Time taken to put: 0.0023040771484375
2025-01-25 09:53:49,084 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:49,086 - INFO - 
Processing batch 17
INFO:embedding:
Processing batch 17
2025-01-25 09:53:49,087 - INFO - Time taken to check: 0.0005788803100585938
INFO:embedding:Time taken to check: 0.0005788803100585938
2025-01-25 09:53:49,088 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,192 - INFO - Time taken to encode: 0.1031501293182373
INFO:embedding:Time taken to encode: 0.1031501293182373
2025-01-25 09:53:49,195 - INFO - Time taken to put: 0.0022590160369873047
INFO:embedding:Time taken to put: 0.0022590160369873047
2025-01-25 09:53:49,195 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:49,197 - INFO - 
Processing batch 18
INFO:embedding:
Processing batch 18
2025-01-25 09:53:49,199 - INFO - Time taken to check: 0.0005476474761962891
INFO:embedding:Time taken to check: 0.0005476474761962891
2025-01-25 09:53:49,199 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,354 - INFO - Time taken to encode: 0.1537799835205078
INFO:embedding:Time taken to encode: 0.1537799835205078
2025-01-25 09:53:49,356 - INFO - Time taken to put: 0.0015439987182617188
INFO:embedding:Time taken to put: 0.0015439987182617188
2025-01-25 09:53:49,357 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:49,359 - INFO - 
Processing batch 19
INFO:embedding:
Processing batch 19
2025-01-25 09:53:49,360 - INFO - Time taken to check: 0.00018906593322753906
INFO:embedding:Time taken to check: 0.00018906593322753906
2025-01-25 09:53:49,361 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,457 - INFO - Time taken to encode: 0.09605813026428223
INFO:embedding:Time taken to encode: 0.09605813026428223
2025-01-25 09:53:49,460 - INFO - Time taken to put: 0.0015938282012939453
INFO:embedding:Time taken to put: 0.0015938282012939453
2025-01-25 09:53:49,460 - INFO - Time taken to release GPU memory: 1.4781951904296875e-05
INFO:embedding:Time taken to release GPU memory: 1.4781951904296875e-05
2025-01-25 09:53:49,462 - INFO - 
Processing batch 20
INFO:embedding:
Processing batch 20
2025-01-25 09:53:49,463 - INFO - Time taken to check: 0.00014209747314453125
INFO:embedding:Time taken to check: 0.00014209747314453125
2025-01-25 09:53:49,464 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,570 - INFO - Time taken to encode: 0.10535383224487305
INFO:embedding:Time taken to encode: 0.10535383224487305
2025-01-25 09:53:49,572 - INFO - Time taken to put: 0.0013811588287353516
INFO:embedding:Time taken to put: 0.0013811588287353516
2025-01-25 09:53:49,573 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:49,575 - INFO - 
Processing batch 21
INFO:embedding:
Processing batch 21
2025-01-25 09:53:49,576 - INFO - Time taken to check: 0.00037407875061035156
INFO:embedding:Time taken to check: 0.00037407875061035156
2025-01-25 09:53:49,576 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,725 - INFO - Time taken to encode: 0.14765191078186035
INFO:embedding:Time taken to encode: 0.14765191078186035
2025-01-25 09:53:49,727 - INFO - Time taken to put: 0.0014958381652832031
INFO:embedding:Time taken to put: 0.0014958381652832031
2025-01-25 09:53:49,728 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:49,730 - INFO - 
Processing batch 22
INFO:embedding:
Processing batch 22
2025-01-25 09:53:49,731 - INFO - Time taken to check: 0.0006480216979980469
INFO:embedding:Time taken to check: 0.0006480216979980469
2025-01-25 09:53:49,732 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,843 - INFO - Time taken to encode: 0.11112308502197266
INFO:embedding:Time taken to encode: 0.11112308502197266
2025-01-25 09:53:49,846 - INFO - Time taken to put: 0.0018031597137451172
INFO:embedding:Time taken to put: 0.0018031597137451172
2025-01-25 09:53:49,847 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:49,849 - INFO - 
Processing batch 23
INFO:embedding:
Processing batch 23
2025-01-25 09:53:49,850 - INFO - Time taken to check: 0.0002372264862060547
INFO:embedding:Time taken to check: 0.0002372264862060547
2025-01-25 09:53:49,851 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:49,954 - INFO - Time taken to encode: 0.10243368148803711
INFO:embedding:Time taken to encode: 0.10243368148803711
2025-01-25 09:53:49,957 - INFO - Time taken to put: 0.002043008804321289
INFO:embedding:Time taken to put: 0.002043008804321289
2025-01-25 09:53:49,957 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:49,960 - INFO - 
Processing batch 24
INFO:embedding:
Processing batch 24
2025-01-25 09:53:49,961 - INFO - Time taken to check: 0.00013899803161621094
INFO:embedding:Time taken to check: 0.00013899803161621094
2025-01-25 09:53:49,961 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,093 - INFO - Time taken to encode: 0.13101410865783691
INFO:embedding:Time taken to encode: 0.13101410865783691
2025-01-25 09:53:50,095 - INFO - Time taken to put: 0.0013458728790283203
INFO:embedding:Time taken to put: 0.0013458728790283203
2025-01-25 09:53:50,096 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:50,098 - INFO - 
Processing batch 25
INFO:embedding:
Processing batch 25
2025-01-25 09:53:50,099 - INFO - Time taken to check: 0.00026798248291015625
INFO:embedding:Time taken to check: 0.00026798248291015625
2025-01-25 09:53:50,100 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,190 - INFO - Time taken to encode: 0.08985519409179688
INFO:embedding:Time taken to encode: 0.08985519409179688
2025-01-25 09:53:50,192 - INFO - Time taken to put: 0.001413106918334961
INFO:embedding:Time taken to put: 0.001413106918334961
2025-01-25 09:53:50,193 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:50,195 - INFO - 
Processing batch 26
INFO:embedding:
Processing batch 26
2025-01-25 09:53:50,196 - INFO - Time taken to check: 0.00019598007202148438
INFO:embedding:Time taken to check: 0.00019598007202148438
2025-01-25 09:53:50,197 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,340 - INFO - Time taken to encode: 0.14290118217468262
INFO:embedding:Time taken to encode: 0.14290118217468262
2025-01-25 09:53:50,343 - INFO - Time taken to put: 0.0015397071838378906
INFO:embedding:Time taken to put: 0.0015397071838378906
2025-01-25 09:53:50,343 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:50,345 - INFO - 
Processing batch 27
INFO:embedding:
Processing batch 27
2025-01-25 09:53:50,346 - INFO - Time taken to check: 0.00017595291137695312
INFO:embedding:Time taken to check: 0.00017595291137695312
2025-01-25 09:53:50,347 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,449 - INFO - Time taken to encode: 0.10181784629821777
INFO:embedding:Time taken to encode: 0.10181784629821777
2025-01-25 09:53:50,452 - INFO - Time taken to put: 0.0022428035736083984
INFO:embedding:Time taken to put: 0.0022428035736083984
2025-01-25 09:53:50,453 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:50,455 - INFO - 
Processing batch 28
INFO:embedding:
Processing batch 28
2025-01-25 09:53:50,456 - INFO - Time taken to check: 0.0002548694610595703
INFO:embedding:Time taken to check: 0.0002548694610595703
2025-01-25 09:53:50,457 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,556 - INFO - Time taken to encode: 0.09830498695373535
INFO:embedding:Time taken to encode: 0.09830498695373535
2025-01-25 09:53:50,558 - INFO - Time taken to put: 0.001615762710571289
INFO:embedding:Time taken to put: 0.001615762710571289
2025-01-25 09:53:50,559 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:50,562 - INFO - 
Processing batch 29
INFO:embedding:
Processing batch 29
2025-01-25 09:53:50,562 - INFO - Time taken to check: 0.00021576881408691406
INFO:embedding:Time taken to check: 0.00021576881408691406
2025-01-25 09:53:50,563 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,693 - INFO - Time taken to encode: 0.12963509559631348
INFO:embedding:Time taken to encode: 0.12963509559631348
2025-01-25 09:53:50,696 - INFO - Time taken to put: 0.0016522407531738281
INFO:embedding:Time taken to put: 0.0016522407531738281
2025-01-25 09:53:50,696 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:50,699 - INFO - 
Processing batch 30
INFO:embedding:
Processing batch 30
2025-01-25 09:53:50,700 - INFO - Time taken to check: 0.00010013580322265625
INFO:embedding:Time taken to check: 0.00010013580322265625
2025-01-25 09:53:50,701 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,790 - INFO - Time taken to encode: 0.08865714073181152
INFO:embedding:Time taken to encode: 0.08865714073181152
2025-01-25 09:53:50,793 - INFO - Time taken to put: 0.0013909339904785156
INFO:embedding:Time taken to put: 0.0013909339904785156
2025-01-25 09:53:50,793 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:50,795 - INFO - 
Processing batch 31
INFO:embedding:
Processing batch 31
2025-01-25 09:53:50,796 - INFO - Time taken to check: 0.00037407875061035156
INFO:embedding:Time taken to check: 0.00037407875061035156
2025-01-25 09:53:50,797 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:50,889 - INFO - Time taken to encode: 0.09105777740478516
INFO:embedding:Time taken to encode: 0.09105777740478516
2025-01-25 09:53:50,892 - INFO - Time taken to put: 0.0017049312591552734
INFO:embedding:Time taken to put: 0.0017049312591552734
2025-01-25 09:53:50,892 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:50,895 - INFO - 
Processing batch 32
INFO:embedding:
Processing batch 32
2025-01-25 09:53:50,896 - INFO - Time taken to check: 0.00021123886108398438
INFO:embedding:Time taken to check: 0.00021123886108398438
2025-01-25 09:53:50,897 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,034 - INFO - Time taken to encode: 0.13643312454223633
INFO:embedding:Time taken to encode: 0.13643312454223633
2025-01-25 09:53:51,036 - INFO - Time taken to put: 0.001524209976196289
INFO:embedding:Time taken to put: 0.001524209976196289
2025-01-25 09:53:51,037 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:51,039 - INFO - 
Processing batch 33
INFO:embedding:
Processing batch 33
2025-01-25 09:53:51,040 - INFO - Time taken to check: 0.0002009868621826172
INFO:embedding:Time taken to check: 0.0002009868621826172
2025-01-25 09:53:51,040 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,136 - INFO - Time taken to encode: 0.09482502937316895
INFO:embedding:Time taken to encode: 0.09482502937316895
2025-01-25 09:53:51,138 - INFO - Time taken to put: 0.0016551017761230469
INFO:embedding:Time taken to put: 0.0016551017761230469
2025-01-25 09:53:51,139 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:51,141 - INFO - 
Processing batch 34
INFO:embedding:
Processing batch 34
2025-01-25 09:53:51,142 - INFO - Time taken to check: 0.0001671314239501953
INFO:embedding:Time taken to check: 0.0001671314239501953
2025-01-25 09:53:51,143 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,221 - INFO - Time taken to encode: 0.07823419570922852
INFO:embedding:Time taken to encode: 0.07823419570922852
2025-01-25 09:53:51,224 - INFO - Time taken to put: 0.001786947250366211
INFO:embedding:Time taken to put: 0.001786947250366211
2025-01-25 09:53:51,225 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:51,227 - INFO - 
Processing batch 35
INFO:embedding:
Processing batch 35
2025-01-25 09:53:51,228 - INFO - Time taken to check: 0.00010204315185546875
INFO:embedding:Time taken to check: 0.00010204315185546875
2025-01-25 09:53:51,228 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,358 - INFO - Time taken to encode: 0.12914204597473145
INFO:embedding:Time taken to encode: 0.12914204597473145
2025-01-25 09:53:51,360 - INFO - Time taken to put: 0.001409769058227539
INFO:embedding:Time taken to put: 0.001409769058227539
2025-01-25 09:53:51,361 - INFO - Time taken to release GPU memory: 1.5020370483398438e-05
INFO:embedding:Time taken to release GPU memory: 1.5020370483398438e-05
2025-01-25 09:53:51,363 - INFO - 
Processing batch 36
INFO:embedding:
Processing batch 36
2025-01-25 09:53:51,364 - INFO - Time taken to check: 0.0005090236663818359
INFO:embedding:Time taken to check: 0.0005090236663818359
2025-01-25 09:53:51,365 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,475 - INFO - Time taken to encode: 0.10943412780761719
INFO:embedding:Time taken to encode: 0.10943412780761719
2025-01-25 09:53:51,478 - INFO - Time taken to put: 0.0017440319061279297
INFO:embedding:Time taken to put: 0.0017440319061279297
2025-01-25 09:53:51,479 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:51,482 - INFO - 
Processing batch 37
INFO:embedding:
Processing batch 37
2025-01-25 09:53:51,483 - INFO - Time taken to check: 0.0003299713134765625
INFO:embedding:Time taken to check: 0.0003299713134765625
2025-01-25 09:53:51,483 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,585 - INFO - Time taken to encode: 0.10094070434570312
INFO:embedding:Time taken to encode: 0.10094070434570312
2025-01-25 09:53:51,587 - INFO - Time taken to put: 0.0014870166778564453
INFO:embedding:Time taken to put: 0.0014870166778564453
2025-01-25 09:53:51,588 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:51,590 - INFO - 
Processing batch 38
INFO:embedding:
Processing batch 38
2025-01-25 09:53:51,591 - INFO - Time taken to check: 0.00016498565673828125
INFO:embedding:Time taken to check: 0.00016498565673828125
2025-01-25 09:53:51,591 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,731 - INFO - Time taken to encode: 0.13857007026672363
INFO:embedding:Time taken to encode: 0.13857007026672363
2025-01-25 09:53:51,733 - INFO - Time taken to put: 0.00145721435546875
INFO:embedding:Time taken to put: 0.00145721435546875
2025-01-25 09:53:51,734 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:51,736 - INFO - 
Processing batch 39
INFO:embedding:
Processing batch 39
2025-01-25 09:53:51,737 - INFO - Time taken to check: 0.00010895729064941406
INFO:embedding:Time taken to check: 0.00010895729064941406
2025-01-25 09:53:51,737 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,837 - INFO - Time taken to encode: 0.09939026832580566
INFO:embedding:Time taken to encode: 0.09939026832580566
2025-01-25 09:53:51,840 - INFO - Time taken to put: 0.0018699169158935547
INFO:embedding:Time taken to put: 0.0018699169158935547
2025-01-25 09:53:51,841 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:51,843 - INFO - 
Processing batch 40
INFO:embedding:
Processing batch 40
2025-01-25 09:53:51,844 - INFO - Time taken to check: 0.00011014938354492188
INFO:embedding:Time taken to check: 0.00011014938354492188
2025-01-25 09:53:51,845 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:51,933 - INFO - Time taken to encode: 0.08761715888977051
INFO:embedding:Time taken to encode: 0.08761715888977051
2025-01-25 09:53:51,935 - INFO - Time taken to put: 0.0014088153839111328
INFO:embedding:Time taken to put: 0.0014088153839111328
2025-01-25 09:53:51,936 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:51,938 - INFO - 
Processing batch 41
INFO:embedding:
Processing batch 41
2025-01-25 09:53:51,939 - INFO - Time taken to check: 0.0001289844512939453
INFO:embedding:Time taken to check: 0.0001289844512939453
2025-01-25 09:53:51,940 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,074 - INFO - Time taken to encode: 0.13344931602478027
INFO:embedding:Time taken to encode: 0.13344931602478027
2025-01-25 09:53:52,076 - INFO - Time taken to put: 0.0014338493347167969
INFO:embedding:Time taken to put: 0.0014338493347167969
2025-01-25 09:53:52,077 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:52,079 - INFO - 
Processing batch 42
INFO:embedding:
Processing batch 42
2025-01-25 09:53:52,080 - INFO - Time taken to check: 0.000164031982421875
INFO:embedding:Time taken to check: 0.000164031982421875
2025-01-25 09:53:52,081 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,164 - INFO - Time taken to encode: 0.08268189430236816
INFO:embedding:Time taken to encode: 0.08268189430236816
2025-01-25 09:53:52,167 - INFO - Time taken to put: 0.0018808841705322266
INFO:embedding:Time taken to put: 0.0018808841705322266
2025-01-25 09:53:52,168 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:52,170 - INFO - 
Processing batch 43
INFO:embedding:
Processing batch 43
2025-01-25 09:53:52,171 - INFO - Time taken to check: 0.00015997886657714844
INFO:embedding:Time taken to check: 0.00015997886657714844
2025-01-25 09:53:52,171 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,276 - INFO - Time taken to encode: 0.10394597053527832
INFO:embedding:Time taken to encode: 0.10394597053527832
2025-01-25 09:53:52,278 - INFO - Time taken to put: 0.00138092041015625
INFO:embedding:Time taken to put: 0.00138092041015625
2025-01-25 09:53:52,279 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:52,281 - INFO - 
Processing batch 44
INFO:embedding:
Processing batch 44
2025-01-25 09:53:52,282 - INFO - Time taken to check: 0.00041413307189941406
INFO:embedding:Time taken to check: 0.00041413307189941406
2025-01-25 09:53:52,283 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,422 - INFO - Time taken to encode: 0.13854694366455078
INFO:embedding:Time taken to encode: 0.13854694366455078
2025-01-25 09:53:52,424 - INFO - Time taken to put: 0.0014789104461669922
INFO:embedding:Time taken to put: 0.0014789104461669922
2025-01-25 09:53:52,425 - INFO - Time taken to release GPU memory: 1.6689300537109375e-05
INFO:embedding:Time taken to release GPU memory: 1.6689300537109375e-05
2025-01-25 09:53:52,427 - INFO - 
Processing batch 45
INFO:embedding:
Processing batch 45
2025-01-25 09:53:52,428 - INFO - Time taken to check: 0.00028204917907714844
INFO:embedding:Time taken to check: 0.00028204917907714844
2025-01-25 09:53:52,429 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,506 - INFO - Time taken to encode: 0.07672905921936035
INFO:embedding:Time taken to encode: 0.07672905921936035
2025-01-25 09:53:52,509 - INFO - Time taken to put: 0.0015301704406738281
INFO:embedding:Time taken to put: 0.0015301704406738281
2025-01-25 09:53:52,510 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:52,512 - INFO - 
Processing batch 46
INFO:embedding:
Processing batch 46
2025-01-25 09:53:52,513 - INFO - Time taken to check: 0.0001201629638671875
INFO:embedding:Time taken to check: 0.0001201629638671875
2025-01-25 09:53:52,513 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,608 - INFO - Time taken to encode: 0.09373211860656738
INFO:embedding:Time taken to encode: 0.09373211860656738
2025-01-25 09:53:52,610 - INFO - Time taken to put: 0.0016102790832519531
INFO:embedding:Time taken to put: 0.0016102790832519531
2025-01-25 09:53:52,611 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:52,613 - INFO - 
Processing batch 47
INFO:embedding:
Processing batch 47
2025-01-25 09:53:52,614 - INFO - Time taken to check: 0.0003402233123779297
INFO:embedding:Time taken to check: 0.0003402233123779297
2025-01-25 09:53:52,614 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,741 - INFO - Time taken to encode: 0.12572503089904785
INFO:embedding:Time taken to encode: 0.12572503089904785
2025-01-25 09:53:52,743 - INFO - Time taken to put: 0.0015783309936523438
INFO:embedding:Time taken to put: 0.0015783309936523438
2025-01-25 09:53:52,744 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:52,746 - INFO - 
Processing batch 48
INFO:embedding:
Processing batch 48
2025-01-25 09:53:52,747 - INFO - Time taken to check: 0.00033473968505859375
INFO:embedding:Time taken to check: 0.00033473968505859375
2025-01-25 09:53:52,748 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,855 - INFO - Time taken to encode: 0.10727596282958984
INFO:embedding:Time taken to encode: 0.10727596282958984
2025-01-25 09:53:52,858 - INFO - Time taken to put: 0.0019729137420654297
INFO:embedding:Time taken to put: 0.0019729137420654297
2025-01-25 09:53:52,859 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:52,861 - INFO - 
Processing batch 49
INFO:embedding:
Processing batch 49
2025-01-25 09:53:52,862 - INFO - Time taken to check: 0.00010800361633300781
INFO:embedding:Time taken to check: 0.00010800361633300781
2025-01-25 09:53:52,863 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:52,950 - INFO - Time taken to encode: 0.08668804168701172
INFO:embedding:Time taken to encode: 0.08668804168701172
2025-01-25 09:53:52,952 - INFO - Time taken to put: 0.0013191699981689453
INFO:embedding:Time taken to put: 0.0013191699981689453
2025-01-25 09:53:52,953 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:52,955 - INFO - 
Processing batch 50
INFO:embedding:
Processing batch 50
2025-01-25 09:53:52,956 - INFO - Time taken to check: 0.00025963783264160156
INFO:embedding:Time taken to check: 0.00025963783264160156
2025-01-25 09:53:52,957 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,089 - INFO - Time taken to encode: 0.13088297843933105
INFO:embedding:Time taken to encode: 0.13088297843933105
2025-01-25 09:53:53,091 - INFO - Time taken to put: 0.0015668869018554688
INFO:embedding:Time taken to put: 0.0015668869018554688
2025-01-25 09:53:53,092 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:53,095 - INFO - 
Processing batch 51
INFO:embedding:
Processing batch 51
2025-01-25 09:53:53,095 - INFO - Time taken to check: 0.00018477439880371094
INFO:embedding:Time taken to check: 0.00018477439880371094
2025-01-25 09:53:53,096 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,189 - INFO - Time taken to encode: 0.09250617027282715
INFO:embedding:Time taken to encode: 0.09250617027282715
2025-01-25 09:53:53,191 - INFO - Time taken to put: 0.0014116764068603516
INFO:embedding:Time taken to put: 0.0014116764068603516
2025-01-25 09:53:53,192 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:53,194 - INFO - 
Processing batch 52
INFO:embedding:
Processing batch 52
2025-01-25 09:53:53,195 - INFO - Time taken to check: 0.00018978118896484375
INFO:embedding:Time taken to check: 0.00018978118896484375
2025-01-25 09:53:53,196 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,290 - INFO - Time taken to encode: 0.0938570499420166
INFO:embedding:Time taken to encode: 0.0938570499420166
2025-01-25 09:53:53,292 - INFO - Time taken to put: 0.001558065414428711
INFO:embedding:Time taken to put: 0.001558065414428711
2025-01-25 09:53:53,293 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:53,295 - INFO - 
Processing batch 53
INFO:embedding:
Processing batch 53
2025-01-25 09:53:53,296 - INFO - Time taken to check: 9.584426879882812e-05
INFO:embedding:Time taken to check: 9.584426879882812e-05
2025-01-25 09:53:53,296 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,417 - INFO - Time taken to encode: 0.12034320831298828
INFO:embedding:Time taken to encode: 0.12034320831298828
2025-01-25 09:53:53,419 - INFO - Time taken to put: 0.0014033317565917969
INFO:embedding:Time taken to put: 0.0014033317565917969
2025-01-25 09:53:53,420 - INFO - Time taken to release GPU memory: 3.2901763916015625e-05
INFO:embedding:Time taken to release GPU memory: 3.2901763916015625e-05
2025-01-25 09:53:53,423 - INFO - 
Processing batch 54
INFO:embedding:
Processing batch 54
2025-01-25 09:53:53,424 - INFO - Time taken to check: 0.00015687942504882812
INFO:embedding:Time taken to check: 0.00015687942504882812
2025-01-25 09:53:53,424 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,520 - INFO - Time taken to encode: 0.09486913681030273
INFO:embedding:Time taken to encode: 0.09486913681030273
2025-01-25 09:53:53,522 - INFO - Time taken to put: 0.0015790462493896484
INFO:embedding:Time taken to put: 0.0015790462493896484
2025-01-25 09:53:53,523 - INFO - Time taken to release GPU memory: 1.4781951904296875e-05
INFO:embedding:Time taken to release GPU memory: 1.4781951904296875e-05
2025-01-25 09:53:53,525 - INFO - 
Processing batch 55
INFO:embedding:
Processing batch 55
2025-01-25 09:53:53,526 - INFO - Time taken to check: 0.00017499923706054688
INFO:embedding:Time taken to check: 0.00017499923706054688
2025-01-25 09:53:53,526 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,617 - INFO - Time taken to encode: 0.09018778800964355
INFO:embedding:Time taken to encode: 0.09018778800964355
2025-01-25 09:53:53,620 - INFO - Time taken to put: 0.001661062240600586
INFO:embedding:Time taken to put: 0.001661062240600586
2025-01-25 09:53:53,620 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:53,622 - INFO - 
Processing batch 56
INFO:embedding:
Processing batch 56
2025-01-25 09:53:53,623 - INFO - Time taken to check: 0.00011992454528808594
INFO:embedding:Time taken to check: 0.00011992454528808594
2025-01-25 09:53:53,624 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,730 - INFO - Time taken to encode: 0.10589885711669922
INFO:embedding:Time taken to encode: 0.10589885711669922
2025-01-25 09:53:53,733 - INFO - Time taken to put: 0.0014431476593017578
INFO:embedding:Time taken to put: 0.0014431476593017578
2025-01-25 09:53:53,733 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:53,735 - INFO - 
Processing batch 57
INFO:embedding:
Processing batch 57
2025-01-25 09:53:53,736 - INFO - Time taken to check: 0.0003101825714111328
INFO:embedding:Time taken to check: 0.0003101825714111328
2025-01-25 09:53:53,737 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,841 - INFO - Time taken to encode: 0.10357522964477539
INFO:embedding:Time taken to encode: 0.10357522964477539
2025-01-25 09:53:53,844 - INFO - Time taken to put: 0.00185394287109375
INFO:embedding:Time taken to put: 0.00185394287109375
2025-01-25 09:53:53,845 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:53:53,847 - INFO - 
Processing batch 58
INFO:embedding:
Processing batch 58
2025-01-25 09:53:53,848 - INFO - Time taken to check: 0.0005390644073486328
INFO:embedding:Time taken to check: 0.0005390644073486328
2025-01-25 09:53:53,848 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:53,940 - INFO - Time taken to encode: 0.0915980339050293
INFO:embedding:Time taken to encode: 0.0915980339050293
2025-01-25 09:53:53,943 - INFO - Time taken to put: 0.0016701221466064453
INFO:embedding:Time taken to put: 0.0016701221466064453
2025-01-25 09:53:53,943 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:53,946 - INFO - 
Processing batch 59
INFO:embedding:
Processing batch 59
2025-01-25 09:53:53,947 - INFO - Time taken to check: 0.00022077560424804688
INFO:embedding:Time taken to check: 0.00022077560424804688
2025-01-25 09:53:53,948 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,090 - INFO - Time taken to encode: 0.14229297637939453
INFO:embedding:Time taken to encode: 0.14229297637939453
2025-01-25 09:53:54,094 - INFO - Time taken to put: 0.0023288726806640625
INFO:embedding:Time taken to put: 0.0023288726806640625
2025-01-25 09:53:54,094 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:54,096 - INFO - 
Processing batch 60
INFO:embedding:
Processing batch 60
2025-01-25 09:53:54,097 - INFO - Time taken to check: 9.107589721679688e-05
INFO:embedding:Time taken to check: 9.107589721679688e-05
2025-01-25 09:53:54,098 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,172 - INFO - Time taken to encode: 0.07414698600769043
INFO:embedding:Time taken to encode: 0.07414698600769043
2025-01-25 09:53:54,175 - INFO - Time taken to put: 0.0013551712036132812
INFO:embedding:Time taken to put: 0.0013551712036132812
2025-01-25 09:53:54,175 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:54,178 - INFO - 
Processing batch 61
INFO:embedding:
Processing batch 61
2025-01-25 09:53:54,179 - INFO - Time taken to check: 0.00028204917907714844
INFO:embedding:Time taken to check: 0.00028204917907714844
2025-01-25 09:53:54,179 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,270 - INFO - Time taken to encode: 0.09039926528930664
INFO:embedding:Time taken to encode: 0.09039926528930664
2025-01-25 09:53:54,273 - INFO - Time taken to put: 0.0015881061553955078
INFO:embedding:Time taken to put: 0.0015881061553955078
2025-01-25 09:53:54,273 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:54,276 - INFO - 
Processing batch 62
INFO:embedding:
Processing batch 62
2025-01-25 09:53:54,276 - INFO - Time taken to check: 0.00023603439331054688
INFO:embedding:Time taken to check: 0.00023603439331054688
2025-01-25 09:53:54,277 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,429 - INFO - Time taken to encode: 0.15166997909545898
INFO:embedding:Time taken to encode: 0.15166997909545898
2025-01-25 09:53:54,432 - INFO - Time taken to put: 0.0019137859344482422
INFO:embedding:Time taken to put: 0.0019137859344482422
2025-01-25 09:53:54,433 - INFO - Time taken to release GPU memory: 2.9802322387695312e-05
INFO:embedding:Time taken to release GPU memory: 2.9802322387695312e-05
2025-01-25 09:53:54,435 - INFO - 
Processing batch 63
INFO:embedding:
Processing batch 63
2025-01-25 09:53:54,436 - INFO - Time taken to check: 0.0003299713134765625
INFO:embedding:Time taken to check: 0.0003299713134765625
2025-01-25 09:53:54,437 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,539 - INFO - Time taken to encode: 0.10163187980651855
INFO:embedding:Time taken to encode: 0.10163187980651855
2025-01-25 09:53:54,541 - INFO - Time taken to put: 0.0014200210571289062
INFO:embedding:Time taken to put: 0.0014200210571289062
2025-01-25 09:53:54,542 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:54,544 - INFO - 
Processing batch 64
INFO:embedding:
Processing batch 64
2025-01-25 09:53:54,545 - INFO - Time taken to check: 0.00037789344787597656
INFO:embedding:Time taken to check: 0.00037789344787597656
2025-01-25 09:53:54,546 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,629 - INFO - Time taken to encode: 0.08190488815307617
INFO:embedding:Time taken to encode: 0.08190488815307617
2025-01-25 09:53:54,632 - INFO - Time taken to put: 0.002488851547241211
INFO:embedding:Time taken to put: 0.002488851547241211
2025-01-25 09:53:54,633 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:54,635 - INFO - 
Processing batch 65
INFO:embedding:
Processing batch 65
2025-01-25 09:53:54,636 - INFO - Time taken to check: 0.00027632713317871094
INFO:embedding:Time taken to check: 0.00027632713317871094
2025-01-25 09:53:54,636 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,778 - INFO - Time taken to encode: 0.1409001350402832
INFO:embedding:Time taken to encode: 0.1409001350402832
2025-01-25 09:53:54,780 - INFO - Time taken to put: 0.0017979145050048828
INFO:embedding:Time taken to put: 0.0017979145050048828
2025-01-25 09:53:54,781 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:54,783 - INFO - 
Processing batch 66
INFO:embedding:
Processing batch 66
2025-01-25 09:53:54,784 - INFO - Time taken to check: 0.0001857280731201172
INFO:embedding:Time taken to check: 0.0001857280731201172
2025-01-25 09:53:54,785 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,893 - INFO - Time taken to encode: 0.10709309577941895
INFO:embedding:Time taken to encode: 0.10709309577941895
2025-01-25 09:53:54,895 - INFO - Time taken to put: 0.0014698505401611328
INFO:embedding:Time taken to put: 0.0014698505401611328
2025-01-25 09:53:54,896 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:53:54,898 - INFO - 
Processing batch 67
INFO:embedding:
Processing batch 67
2025-01-25 09:53:54,898 - INFO - Time taken to check: 0.00014162063598632812
INFO:embedding:Time taken to check: 0.00014162063598632812
2025-01-25 09:53:54,899 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:54,997 - INFO - Time taken to encode: 0.09756112098693848
INFO:embedding:Time taken to encode: 0.09756112098693848
2025-01-25 09:53:55,000 - INFO - Time taken to put: 0.001363992691040039
INFO:embedding:Time taken to put: 0.001363992691040039
2025-01-25 09:53:55,001 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:55,003 - INFO - 
Processing batch 68
INFO:embedding:
Processing batch 68
2025-01-25 09:53:55,004 - INFO - Time taken to check: 0.00017499923706054688
INFO:embedding:Time taken to check: 0.00017499923706054688
2025-01-25 09:53:55,004 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,122 - INFO - Time taken to encode: 0.11756086349487305
INFO:embedding:Time taken to encode: 0.11756086349487305
2025-01-25 09:53:55,125 - INFO - Time taken to put: 0.0014493465423583984
INFO:embedding:Time taken to put: 0.0014493465423583984
2025-01-25 09:53:55,126 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:55,128 - INFO - 
Processing batch 69
INFO:embedding:
Processing batch 69
2025-01-25 09:53:55,128 - INFO - Time taken to check: 0.00022602081298828125
INFO:embedding:Time taken to check: 0.00022602081298828125
2025-01-25 09:53:55,129 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,203 - INFO - Time taken to encode: 0.07295107841491699
INFO:embedding:Time taken to encode: 0.07295107841491699
2025-01-25 09:53:55,205 - INFO - Time taken to put: 0.0015981197357177734
INFO:embedding:Time taken to put: 0.0015981197357177734
2025-01-25 09:53:55,206 - INFO - Time taken to release GPU memory: 1.5974044799804688e-05
INFO:embedding:Time taken to release GPU memory: 1.5974044799804688e-05
2025-01-25 09:53:55,208 - INFO - 
Processing batch 70
INFO:embedding:
Processing batch 70
2025-01-25 09:53:55,209 - INFO - Time taken to check: 0.0003058910369873047
INFO:embedding:Time taken to check: 0.0003058910369873047
2025-01-25 09:53:55,209 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,310 - INFO - Time taken to encode: 0.10051894187927246
INFO:embedding:Time taken to encode: 0.10051894187927246
2025-01-25 09:53:55,313 - INFO - Time taken to put: 0.0016791820526123047
INFO:embedding:Time taken to put: 0.0016791820526123047
2025-01-25 09:53:55,314 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:55,316 - INFO - 
Processing batch 71
INFO:embedding:
Processing batch 71
2025-01-25 09:53:55,317 - INFO - Time taken to check: 0.00016689300537109375
INFO:embedding:Time taken to check: 0.00016689300537109375
2025-01-25 09:53:55,318 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,447 - INFO - Time taken to encode: 0.1284031867980957
INFO:embedding:Time taken to encode: 0.1284031867980957
2025-01-25 09:53:55,449 - INFO - Time taken to put: 0.0018570423126220703
INFO:embedding:Time taken to put: 0.0018570423126220703
2025-01-25 09:53:55,450 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:55,452 - INFO - 
Processing batch 72
INFO:embedding:
Processing batch 72
2025-01-25 09:53:55,453 - INFO - Time taken to check: 0.00033736228942871094
INFO:embedding:Time taken to check: 0.00033736228942871094
2025-01-25 09:53:55,453 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,566 - INFO - Time taken to encode: 0.11218500137329102
INFO:embedding:Time taken to encode: 0.11218500137329102
2025-01-25 09:53:55,570 - INFO - Time taken to put: 0.0023453235626220703
INFO:embedding:Time taken to put: 0.0023453235626220703
2025-01-25 09:53:55,571 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:55,573 - INFO - 
Processing batch 73
INFO:embedding:
Processing batch 73
2025-01-25 09:53:55,575 - INFO - Time taken to check: 0.0004482269287109375
INFO:embedding:Time taken to check: 0.0004482269287109375
2025-01-25 09:53:55,576 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,725 - INFO - Time taken to encode: 0.14891529083251953
INFO:embedding:Time taken to encode: 0.14891529083251953
2025-01-25 09:53:55,728 - INFO - Time taken to put: 0.0019130706787109375
INFO:embedding:Time taken to put: 0.0019130706787109375
2025-01-25 09:53:55,729 - INFO - Time taken to release GPU memory: 1.6927719116210938e-05
INFO:embedding:Time taken to release GPU memory: 1.6927719116210938e-05
2025-01-25 09:53:55,731 - INFO - 
Processing batch 74
INFO:embedding:
Processing batch 74
2025-01-25 09:53:55,732 - INFO - Time taken to check: 0.00023794174194335938
INFO:embedding:Time taken to check: 0.00023794174194335938
2025-01-25 09:53:55,732 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,874 - INFO - Time taken to encode: 0.14125871658325195
INFO:embedding:Time taken to encode: 0.14125871658325195
2025-01-25 09:53:55,877 - INFO - Time taken to put: 0.0017352104187011719
INFO:embedding:Time taken to put: 0.0017352104187011719
2025-01-25 09:53:55,877 - INFO - Time taken to release GPU memory: 2.3126602172851562e-05
INFO:embedding:Time taken to release GPU memory: 2.3126602172851562e-05
2025-01-25 09:53:55,880 - INFO - 
Processing batch 75
INFO:embedding:
Processing batch 75
2025-01-25 09:53:55,881 - INFO - Time taken to check: 0.00015091896057128906
INFO:embedding:Time taken to check: 0.00015091896057128906
2025-01-25 09:53:55,881 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:55,980 - INFO - Time taken to encode: 0.09825301170349121
INFO:embedding:Time taken to encode: 0.09825301170349121
2025-01-25 09:53:55,983 - INFO - Time taken to put: 0.001355886459350586
INFO:embedding:Time taken to put: 0.001355886459350586
2025-01-25 09:53:55,983 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:55,986 - INFO - 
Processing batch 76
INFO:embedding:
Processing batch 76
2025-01-25 09:53:55,987 - INFO - Time taken to check: 0.00031304359436035156
INFO:embedding:Time taken to check: 0.00031304359436035156
2025-01-25 09:53:55,987 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,075 - INFO - Time taken to encode: 0.0868079662322998
INFO:embedding:Time taken to encode: 0.0868079662322998
2025-01-25 09:53:56,077 - INFO - Time taken to put: 0.0015320777893066406
INFO:embedding:Time taken to put: 0.0015320777893066406
2025-01-25 09:53:56,078 - INFO - Time taken to release GPU memory: 2.1219253540039062e-05
INFO:embedding:Time taken to release GPU memory: 2.1219253540039062e-05
2025-01-25 09:53:56,080 - INFO - 
Processing batch 77
INFO:embedding:
Processing batch 77
2025-01-25 09:53:56,081 - INFO - Time taken to check: 0.00015616416931152344
INFO:embedding:Time taken to check: 0.00015616416931152344
2025-01-25 09:53:56,082 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,218 - INFO - Time taken to encode: 0.1358048915863037
INFO:embedding:Time taken to encode: 0.1358048915863037
2025-01-25 09:53:56,220 - INFO - Time taken to put: 0.001428365707397461
INFO:embedding:Time taken to put: 0.001428365707397461
2025-01-25 09:53:56,221 - INFO - Time taken to release GPU memory: 1.621246337890625e-05
INFO:embedding:Time taken to release GPU memory: 1.621246337890625e-05
2025-01-25 09:53:56,223 - INFO - 
Processing batch 78
INFO:embedding:
Processing batch 78
2025-01-25 09:53:56,224 - INFO - Time taken to check: 0.00028586387634277344
INFO:embedding:Time taken to check: 0.00028586387634277344
2025-01-25 09:53:56,225 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,338 - INFO - Time taken to encode: 0.11300516128540039
INFO:embedding:Time taken to encode: 0.11300516128540039
2025-01-25 09:53:56,341 - INFO - Time taken to put: 0.0015552043914794922
INFO:embedding:Time taken to put: 0.0015552043914794922
2025-01-25 09:53:56,341 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:56,344 - INFO - 
Processing batch 79
INFO:embedding:
Processing batch 79
2025-01-25 09:53:56,344 - INFO - Time taken to check: 0.00019288063049316406
INFO:embedding:Time taken to check: 0.00019288063049316406
2025-01-25 09:53:56,345 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,452 - INFO - Time taken to encode: 0.1060020923614502
INFO:embedding:Time taken to encode: 0.1060020923614502
2025-01-25 09:53:56,455 - INFO - Time taken to put: 0.0015408992767333984
INFO:embedding:Time taken to put: 0.0015408992767333984
2025-01-25 09:53:56,455 - INFO - Time taken to release GPU memory: 1.71661376953125e-05
INFO:embedding:Time taken to release GPU memory: 1.71661376953125e-05
2025-01-25 09:53:56,458 - INFO - 
Processing batch 80
INFO:embedding:
Processing batch 80
2025-01-25 09:53:56,459 - INFO - Time taken to check: 0.0003120899200439453
INFO:embedding:Time taken to check: 0.0003120899200439453
2025-01-25 09:53:56,460 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,597 - INFO - Time taken to encode: 0.13660597801208496
INFO:embedding:Time taken to encode: 0.13660597801208496
2025-01-25 09:53:56,599 - INFO - Time taken to put: 0.0017321109771728516
INFO:embedding:Time taken to put: 0.0017321109771728516
2025-01-25 09:53:56,600 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:56,602 - INFO - 
Processing batch 81
INFO:embedding:
Processing batch 81
2025-01-25 09:53:56,603 - INFO - Time taken to check: 0.0004870891571044922
INFO:embedding:Time taken to check: 0.0004870891571044922
2025-01-25 09:53:56,604 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,692 - INFO - Time taken to encode: 0.08760333061218262
INFO:embedding:Time taken to encode: 0.08760333061218262
2025-01-25 09:53:56,695 - INFO - Time taken to put: 0.00203704833984375
INFO:embedding:Time taken to put: 0.00203704833984375
2025-01-25 09:53:56,696 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:56,698 - INFO - 
Processing batch 82
INFO:embedding:
Processing batch 82
2025-01-25 09:53:56,699 - INFO - Time taken to check: 0.0001590251922607422
INFO:embedding:Time taken to check: 0.0001590251922607422
2025-01-25 09:53:56,699 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,779 - INFO - Time taken to encode: 0.07867884635925293
INFO:embedding:Time taken to encode: 0.07867884635925293
2025-01-25 09:53:56,781 - INFO - Time taken to put: 0.00145721435546875
INFO:embedding:Time taken to put: 0.00145721435546875
2025-01-25 09:53:56,782 - INFO - Time taken to release GPU memory: 2.5033950805664062e-05
INFO:embedding:Time taken to release GPU memory: 2.5033950805664062e-05
2025-01-25 09:53:56,784 - INFO - 
Processing batch 83
INFO:embedding:
Processing batch 83
2025-01-25 09:53:56,785 - INFO - Time taken to check: 0.00024700164794921875
INFO:embedding:Time taken to check: 0.00024700164794921875
2025-01-25 09:53:56,786 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:56,933 - INFO - Time taken to encode: 0.1467139720916748
INFO:embedding:Time taken to encode: 0.1467139720916748
2025-01-25 09:53:56,935 - INFO - Time taken to put: 0.0014882087707519531
INFO:embedding:Time taken to put: 0.0014882087707519531
2025-01-25 09:53:56,936 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:56,938 - INFO - 
Processing batch 84
INFO:embedding:
Processing batch 84
2025-01-25 09:53:56,939 - INFO - Time taken to check: 0.00034117698669433594
INFO:embedding:Time taken to check: 0.00034117698669433594
2025-01-25 09:53:56,940 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,047 - INFO - Time taken to encode: 0.10593795776367188
INFO:embedding:Time taken to encode: 0.10593795776367188
2025-01-25 09:53:57,050 - INFO - Time taken to put: 0.0022592544555664062
INFO:embedding:Time taken to put: 0.0022592544555664062
2025-01-25 09:53:57,051 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:57,053 - INFO - 
Processing batch 85
INFO:embedding:
Processing batch 85
2025-01-25 09:53:57,054 - INFO - Time taken to check: 0.000347137451171875
INFO:embedding:Time taken to check: 0.000347137451171875
2025-01-25 09:53:57,055 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,163 - INFO - Time taken to encode: 0.10764789581298828
INFO:embedding:Time taken to encode: 0.10764789581298828
2025-01-25 09:53:57,165 - INFO - Time taken to put: 0.001566171646118164
INFO:embedding:Time taken to put: 0.001566171646118164
2025-01-25 09:53:57,166 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:57,168 - INFO - 
Processing batch 86
INFO:embedding:
Processing batch 86
2025-01-25 09:53:57,169 - INFO - Time taken to check: 0.0004439353942871094
INFO:embedding:Time taken to check: 0.0004439353942871094
2025-01-25 09:53:57,170 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,305 - INFO - Time taken to encode: 0.13505887985229492
INFO:embedding:Time taken to encode: 0.13505887985229492
2025-01-25 09:53:57,308 - INFO - Time taken to put: 0.0016629695892333984
INFO:embedding:Time taken to put: 0.0016629695892333984
2025-01-25 09:53:57,309 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:57,311 - INFO - 
Processing batch 87
INFO:embedding:
Processing batch 87
2025-01-25 09:53:57,312 - INFO - Time taken to check: 0.0002779960632324219
INFO:embedding:Time taken to check: 0.0002779960632324219
2025-01-25 09:53:57,313 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,411 - INFO - Time taken to encode: 0.09793829917907715
INFO:embedding:Time taken to encode: 0.09793829917907715
2025-01-25 09:53:57,414 - INFO - Time taken to put: 0.0016410350799560547
INFO:embedding:Time taken to put: 0.0016410350799560547
2025-01-25 09:53:57,414 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:57,417 - INFO - 
Processing batch 88
INFO:embedding:
Processing batch 88
2025-01-25 09:53:57,418 - INFO - Time taken to check: 0.0003330707550048828
INFO:embedding:Time taken to check: 0.0003330707550048828
2025-01-25 09:53:57,418 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,520 - INFO - Time taken to encode: 0.10117101669311523
INFO:embedding:Time taken to encode: 0.10117101669311523
2025-01-25 09:53:57,522 - INFO - Time taken to put: 0.0015511512756347656
INFO:embedding:Time taken to put: 0.0015511512756347656
2025-01-25 09:53:57,523 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:57,525 - INFO - 
Processing batch 89
INFO:embedding:
Processing batch 89
2025-01-25 09:53:57,526 - INFO - Time taken to check: 0.0004401206970214844
INFO:embedding:Time taken to check: 0.0004401206970214844
2025-01-25 09:53:57,527 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,666 - INFO - Time taken to encode: 0.13871097564697266
INFO:embedding:Time taken to encode: 0.13871097564697266
2025-01-25 09:53:57,669 - INFO - Time taken to put: 0.002268075942993164
INFO:embedding:Time taken to put: 0.002268075942993164
2025-01-25 09:53:57,669 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:57,672 - INFO - 
Processing batch 90
INFO:embedding:
Processing batch 90
2025-01-25 09:53:57,673 - INFO - Time taken to check: 0.0004532337188720703
INFO:embedding:Time taken to check: 0.0004532337188720703
2025-01-25 09:53:57,673 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,792 - INFO - Time taken to encode: 0.11780786514282227
INFO:embedding:Time taken to encode: 0.11780786514282227
2025-01-25 09:53:57,794 - INFO - Time taken to put: 0.0017647743225097656
INFO:embedding:Time taken to put: 0.0017647743225097656
2025-01-25 09:53:57,795 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:57,798 - INFO - 
Processing batch 91
INFO:embedding:
Processing batch 91
2025-01-25 09:53:57,798 - INFO - Time taken to check: 0.00013208389282226562
INFO:embedding:Time taken to check: 0.00013208389282226562
2025-01-25 09:53:57,799 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:57,901 - INFO - Time taken to encode: 0.10163378715515137
INFO:embedding:Time taken to encode: 0.10163378715515137
2025-01-25 09:53:57,904 - INFO - Time taken to put: 0.0013890266418457031
INFO:embedding:Time taken to put: 0.0013890266418457031
2025-01-25 09:53:57,904 - INFO - Time taken to release GPU memory: 1.811981201171875e-05
INFO:embedding:Time taken to release GPU memory: 1.811981201171875e-05
2025-01-25 09:53:57,907 - INFO - 
Processing batch 92
INFO:embedding:
Processing batch 92
2025-01-25 09:53:57,908 - INFO - Time taken to check: 0.00024819374084472656
INFO:embedding:Time taken to check: 0.00024819374084472656
2025-01-25 09:53:57,908 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,040 - INFO - Time taken to encode: 0.13130879402160645
INFO:embedding:Time taken to encode: 0.13130879402160645
2025-01-25 09:53:58,042 - INFO - Time taken to put: 0.0015177726745605469
INFO:embedding:Time taken to put: 0.0015177726745605469
2025-01-25 09:53:58,043 - INFO - Time taken to release GPU memory: 4.315376281738281e-05
INFO:embedding:Time taken to release GPU memory: 4.315376281738281e-05
2025-01-25 09:53:58,046 - INFO - 
Processing batch 93
INFO:embedding:
Processing batch 93
2025-01-25 09:53:58,047 - INFO - Time taken to check: 0.00014495849609375
INFO:embedding:Time taken to check: 0.00014495849609375
2025-01-25 09:53:58,047 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,140 - INFO - Time taken to encode: 0.09271883964538574
INFO:embedding:Time taken to encode: 0.09271883964538574
2025-01-25 09:53:58,143 - INFO - Time taken to put: 0.0014889240264892578
INFO:embedding:Time taken to put: 0.0014889240264892578
2025-01-25 09:53:58,143 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:58,146 - INFO - 
Processing batch 94
INFO:embedding:
Processing batch 94
2025-01-25 09:53:58,147 - INFO - Time taken to check: 0.00013113021850585938
INFO:embedding:Time taken to check: 0.00013113021850585938
2025-01-25 09:53:58,147 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,234 - INFO - Time taken to encode: 0.08617591857910156
INFO:embedding:Time taken to encode: 0.08617591857910156
2025-01-25 09:53:58,237 - INFO - Time taken to put: 0.0016028881072998047
INFO:embedding:Time taken to put: 0.0016028881072998047
2025-01-25 09:53:58,238 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:58,241 - INFO - 
Processing batch 95
INFO:embedding:
Processing batch 95
2025-01-25 09:53:58,242 - INFO - Time taken to check: 0.0003077983856201172
INFO:embedding:Time taken to check: 0.0003077983856201172
2025-01-25 09:53:58,243 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,410 - INFO - Time taken to encode: 0.16654491424560547
INFO:embedding:Time taken to encode: 0.16654491424560547
2025-01-25 09:53:58,413 - INFO - Time taken to put: 0.0020318031311035156
INFO:embedding:Time taken to put: 0.0020318031311035156
2025-01-25 09:53:58,413 - INFO - Time taken to release GPU memory: 1.5735626220703125e-05
INFO:embedding:Time taken to release GPU memory: 1.5735626220703125e-05
2025-01-25 09:53:58,415 - INFO - 
Processing batch 96
INFO:embedding:
Processing batch 96
2025-01-25 09:53:58,416 - INFO - Time taken to check: 0.00020194053649902344
INFO:embedding:Time taken to check: 0.00020194053649902344
2025-01-25 09:53:58,417 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,507 - INFO - Time taken to encode: 0.0892789363861084
INFO:embedding:Time taken to encode: 0.0892789363861084
2025-01-25 09:53:58,509 - INFO - Time taken to put: 0.001543283462524414
INFO:embedding:Time taken to put: 0.001543283462524414
2025-01-25 09:53:58,510 - INFO - Time taken to release GPU memory: 1.8835067749023438e-05
INFO:embedding:Time taken to release GPU memory: 1.8835067749023438e-05
2025-01-25 09:53:58,512 - INFO - 
Processing batch 97
INFO:embedding:
Processing batch 97
2025-01-25 09:53:58,514 - INFO - Time taken to check: 0.00032520294189453125
INFO:embedding:Time taken to check: 0.00032520294189453125
2025-01-25 09:53:58,514 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,622 - INFO - Time taken to encode: 0.10755729675292969
INFO:embedding:Time taken to encode: 0.10755729675292969
2025-01-25 09:53:58,625 - INFO - Time taken to put: 0.001940011978149414
INFO:embedding:Time taken to put: 0.001940011978149414
2025-01-25 09:53:58,626 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:53:58,628 - INFO - 
Processing batch 98
INFO:embedding:
Processing batch 98
2025-01-25 09:53:58,629 - INFO - Time taken to check: 0.0002799034118652344
INFO:embedding:Time taken to check: 0.0002799034118652344
2025-01-25 09:53:58,630 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,771 - INFO - Time taken to encode: 0.14108514785766602
INFO:embedding:Time taken to encode: 0.14108514785766602
2025-01-25 09:53:58,774 - INFO - Time taken to put: 0.001415252685546875
INFO:embedding:Time taken to put: 0.001415252685546875
2025-01-25 09:53:58,775 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:58,777 - INFO - 
Processing batch 99
INFO:embedding:
Processing batch 99
2025-01-25 09:53:58,778 - INFO - Time taken to check: 0.00022220611572265625
INFO:embedding:Time taken to check: 0.00022220611572265625
2025-01-25 09:53:58,778 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,865 - INFO - Time taken to encode: 0.08593392372131348
INFO:embedding:Time taken to encode: 0.08593392372131348
2025-01-25 09:53:58,867 - INFO - Time taken to put: 0.0015420913696289062
INFO:embedding:Time taken to put: 0.0015420913696289062
2025-01-25 09:53:58,868 - INFO - Time taken to release GPU memory: 2.0265579223632812e-05
INFO:embedding:Time taken to release GPU memory: 2.0265579223632812e-05
2025-01-25 09:53:58,870 - INFO - 
Processing batch 100
INFO:embedding:
Processing batch 100
2025-01-25 09:53:58,871 - INFO - Time taken to check: 0.0002617835998535156
INFO:embedding:Time taken to check: 0.0002617835998535156
2025-01-25 09:53:58,872 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:58,974 - INFO - Time taken to encode: 0.10158109664916992
INFO:embedding:Time taken to encode: 0.10158109664916992
2025-01-25 09:53:58,977 - INFO - Time taken to put: 0.0016787052154541016
INFO:embedding:Time taken to put: 0.0016787052154541016
2025-01-25 09:53:58,978 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:58,980 - INFO - 
Processing batch 101
INFO:embedding:
Processing batch 101
2025-01-25 09:53:58,981 - INFO - Time taken to check: 0.00040221214294433594
INFO:embedding:Time taken to check: 0.00040221214294433594
2025-01-25 09:53:58,982 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,099 - INFO - Time taken to encode: 0.11630368232727051
INFO:embedding:Time taken to encode: 0.11630368232727051
2025-01-25 09:53:59,102 - INFO - Time taken to put: 0.002025127410888672
INFO:embedding:Time taken to put: 0.002025127410888672
2025-01-25 09:53:59,102 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:53:59,105 - INFO - 
Processing batch 102
INFO:embedding:
Processing batch 102
2025-01-25 09:53:59,106 - INFO - Time taken to check: 0.00047588348388671875
INFO:embedding:Time taken to check: 0.00047588348388671875
2025-01-25 09:53:59,107 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,219 - INFO - Time taken to encode: 0.1121370792388916
INFO:embedding:Time taken to encode: 0.1121370792388916
2025-01-25 09:53:59,222 - INFO - Time taken to put: 0.002048969268798828
INFO:embedding:Time taken to put: 0.002048969268798828
2025-01-25 09:53:59,223 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:59,225 - INFO - 
Processing batch 103
INFO:embedding:
Processing batch 103
2025-01-25 09:53:59,226 - INFO - Time taken to check: 0.00024700164794921875
INFO:embedding:Time taken to check: 0.00024700164794921875
2025-01-25 09:53:59,227 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,325 - INFO - Time taken to encode: 0.09772396087646484
INFO:embedding:Time taken to encode: 0.09772396087646484
2025-01-25 09:53:59,328 - INFO - Time taken to put: 0.0025510787963867188
INFO:embedding:Time taken to put: 0.0025510787963867188
2025-01-25 09:53:59,329 - INFO - Time taken to release GPU memory: 1.9788742065429688e-05
INFO:embedding:Time taken to release GPU memory: 1.9788742065429688e-05
2025-01-25 09:53:59,331 - INFO - 
Processing batch 104
INFO:embedding:
Processing batch 104
2025-01-25 09:53:59,332 - INFO - Time taken to check: 0.000164031982421875
INFO:embedding:Time taken to check: 0.000164031982421875
2025-01-25 09:53:59,332 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,484 - INFO - Time taken to encode: 0.15115714073181152
INFO:embedding:Time taken to encode: 0.15115714073181152
2025-01-25 09:53:59,486 - INFO - Time taken to put: 0.0017018318176269531
INFO:embedding:Time taken to put: 0.0017018318176269531
2025-01-25 09:53:59,487 - INFO - Time taken to release GPU memory: 2.0265579223632812e-05
INFO:embedding:Time taken to release GPU memory: 2.0265579223632812e-05
2025-01-25 09:53:59,489 - INFO - 
Processing batch 105
INFO:embedding:
Processing batch 105
2025-01-25 09:53:59,490 - INFO - Time taken to check: 0.00019025802612304688
INFO:embedding:Time taken to check: 0.00019025802612304688
2025-01-25 09:53:59,491 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,580 - INFO - Time taken to encode: 0.08905291557312012
INFO:embedding:Time taken to encode: 0.08905291557312012
2025-01-25 09:53:59,583 - INFO - Time taken to put: 0.0014410018920898438
INFO:embedding:Time taken to put: 0.0014410018920898438
2025-01-25 09:53:59,584 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:53:59,586 - INFO - 
Processing batch 106
INFO:embedding:
Processing batch 106
2025-01-25 09:53:59,587 - INFO - Time taken to check: 0.00012087821960449219
INFO:embedding:Time taken to check: 0.00012087821960449219
2025-01-25 09:53:59,588 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,702 - INFO - Time taken to encode: 0.11356711387634277
INFO:embedding:Time taken to encode: 0.11356711387634277
2025-01-25 09:53:59,704 - INFO - Time taken to put: 0.0014739036560058594
INFO:embedding:Time taken to put: 0.0014739036560058594
2025-01-25 09:53:59,705 - INFO - Time taken to release GPU memory: 2.1219253540039062e-05
INFO:embedding:Time taken to release GPU memory: 2.1219253540039062e-05
2025-01-25 09:53:59,707 - INFO - 
Processing batch 107
INFO:embedding:
Processing batch 107
2025-01-25 09:53:59,708 - INFO - Time taken to check: 0.0002460479736328125
INFO:embedding:Time taken to check: 0.0002460479736328125
2025-01-25 09:53:59,709 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,852 - INFO - Time taken to encode: 0.1423029899597168
INFO:embedding:Time taken to encode: 0.1423029899597168
2025-01-25 09:53:59,854 - INFO - Time taken to put: 0.002024412155151367
INFO:embedding:Time taken to put: 0.002024412155151367
2025-01-25 09:53:59,855 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:59,857 - INFO - 
Processing batch 108
INFO:embedding:
Processing batch 108
2025-01-25 09:53:59,858 - INFO - Time taken to check: 0.00010895729064941406
INFO:embedding:Time taken to check: 0.00010895729064941406
2025-01-25 09:53:59,859 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:53:59,954 - INFO - Time taken to encode: 0.0953209400177002
INFO:embedding:Time taken to encode: 0.0953209400177002
2025-01-25 09:53:59,957 - INFO - Time taken to put: 0.0013840198516845703
INFO:embedding:Time taken to put: 0.0013840198516845703
2025-01-25 09:53:59,957 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:53:59,960 - INFO - 
Processing batch 109
INFO:embedding:
Processing batch 109
2025-01-25 09:53:59,961 - INFO - Time taken to check: 0.00037217140197753906
INFO:embedding:Time taken to check: 0.00037217140197753906
2025-01-25 09:53:59,961 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,064 - INFO - Time taken to encode: 0.1022031307220459
INFO:embedding:Time taken to encode: 0.1022031307220459
2025-01-25 09:54:00,067 - INFO - Time taken to put: 0.0017271041870117188
INFO:embedding:Time taken to put: 0.0017271041870117188
2025-01-25 09:54:00,068 - INFO - Time taken to release GPU memory: 1.9311904907226562e-05
INFO:embedding:Time taken to release GPU memory: 1.9311904907226562e-05
2025-01-25 09:54:00,070 - INFO - 
Processing batch 110
INFO:embedding:
Processing batch 110
2025-01-25 09:54:00,071 - INFO - Time taken to check: 0.00015592575073242188
INFO:embedding:Time taken to check: 0.00015592575073242188
2025-01-25 09:54:00,072 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,204 - INFO - Time taken to encode: 0.13206791877746582
INFO:embedding:Time taken to encode: 0.13206791877746582
2025-01-25 09:54:00,207 - INFO - Time taken to put: 0.0015120506286621094
INFO:embedding:Time taken to put: 0.0015120506286621094
2025-01-25 09:54:00,207 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:54:00,210 - INFO - 
Processing batch 111
INFO:embedding:
Processing batch 111
2025-01-25 09:54:00,211 - INFO - Time taken to check: 0.00031113624572753906
INFO:embedding:Time taken to check: 0.00031113624572753906
2025-01-25 09:54:00,211 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,310 - INFO - Time taken to encode: 0.09775996208190918
INFO:embedding:Time taken to encode: 0.09775996208190918
2025-01-25 09:54:00,312 - INFO - Time taken to put: 0.0016319751739501953
INFO:embedding:Time taken to put: 0.0016319751739501953
2025-01-25 09:54:00,313 - INFO - Time taken to release GPU memory: 2.193450927734375e-05
INFO:embedding:Time taken to release GPU memory: 2.193450927734375e-05
2025-01-25 09:54:00,315 - INFO - 
Processing batch 112
INFO:embedding:
Processing batch 112
2025-01-25 09:54:00,316 - INFO - Time taken to check: 0.0002009868621826172
INFO:embedding:Time taken to check: 0.0002009868621826172
2025-01-25 09:54:00,317 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,421 - INFO - Time taken to encode: 0.10422706604003906
INFO:embedding:Time taken to encode: 0.10422706604003906
2025-01-25 09:54:00,424 - INFO - Time taken to put: 0.0015561580657958984
INFO:embedding:Time taken to put: 0.0015561580657958984
2025-01-25 09:54:00,425 - INFO - Time taken to release GPU memory: 1.9788742065429688e-05
INFO:embedding:Time taken to release GPU memory: 1.9788742065429688e-05
2025-01-25 09:54:00,427 - INFO - 
Processing batch 113
INFO:embedding:
Processing batch 113
2025-01-25 09:54:00,428 - INFO - Time taken to check: 0.00011682510375976562
INFO:embedding:Time taken to check: 0.00011682510375976562
2025-01-25 09:54:00,429 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,557 - INFO - Time taken to encode: 0.12739300727844238
INFO:embedding:Time taken to encode: 0.12739300727844238
2025-01-25 09:54:00,559 - INFO - Time taken to put: 0.001461029052734375
INFO:embedding:Time taken to put: 0.001461029052734375
2025-01-25 09:54:00,560 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:54:00,563 - INFO - 
Processing batch 114
INFO:embedding:
Processing batch 114
2025-01-25 09:54:00,564 - INFO - Time taken to check: 0.0001163482666015625
INFO:embedding:Time taken to check: 0.0001163482666015625
2025-01-25 09:54:00,564 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,679 - INFO - Time taken to encode: 0.11442303657531738
INFO:embedding:Time taken to encode: 0.11442303657531738
2025-01-25 09:54:00,683 - INFO - Time taken to put: 0.003170013427734375
INFO:embedding:Time taken to put: 0.003170013427734375
2025-01-25 09:54:00,684 - INFO - Time taken to release GPU memory: 2.5033950805664062e-05
INFO:embedding:Time taken to release GPU memory: 2.5033950805664062e-05
2025-01-25 09:54:00,689 - INFO - 
Processing batch 115
INFO:embedding:
Processing batch 115
2025-01-25 09:54:00,691 - INFO - Time taken to check: 0.0006740093231201172
INFO:embedding:Time taken to check: 0.0006740093231201172
2025-01-25 09:54:00,692 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,810 - INFO - Time taken to encode: 0.11724495887756348
INFO:embedding:Time taken to encode: 0.11724495887756348
2025-01-25 09:54:00,813 - INFO - Time taken to put: 0.0015590190887451172
INFO:embedding:Time taken to put: 0.0015590190887451172
2025-01-25 09:54:00,814 - INFO - Time taken to release GPU memory: 3.3855438232421875e-05
INFO:embedding:Time taken to release GPU memory: 3.3855438232421875e-05
2025-01-25 09:54:00,816 - INFO - 
Processing batch 116
INFO:embedding:
Processing batch 116
2025-01-25 09:54:00,816 - INFO - Time taken to check: 0.00018596649169921875
INFO:embedding:Time taken to check: 0.00018596649169921875
2025-01-25 09:54:00,817 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:00,942 - INFO - Time taken to encode: 0.12464499473571777
INFO:embedding:Time taken to encode: 0.12464499473571777
2025-01-25 09:54:00,944 - INFO - Time taken to put: 0.0014269351959228516
INFO:embedding:Time taken to put: 0.0014269351959228516
2025-01-25 09:54:00,945 - INFO - Time taken to release GPU memory: 2.09808349609375e-05
INFO:embedding:Time taken to release GPU memory: 2.09808349609375e-05
2025-01-25 09:54:00,948 - INFO - 
Processing batch 117
INFO:embedding:
Processing batch 117
2025-01-25 09:54:00,949 - INFO - Time taken to check: 0.0001709461212158203
INFO:embedding:Time taken to check: 0.0001709461212158203
2025-01-25 09:54:00,949 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:01,036 - INFO - Time taken to encode: 0.0859379768371582
INFO:embedding:Time taken to encode: 0.0859379768371582
2025-01-25 09:54:01,038 - INFO - Time taken to put: 0.0013909339904785156
INFO:embedding:Time taken to put: 0.0013909339904785156
2025-01-25 09:54:01,039 - INFO - Time taken to release GPU memory: 2.002716064453125e-05
INFO:embedding:Time taken to release GPU memory: 2.002716064453125e-05
2025-01-25 09:54:01,041 - INFO - 
Processing batch 118
INFO:embedding:
Processing batch 118
2025-01-25 09:54:01,042 - INFO - Time taken to check: 0.0003750324249267578
INFO:embedding:Time taken to check: 0.0003750324249267578
2025-01-25 09:54:01,043 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:01,146 - INFO - Time taken to encode: 0.10331296920776367
INFO:embedding:Time taken to encode: 0.10331296920776367
2025-01-25 09:54:01,149 - INFO - Time taken to put: 0.001840829849243164
INFO:embedding:Time taken to put: 0.001840829849243164
2025-01-25 09:54:01,150 - INFO - Time taken to release GPU memory: 1.9073486328125e-05
INFO:embedding:Time taken to release GPU memory: 1.9073486328125e-05
2025-01-25 09:54:01,153 - INFO - 
Processing batch 119
INFO:embedding:
Processing batch 119
2025-01-25 09:54:01,154 - INFO - Time taken to check: 0.0003020763397216797
INFO:embedding:Time taken to check: 0.0003020763397216797
2025-01-25 09:54:01,154 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:01,302 - INFO - Time taken to encode: 0.14697003364562988
INFO:embedding:Time taken to encode: 0.14697003364562988
2025-01-25 09:54:01,304 - INFO - Time taken to put: 0.0015518665313720703
INFO:embedding:Time taken to put: 0.0015518665313720703
2025-01-25 09:54:01,305 - INFO - Time taken to release GPU memory: 1.7881393432617188e-05
INFO:embedding:Time taken to release GPU memory: 1.7881393432617188e-05
2025-01-25 09:54:01,307 - INFO - 
Processing batch 120
INFO:embedding:
Processing batch 120
2025-01-25 09:54:01,308 - INFO - Time taken to check: 0.0002853870391845703
INFO:embedding:Time taken to check: 0.0002853870391845703
2025-01-25 09:54:01,309 - INFO - Missing 8 texts, 0 cached
INFO:embedding:Missing 8 texts, 0 cached


Chunks:   0%|          | 0/8 [00:00<?, ?it/s]

2025-01-25 09:54:01,405 - INFO - Time taken to encode: 0.0954430103302002
INFO:embedding:Time taken to encode: 0.0954430103302002
2025-01-25 09:54:01,408 - INFO - Time taken to put: 0.002161264419555664
INFO:embedding:Time taken to put: 0.002161264419555664
2025-01-25 09:54:01,409 - INFO - Time taken to release GPU memory: 3.409385681152344e-05
INFO:embedding:Time taken to release GPU memory: 3.409385681152344e-05
2025-01-25 09:54:01,413 - INFO - 
Processing batch 121
INFO:embedding:
Processing batch 121
2025-01-25 09:54:01,413 - INFO - Time taken to check: 0.00010180473327636719
INFO:embedding:Time taken to check: 0.00010180473327636719
2025-01-25 09:54:01,414 - INFO - Missing 1 texts, 0 cached
INFO:embedding:Missing 1 texts, 0 cached


Chunks:   0%|          | 0/1 [00:00<?, ?it/s]

2025-01-25 09:54:01,432 - INFO - Time taken to encode: 0.017298221588134766
INFO:embedding:Time taken to encode: 0.017298221588134766
2025-01-25 09:54:01,433 - INFO - Time taken to put: 0.0003840923309326172
INFO:embedding:Time taken to put: 0.0003840923309326172
2025-01-25 09:54:01,434 - INFO - Time taken to release GPU memory: 6.9141387939453125e-06
INFO:embedding:Time taken to release GPU memory: 6.9141387939453125e-06
2025-01-25 09:54:01,454 - INFO - batch_insert_documents_with_pool took 22.86 seconds
INFO:embedding:batch_insert_documents_with_pool took 22.86 seconds
2025-01-25 09:54:01,455 - INFO - Vector store successfully created and persisted.
INFO:embedding:Vector store successfully created and persisted.
2025-01-25 09:54:01,455 - INFO - build_vector_db took 23.76 seconds
INFO:embedding:build_vector_db took 23.76 seconds


<embedding.EmbeddingCacheDB at 0x339ecc640>

## Example: Build a vector store from a duckdb table

### Install dependencies

In [11]:
!pip install -qU langchain-community duckdb chromadb "langchain-chroma>=0.1.2" ipywidgets sentence-transformers einops datasets python-dotenv

### Load the environment variables

In [None]:
import os
from dotenv import load_dotenv

# Location of the .env file. Set the DOTENV_PATH environment variable to point
# somewhere else instead of editing this cell; the original path is the default.
load_dotenv(dotenv_path=os.environ.get("DOTENV_PATH", "/Users/jy006/.ssh/.env"))

### Test the duckdb loader

In [6]:
from langchain_community.document_loaders import DuckDBLoader

In [None]:
# Run the query through DuckDBLoader: the `text` column supplies the page
# content and the listed columns are attached as metadata.
loader = DuckDBLoader(
    query=(
        "SELECT * "
        "FROM read_parquet('/Volumes/Backup/ProjectData/Papers/PMC_OA_Bulk/processed/oa_other/paragraphs/0a96d2c04ab604cd71eed3b268c298c9_20250103_135911.parquet')"
    ),
    metadata_columns=["pmid", "pmc", "doi", "pubdate"],
    page_content_columns=["text"],
)

data = loader.load()
# Display the first loaded document as a quick sanity check.
data[0]

### Build the knowledge base

In [None]:
from langchain_community.embeddings import OpenAIEmbeddings, OllamaEmbeddings, SentenceTransformerEmbeddings
from langchain_community.document_loaders import DuckDBLoader
from langchain_community.vectorstores import Chroma

import os

def load_vector_store(
    chroma_db_dir,
    num_documents=1000,
    parquet_path="/Volumes/Backup/ProjectData/Papers/PMC_OA_Bulk/processed/oa_other/paragraphs/0a96d2c04ab604cd71eed3b268c298c9_20250103_135911.parquet",
):
    """Embed paragraphs from a parquet file and store them in a Chroma database.

    Args:
        chroma_db_dir: Directory handed to Chroma as ``persist_directory``.
        num_documents: Maximum number of rows read from the parquet file
            (applied as a SQL ``LIMIT``).
        parquet_path: Parquet file to read. Defaults to the file this notebook
            was originally written against.

    Returns:
        None. Any failure is reported with ``print`` instead of being raised,
        so the notebook keeps running.
    """

    def preprocess_metadata(documents):
        """Replace ``None`` metadata values with ``""`` on each document, in place."""
        for doc in documents:
            if doc.metadata:
                # Replace None values in the metadata with ""
                # (presumably because the vector store rejects None — TODO confirm).
                doc.metadata = {
                    key: ("" if value is None else value)
                    for key, value in doc.metadata.items()
                }
        return documents

    try:
        # Coerce the limit to int and double any single quote in the path so
        # neither value can break out of the SQL text built below.
        row_limit = int(num_documents)
        quoted_path = str(parquet_path).replace("'", "''")

        # Load the documents
        raw_documents = DuckDBLoader(
            query=f"SELECT * FROM read_parquet('{quoted_path}') LIMIT {row_limit}",
            page_content_columns=["text"],
            metadata_columns=["pmid", "pmc", "doi", "pubdate"],
        ).load()

        # # Split the document into smaller chunks
        # text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
        # documents = text_splitter.split_documents(raw_documents)

        # Embed each document and load it into the vector store
        vector_store = Chroma.from_documents(
            documents=preprocess_metadata(raw_documents),
            # embedding=OllamaEmbeddings(model="mistral:7b"),
            # embedding=OpenAIEmbeddings(),
            embedding=SentenceTransformerEmbeddings(
                model_name="nvidia/NV-Embed-v2", model_kwargs={"trust_remote_code": True}
            ),
            persist_directory=str(chroma_db_dir),
        )

        # persist() is deprecated in recent Chroma wrappers (data is written
        # automatically), so only call it when the object still provides it.
        persist = getattr(vector_store, "persist", None)
        if callable(persist):
            persist()
        print("Vector store successfully created and persisted.")
    except Exception as e:
        # Deliberately best-effort: report the problem and return.
        # NOTE(review): a failed run may leave chroma_db_dir behind, in which
        # case a caller that checks only for the directory will skip rebuilding.
        print(f"Error loading vector store: {e}")


# Directory of the Chroma vector database to build.
# Alternative directory name:
# chroma_db_dir = os.path.abspath("./chroma_db_chatgpt")
chroma_db_dir = os.path.abspath("./chroma_db_nvidia")
print("Chroma vector database path:", chroma_db_dir)

# Build the store only when the directory is absent; an existing one is reused.
if os.path.exists(chroma_db_dir):
    print("Chroma database found. Skipping creation.")
else:
    print("Chroma database not found. Creating a new one...")
    load_vector_store(chroma_db_dir, num_documents=100)

In [None]:
import os

from phi.agent import Agent
from phi.knowledge.langchain import LangChainKnowledgeBase
from phi.model.ollama.chat import Ollama
from phi.model.openai.chat import OpenAIChat
from langchain_chroma import Chroma
from langchain_community.embeddings import OpenAIEmbeddings, OllamaEmbeddings

# Path to Chroma vector database
chroma_db_dir = os.path.abspath("./chroma_db_chatgpt")
# os.makedirs(chroma_db_dir, exist_ok=True)
print("Chroma vector database path:", chroma_db_dir)

# Open the vector database stored in that directory.
# NOTE(review): the embedding function should be the same one that was used to
# populate this directory — confirm "./chroma_db_chatgpt" was built with
# OpenAIEmbeddings.
db = Chroma(
    # embedding_function=OllamaEmbeddings(model="mistral:7b"),
    embedding_function=OpenAIEmbeddings(),
    persist_directory=str(chroma_db_dir),
)

# Ask the collection for its size once and reuse the value for both the check
# and the message (`_collection` is a private attribute of the wrapper).
num_stored_docs = db._collection.count()
if not num_stored_docs:
    print("Chroma database is empty. Please ensure documents are loaded.")
else:
    print(f"Chroma database contains {num_stored_docs} documents.")

In [None]:
import duckdb

# Open a DuckDB connection and select every column of the parquet file.
conn = duckdb.connect()
data = conn.sql(
    "SELECT * "
    "FROM read_parquet('/Volumes/Backup/ProjectData/Papers/PMC_OA_Bulk/processed/oa_other/paragraphs/0a96d2c04ab604cd71eed3b268c298c9_20250103_135911.parquet')"
)

# Print the query result.
data.show()

In [None]:
# How many distinct `pmc` values does the parquet file contain?
conn.sql(
    "SELECT COUNT(DISTINCT pmc) "
    "FROM read_parquet('/Volumes/Backup/ProjectData/Papers/PMC_OA_Bulk/processed/oa_other/paragraphs/0a96d2c04ab604cd71eed3b268c298c9_20250103_135911.parquet')"
).show()


In [None]:
# Create a retriever from the vector store. The number of hits is configured on
# the retriever itself (search_kwargs) so that every caller of this retriever,
# including the knowledge base below, gets the same k.
retriever = db.as_retriever(search_kwargs={"k": 10})

# Smoke-test retrieval before wiring the retriever into an agent.
# `invoke` is the replacement for the deprecated `get_relevant_documents`.
docs = retriever.invoke("observed base pair difference")
if not docs:
    print("No relevant documents retrieved.")
else:
    print(f"Retrieved {len(docs)} documents.")

    # Create a knowledge base from the vector store
    knowledge_base = LangChainKnowledgeBase(retriever=retriever, num_documents=10)

    # Initialize the Agent
    # https://docs.phidata.com/agents/knowledge#step-3-agentic-rag
    kb_agent = Agent(
        model=OpenAIChat(id="gpt-4o"),
        # model=Ollama(id="mistral:7b"),
        knowledge_base=knowledge_base,
        add_reference_to_prompt=True,
        # add_references=True,
        instructions=[
            "Always prioritize information from the knowledge base over your training data.",
            "If the knowledge base does not contain information relevant to the query, respond with: 'No relevant information found in the knowledge base.'",
            "Do not generate answers based on prior training data unless explicitly instructed.",
        ],
        markdown=True,
        # debug_mode=True,
    )

    # Test the Agent with a query
    kb_agent.print_response(
        "What was the observed base pair difference between many of the strain types?"
    )

## Run deepspeed

In [None]:
!export TRITON_CACHE_DIR=/tmp && deepspeed --num_gpus 2 --master_port 60000 /work/data/projects/data2report/deepspeed/run_deepspeed.py