# Setting up a Simple Vector Database


Install all the necessary libraries

In [None]:
!pip install sentence-transformers


Collecting sentence-transformers
  Downloading sentence_transformers-3.1.0-py3-none-any.whl.metadata (23 kB)
Downloading sentence_transformers-3.1.0-py3-none-any.whl (249 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.1/249.1 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: sentence-transformers
Successfully installed sentence-transformers-3.1.0


In [None]:
!pip install pinecone-client


Collecting pinecone-client
  Downloading pinecone_client-5.0.1-py3-none-any.whl.metadata (19 kB)
Collecting pinecone-plugin-inference<2.0.0,>=1.0.3 (from pinecone-client)
  Downloading pinecone_plugin_inference-1.0.3-py3-none-any.whl.metadata (2.2 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone-client)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Downloading pinecone_client-5.0.1-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.8/244.8 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_inference-1.0.3-py3-none-any.whl (117 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.6/117.6 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pinecone_plugin_interface-0.0.7-py3-none-any.whl (6.2 kB)
Installing collected packages: pinecone-plugin-interface, pinecone-plugin-inference, pinecone-client
Successfully installed pinecone-client-5.0.1 pinecone-plugin-inference-1.0.3 pinecone-plugin-interface-0.0.7

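Set up the Pinecone client with your API key. Use your own key and keep it out of the saved notebook (for example, load it from an environment variable) so it is never shared or committed by accident.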

In [None]:
import os
from getpass import getpass

from pinecone import Pinecone, ServerlessSpec

# Never store the API key in the notebook itself: read it from the
# PINECONE_API_KEY environment variable, and fall back to an interactive
# prompt (input is not echoed) when the variable is not set.
# NOTE(review): a key was previously hardcoded in this cell; treat it as
# exposed and rotate it in the Pinecone console.
api_key = os.environ.get("PINECONE_API_KEY") or getpass("Pinecone API key: ")

# Initialize Pinecone
pc = Pinecone(api_key=api_key)


Load a pre-trained model from the sentence-transformers library to convert sentences to vectors

In [None]:
from sentence_transformers import SentenceTransformer

# Load a pre-trained model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Sample custom data (text)
texts = [
    "Pinecone is a vector database.",
    "Vectors represent data in numerical form.",
    "Embedding models convert text to vectors."
]

# Convert texts to vectors. Encode only once: running the model a second time
# over the same texts just repeats the work.
embeddings = model.encode(texts)

# Define vector dimension based on the model's output
# (second axis of the embeddings array, i.e. the length of one text's vector).
dimension = embeddings.shape[1]



Create a Pinecone index with the defined dimension and other specifications

In [None]:
index_name = "quickstart"

# Only create the index when it does not exist yet, so this cell can be
# re-run (e.g. after a kernel restart) without failing on a name conflict.
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=dimension,  # must equal the embedding length computed from the model output
        metric="cosine",  # similarity metric used when querying; change to suit your model
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )

In [None]:
# Open a handle to the index named by index_name
index = pc.Index(index_name)

# Build (ID, vector) pairs; the ID is the embedding's position rendered as a string
vector_data = [
    (str(position), embedding)
    for position, embedding in enumerate(embeddings)
]

# Send the pairs to Pinecone; the response is left as the cell's displayed result
index.upsert(vectors=vector_data)

{'upserted_count': 3}

Perform a Query

In [None]:
# Text to search for
query_text = "How does Pinecone work?"

# Encode a one-item batch, then take its single row as a plain Python list
encoded_batch = model.encode([query_text])
query_vector = encoded_batch[0].tolist()


In [None]:
# Show only the length and the first few components: printing the whole
# embedding floods the output without telling the reader anything more.
print(f"Query Vector ({len(query_vector)} dims, first 5):", query_vector[:5])


Query Vector: [-0.03350024297833443, -0.1366605907678604, -0.01909906230866909, 0.09239833056926727, 0.04075942188501358, 0.016362279653549194, 0.06349402666091919, 0.02581769973039627, 0.05568945035338402, 0.04700672999024391, -0.006264170166105032, 0.058952558785676956, -0.06622399389743805, 0.07259415835142136, -0.08481717109680176, 0.010515335947275162, -0.007911812514066696, 0.03800654038786888, -0.0028463718481361866, 0.01779298298060894, 0.040823910385370255, 0.02805621549487114, -0.04146581143140793, 0.03373275697231293, -0.032930899411439896, -0.0601232573390007, -0.10080737620592117, 0.013114390894770622, 0.0436558872461319, -0.02130628004670143, 0.10192885249853134, 0.015007135458290577, 0.011084984056651592, 0.03563760966062546, -0.11297101527452469, 0.027331877499818802, -0.025364186614751816, -0.12096710503101349, 0.013431917876005173, 0.09248647838830948, -0.02407141961157322, -0.002071363152936101, 0.02131751924753189, -0.019450126215815544, 0.022565552964806557, 0.0395

In [None]:
# Perform the query: ask the index for the stored vectors closest to query_vector.
PREVIEW_DIMS = 5  # number of leading vector components to print per match

try:
    result = index.query(
        vector=query_vector,
        top_k=3,
        include_values=True
    )
    # Treat a missing or empty 'matches' entry the same way, so an empty
    # result does not print a bare "Query Results:" header.
    matches = (result['matches'] if 'matches' in result else None) or []
    if matches:
        print("Query Results:")
        for match in matches:
            # Print only a short preview of each vector; the full list of
            # components would bury the IDs and scores in the output.
            values = match.get('values') or []
            print(
                f"ID: {match.get('id')}, Score: {match.get('score')}, "
                f"Values (first {PREVIEW_DIMS} of {len(values)}): {values[:PREVIEW_DIMS]}"
            )
    else:
        print("No matches found in the result.")
except Exception as e:
    # Best-effort demo cell: report the failure rather than aborting the run.
    print("Error occurred:", e)

Query Results:
ID: 0, Score: 0.613276184, Values: [0.00616714219, -0.151020199, -0.0753949955, 0.0579001084, 0.0082278233, 0.00357001182, -0.0261911061, 0.0117331, -0.0343679376, 0.0265409276, -0.0150775, 0.0403129086, -0.0333452225, 0.0717455223, -0.0946341, 0.0383241586, -0.0327914, 0.0588847, 0.105138712, 0.0352675617, -0.0512689948, 0.0606280603, -0.0400602557, 0.0230478775, 0.0632282645, -0.041122891, -0.027623279, -0.0061295433, 0.0285106134, -0.0535609052, 0.0869671, 0.0853977129, 0.0591949373, 0.129784912, -0.128154188, 0.0279046334, 0.00618958892, -0.0934525132, -0.0375093892, 0.0685946867, -0.0399105847, 0.0752485469, -0.00281097554, 0.0154421, 0.0501180589, 0.0452337563, -0.0643143, -0.0214073844, 0.0664413646, -0.00419385917, -0.0790748075, -0.119687617, -0.0904590487, -0.0206201673, -0.00343719, 0.0489106551, 0.0472917408, -0.142980561, 0.0366289169, -0.0519360974, 0.0334791578, 0.0307305083, -0.0489193723, 0.0457431711, 0.0259341747, 0.0827801, -0.0719948635, 0.124182977,