In [1]:
import pinecone as pc
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

In [2]:
# Load the pretrained sentence-embedding model used to encode both the
# documents and the search query in the cells below.
model = SentenceTransformer(model_name_or_path="all-MiniLM-L6-v2")

In [3]:
# Small demo corpus. A document's position in this list is the id used to look
# it up again after a FAISS search (see `documents[idx]` in the search cell).
documents = [
    "Legal contract about property rights",
    "Court ruling on employment disputes",
    "Patent law regarding intellectual property",
    "Legal framework for data protection",
    "Regulations on financial transactions",
    "Guidelines for corporate governance",
    "International treaties on trade agreements",
    "Legislation on environmental protection"
]

# Convert the documents into embedding vectors (one row per document)
vectors = model.encode(documents)

# Sanity check: exactly one embedding row per input document
assert vectors.shape[0] == len(documents), "expected one embedding per document"

print(vectors.shape)  # (8, 384) -> 8 documents, each 384-dimensional

(8, 384)


In [4]:
# Take the index dimension from the embeddings themselves rather than
# hard-coding 384, so switching to a different embedding model cannot leave
# the index and the vectors out of sync.
dimension = vectors.shape[1]
index = faiss.IndexFlatL2(dimension)  # L2 distance (Euclidean)

In [5]:
# Empty the index first so that re-running this cell does not append the same
# vectors a second time; duplicates would make later searches return ids
# >= len(documents), which breaks the documents[idx] lookup.
index.reset()

# FAISS expects a C-contiguous float32 matrix; this is a no-op (no copy) when
# the embeddings already have that layout.
index.add(np.ascontiguousarray(vectors, dtype=np.float32))  # Add all vectors
faiss.write_index(index, "legal_search.index")  # Save for later
print("Stored vectors in FAISS.")

Stored vectors in FAISS.


In [6]:
# Embed the search text with the same model as the documents. The text is
# wrapped in a one-element list, mirroring how the document list was encoded.
query = model.encode(
    sentences=["Employment law for contracts"],
)

In [7]:
# Retrieve the single nearest document (k=1); raise k to get more matches.
# D receives the distances and I the matching row positions, one row per query.
D, I = index.search(np.array(query), k=1)

print("Most relevant documents:")
for idx in I[0]:
    # FAISS fills missing results with -1 when fewer than k vectors are
    # indexed; without this guard documents[-1] would silently print the
    # last document as if it were a match.
    if idx < 0:
        continue
    print(documents[idx])  # Show matched document

Most relevant documents:
Court ruling on employment disputes
