In [None]:
!pip install faiss-cpu
!pip install sentence-transformers
!pip install gradio

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
import gradio as gr

# Load the sentence-embedding model once at module level so that every call
# to get_embeddings() reuses the same instance instead of reloading it.
model = SentenceTransformer('all-mpnet-base-v2')

def get_embeddings(texts):
    """Encode *texts* with the module-level sentence-transformer model.

    Args:
        texts: The text(s) to embed; passed straight through to
            ``model.encode``.

    Returns:
        Whatever ``model.encode`` returns for *texts* (the embeddings).
    """
    return model.encode(texts)

def process_text(text_input, text_prompt):
    """Index comma-separated texts with Faiss and find the one nearest a prompt.

    The input string is split on commas, each piece is stripped, and blank
    pieces are discarded. The remaining texts are embedded, added to a
    ``faiss.IndexFlatL2`` index, and the single nearest neighbour of the
    embedded prompt is looked up.

    Args:
        text_input: Comma-separated texts to index. ``None`` is treated as
            an empty string.
        text_prompt: The query text whose nearest neighbour is returned.

    Returns:
        A ``(embeddings_output, answer)`` tuple of strings.
        ``embeddings_output`` is a human-readable report listing every text
        embedding, the prompt embedding, the nearest-neighbour index and the
        distance reported by Faiss. ``answer`` is the indexed text closest
        to the prompt. When no non-blank text is supplied, the report is a
        short explanatory message and ``answer`` is the empty string.
    """
    # Drop blank entries (empty input, trailing or doubled commas) so that an
    # empty string is never embedded, indexed, or returned as the answer.
    pieces = (segment.strip() for segment in (text_input or "").split(","))
    texts = [segment for segment in pieces if segment]
    if not texts:
        return "No input texts provided.", ""

    input_embeddings = get_embeddings(texts)

    # The index dimensionality must match the embedding width.
    index = faiss.IndexFlatL2(input_embeddings.shape[1])
    index.add(np.ascontiguousarray(input_embeddings, dtype=np.float32))

    prompt_embedding = get_embeddings([text_prompt])[0]

    # Faiss expects a 2-D float32 batch of queries, hence the wrapping list.
    query = np.ascontiguousarray([prompt_embedding], dtype=np.float32)
    distances, indices = index.search(query, k=1)
    nearest = indices[0][0]

    answer = texts[nearest]

    # Build the report with a single join rather than repeated `+=`.
    report_parts = [
        f"Text {position}: {embedding}\n"
        for position, embedding in enumerate(input_embeddings, start=1)
    ]
    report_parts.append(f"\nPrompt Embedding: {prompt_embedding}\n")
    report_parts.append(f"\nNearest neighbor index: {nearest}\n")
    report_parts.append(f"Distance: {distances[0][0]}\n")

    return "".join(report_parts), answer


# Build the Gradio UI: two text inputs (the comma-separated corpus and the
# query prompt) are passed to process_text, whose two string results are
# shown in the "Embeddings" and "Answer" boxes.
iface = gr.Interface(
    title="Vector Database Demonstration",
    description="Enter text, create embeddings with Faiss, then ask a question about the text. Brought to you by Uma, Aishwarya, Prahasya and Bhavana of CSE-E.",
    fn=process_text,
    inputs=[
        gr.Textbox(label="Enter text (separate multiple texts with commas)"),
        gr.Textbox(label="Enter text prompt"),
    ],
    outputs=[
        gr.Textbox(label="Embeddings"),
        gr.Textbox(label="Answer"),
    ],
)

# Start serving the interface.
iface.launch()

Collecting faiss-cpu
  Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.9.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (27.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.5/27.5 MB[0m [31m32.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.9.0.post1
Collecting gradio
  Downloading gradio-5.8.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.5.1 (from gradio)
  Downloading gradio_client-1.5.1-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  D

  from tqdm.autonotebook import tqdm, trange
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://1e811610622dd45220.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


