In [1]:
pip install transformers langchain faiss-cpu datasets



In [2]:
from datasets import load_dataset

# Load a dataset from Hugging Face
dataset = load_dataset("ag_news", split="train[:1000]")  # Using a small sample for demonstration
documents = [entry["text"] for entry in dataset]


In [3]:
from transformers import AutoTokenizer, AutoModel
import torch

# Load pre-trained model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)

def embed(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
    return embeddings

# Generate embeddings for all documents
document_embeddings = torch.cat([embed(doc) for doc in documents])


In [4]:
import faiss

# Create a FAISS index
d = document_embeddings.shape[1]
index = faiss.IndexFlatL2(d)  # L2 distance index
index.add(document_embeddings.numpy())  # Add document embeddings to the index

In [5]:
def retrieve(query, k=5):
    query_embedding = embed(query).numpy()
    distances, indices = index.search(query_embedding, k)
    return [documents[i] for i in indices[0]]

# Example query
query = "What is AI?"
retrieved_docs = retrieve(query)
print("Retrieved Documents:", retrieved_docs)

Retrieved Documents: ['NASA Develops Robust Artificial Intelligence for Planetary Rovers NASA is planning to add a strong dose of artificial intelligence (AI) to planetary rovers to make them much more self-reliant, capable of making basic decisions during a mission. Scientists are developing very complex AI software that enables a higher level of robotic intelligence.', "Computers with multiple personalities The jury's still out on whether a computer can ever truly be intelligent, but there's no question that it can have multiple personalities. It's just a matter of software.", "Battling Robots in Japan's Pop-Culture Tech The ring sits in the spotlight of a tense, packed auditorium and the jittery fighters await the bell at their red and blue corners. Like any fight, there's always the danger of a punishing uppercut or left hook. But these boxers have even more worries  like battery failure and software bugs.", "New NASA Supercomputer to Aid Theorists and Shuttle Engineers (SPACE.com)

In [6]:
!pip install transformers



In [8]:
from transformers import pipeline

generator = pipeline('text-generation', model='EleutherAI/gpt-neo-2.7B')  # Choose a suitable text generation model

config.json:   0%|          | 0.00/1.46k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/10.7G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/90.0 [00:00<?, ?B/s]

In [9]:
def generate_answer(query):
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)
    prompt = f"Question: {query}\nContext: {context}\nAnswer:"
    # Use max_new_tokens to control the length of generated text instead of max_length
    generated_answer = generator(prompt, max_new_tokens=50, num_return_sequences=1)
    return generated_answer[0]["generated_text"]

# Example query
query = "What is AI?"
answer = generate_answer(query)
print("Generated Answer:", answer)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Answer: Question: What is AI?
Context: NASA Develops Robust Artificial Intelligence for Planetary Rovers NASA is planning to add a strong dose of artificial intelligence (AI) to planetary rovers to make them much more self-reliant, capable of making basic decisions during a mission. Scientists are developing very complex AI software that enables a higher level of robotic intelligence. Computers with multiple personalities The jury's still out on whether a computer can ever truly be intelligent, but there's no question that it can have multiple personalities. It's just a matter of software. Battling Robots in Japan's Pop-Culture Tech The ring sits in the spotlight of a tense, packed auditorium and the jittery fighters await the bell at their red and blue corners. Like any fight, there's always the danger of a punishing uppercut or left hook. But these boxers have even more worries  like battery failure and software bugs. New NASA Supercomputer to Aid Theorists and Shuttle Engi

In [10]:
from transformers import pipeline

# Load a pre-trained language model for text generation
generator = pipeline("text-generation", model="gpt2")

def generate_answer(query):
    retrieved_docs = retrieve(query)
    context = " ".join(retrieved_docs)
    prompt = f"Question: {query}\nContext: {context}\nAnswer:"
    # Use max_new_tokens to control the length of generated text instead of max_length
    generated_answer = generator(prompt, max_new_tokens=50, num_return_sequences=1)
    return generated_answer[0]["generated_text"]

# Example query
query = "Why we need AI?"
answer = generate_answer(query)
print("Generated Answer:", answer)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Answer: Question: Why we need AI?
Context: NASA Develops Robust Artificial Intelligence for Planetary Rovers NASA is planning to add a strong dose of artificial intelligence (AI) to planetary rovers to make them much more self-reliant, capable of making basic decisions during a mission. Scientists are developing very complex AI software that enables a higher level of robotic intelligence. Space Science Pioneer Van Allen Questions Human Spaceflight (SPACE.com) SPACE.com - A leading space scientist has called to question the validity of human spaceflight, suggesting that sending astronauts outward from Earth is outdated, too costly, and the science returned is trivial. Computers with multiple personalities The jury's still out on whether a computer can ever truly be intelligent, but there's no question that it can have multiple personalities. It's just a matter of software. Battling Robots in Japan's Pop-Culture Tech The ring sits in the spotlight of a tense, packed auditorium 

In [11]:
!pip install langchain_community



In [12]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Initialize the generator with truncation explicitly enabled
generator = pipeline(
    "text-generation",
    model="EleutherAI/gpt-neo-125m",
    max_length=950,
    truncation=True,  # Explicitly enable truncation
    pad_token_id=50256
)

# Wrap the Hugging Face pipeline as a LangChain LLM
llm = HuggingFacePipeline(pipeline=generator)

# Define a prompt template
template = PromptTemplate(
    input_variables=["query", "context"],
    template="""
    Question: {query}
    Context: {context}
    """
)

# Define a simple chain
chain = LLMChain(llm=llm, prompt=template)

# Example usage
query = "What are the key differences between machine learning and deep learning?"
context = " ".join(retrieve(query))  # Assuming retrieve(query) returns relevant context

# Use the chain to generate an answer
result = chain.run(
    query=query,
    context=context,
    max_new_tokens=350  # Generate up to 350 tokens after the input
)

print(result)


  llm = HuggingFacePipeline(pipeline=generator)
  chain = LLMChain(llm=llm, prompt=template)
  result = chain.run(



    Question: What are the key differences between machine learning and deep learning?
                                                                                                                                                                                                                                                                                                             
