In [3]:
import os
import pyllama
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def load_documents(directory):
    """Read every file directly inside *directory* as UTF-8 text.

    Entries are visited in sorted filename order so that the returned list
    (and any index built from its positions) is the same on every run;
    ``os.listdir`` on its own yields entries in arbitrary order.

    Args:
        directory: Path of the folder to scan. Subdirectories are skipped,
            not recursed into.

    Returns:
        A list holding the full text of each file, ordered by filename.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        UnicodeDecodeError: If a file's contents are not valid UTF-8.
    """
    documents = []
    for filename in sorted(os.listdir(directory)):
        filepath = os.path.join(directory, filename)
        # Skip subdirectories and any other entry that is not a file.
        if not os.path.isfile(filepath):
            continue
        with open(filepath, encoding='utf-8') as file:
            documents.append(file.read())
    return documents

def retrieve(query, vectorizer, tfidf_matrix, data, top_k=3):
    """Return the entries of *data* most similar to *query*.

    The query is transformed with *vectorizer* and compared against
    *tfidf_matrix* by cosine similarity; ``data[i]`` is paired with the
    similarity computed for row ``i``.

    Args:
        query: Text to search for.
        vectorizer: Object whose ``transform`` turns a list of strings into
            vectors comparable with *tfidf_matrix*.
        tfidf_matrix: Matrix of document vectors to compare against.
        data: Sequence of documents, indexed by matrix row.
        top_k: Maximum number of results to return.

    Returns:
        A list of ``(document, similarity)`` tuples ordered from highest to
        lowest similarity. An empty list is returned when *data* is empty,
        when *top_k* is not positive, or when any exception is raised during
        the lookup (the exception message is printed).
    """
    if not data or top_k <= 0:
        return []
    try:
        query_vector = vectorizer.transform([query])
        scores = cosine_similarity(query_vector, tfidf_matrix).flatten()
        ascending = scores.argsort()
        # Take the last top_k positions of the ascending sort, then flip
        # them so the best match comes first.
        best_first = ascending[-top_k:][::-1]
        matches = []
        for idx in best_first:
            matches.append((data[idx], scores[idx]))
        return matches
    except Exception as e:
        print(f"An error occurred: {e}")
        return []

def answer_question(question, documents, vectorizer, tfidf_matrix, model, tokenizer, top_k=5, max_tokens=150):
    """Generate an answer to *question* using retrieved documents as context.

    Up to *top_k* documents returned by ``retrieve`` are joined with spaces
    into a context string, which is placed ahead of the question in the
    prompt handed to the model.

    Args:
        question: The user's question.
        documents: Sequence of document texts, indexed by matrix row.
        vectorizer: Vectorizer passed through to ``retrieve``.
        tfidf_matrix: Document matrix passed through to ``retrieve``.
        model: Object exposing ``generate(input_ids, **kwargs)``. The keyword
            arguments used below follow the Hugging Face ``generate``
            convention -- TODO confirm against the loader actually in use.
        tokenizer: Object exposing ``encode`` and ``decode``.
        top_k: Number of documents to retrieve as context.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded text of the first generated sequence, or a fixed fallback
        message when no context could be assembled.
    """
    retrieved_texts = retrieve(question, vectorizer, tfidf_matrix, documents, top_k=top_k)
    context = " ".join(text for text, _ in retrieved_texts)
    if not context:
        return "No relevant context found for the question."

    prompt = f"Context: {context}\n\nQuestion: {question}"
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # Budget only the generated continuation. ``max_length`` also counts the
    # prompt, and a prompt built from whole documents can use up that limit
    # before any answer token is produced.
    output_ids = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        num_beams=1,
    )
    # NOTE(review): for decoder-only models the decoded sequence presumably
    # still begins with the prompt text -- confirm whether callers want it
    # stripped.
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)

# Setup: read every file in the RAG_DATA folder (resolved relative to the
# current working directory) and build the TF-IDF index used by retrieve().
documents = load_documents('RAG_DATA')
# Vocabulary is capped at 10,000 features and English stop words are dropped.
vectorizer = TfidfVectorizer(max_features=10000, stop_words='english')
# retrieve() indexes `documents` by matrix row, so this matrix must be built
# from the same list, in the same order.
tfidf_matrix = vectorizer.fit_transform(documents)

# Load LLaMA model
# NOTE(review): hard-coded absolute path to a local checkpoint; this is
# machine-specific and will need changing on any other system.
model_path = '/Users/gavingalusha/llama/llama-2-7b-chat'
# NOTE(review): assumes pyllama.load_model returns a (model, tokenizer) pair
# compatible with answer_question() -- confirm against the installed package.
model, tokenizer = pyllama.load_model(model_path)

# Answering a question (answer_question's defaults apply: top_k=5,
# max_tokens=150).
query = "What are the core required classes for the Chemistry Major?"
answer = answer_question(query, documents, vectorizer, tfidf_matrix, model, tokenizer)
print("Query:", query)
print(answer)

ModuleNotFoundError: No module named 'pyllama'