<a href="https://colab.research.google.com/github/zeyagsen1/RAG/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import re
from sentence_transformers import SentenceTransformer, util
from transformers import BlipProcessor, BlipForConditionalGeneration
import os
import torch
from PIL import Image

# Step 1: Load a retrieval model
retrieval_model = SentenceTransformer('all-MiniLM-L6-v2')

# Step 2: Dynamically load and clean text data
def load_texts_from_directory(directory_path):
    texts = []
    for filename in os.listdir(directory_path):
        if filename.endswith(".txt"):
            with open(os.path.join(directory_path, filename), "r", encoding="utf-8") as file:
                raw_text = file.read()
                cleaned_text = clean_text(raw_text)
                texts.append(cleaned_text)
    return texts

def clean_text(text):
    """Cleans the input text by removing special characters and redundant whitespace."""
    # Remove excessive whitespace and newlines
    text = re.sub(r'\s+', ' ', text)
    # Remove non-alphanumeric characters (keep essential punctuation)
    text = re.sub(r'[^a-zA-Z0-9,.!? ]+', '', text)
    return text.strip()

text_files_dir = "/content/texts"  # Replace with your text directory path
texts = load_texts_from_directory(text_files_dir)
corpus_embeddings = retrieval_model.encode(texts, convert_to_tensor=True)

# Step 3: Load the BLIP model
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

# Step 4: RAG pipeline with cleaned context
def rag_pipeline(query, image_path):
    # Embed the query
    query_embedding = retrieval_model.encode(query, convert_to_tensor=True)

    # Retrieve the top-k relevant passages
    top_k = 3
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)
    relevant_texts = [texts[hit['corpus_id']] for hit in hits[0]]
    print(relevant_texts)

    # Combine and clean relevant texts
    context = " ".join(relevant_texts)
    context_inputs = processor.tokenizer(context, truncation=True, max_length=450, return_tensors="pt")
    truncated_context = processor.tokenizer.decode(context_inputs['input_ids'][0], skip_special_tokens=True)

    # Load and preprocess the image
    image = Image.open(image_path).convert("RGB")
    inputs = processor(images=image, text=truncated_context, return_tensors="pt", truncation=True).to(device)

    # Generate a response
    outputs = model.generate(**inputs, max_new_tokens=50)
    generated_text = processor.decode(outputs[0], skip_special_tokens=True)

    return generated_text

# Example Usage
query = "What is described in the image?"
image_path = "/content/images/page_91_full_page_shape_3.png"  # Replace with your image path
response = rag_pipeline(query, image_path)
print(response)


['228 Chapter 6. Complexity Theory z0, 0 1 0 1 1 2 0 z0, 1 0 1 1 2 0 1 z0, 0 1 1 2 0 1 0 z0, 1 1 2 0 1 0 1 z0, 1 2 0 1 0 1 1 z0, 2 0 1 0 1 z2, 1 2 0 1 0 z2, 1 0 2 0 1 z2, 0 0 0 2 0 1 z1, 1 0 0 2 Figure 6.6 The computation of the Turing machine on input x  11. The pair state,symbol indicates the position of the tape head.      z0 0 z0, 0      z0 1 z0, 1     z2  2 z0, 2     z2  0 z2, 1       z1, 1 z2, 0 3. The states z0 and z2, and the three symbols of the alphabet yield twelve tile types      z0 z0, 0 0     z0  z0, 0 0      z0 z0, 1 1     z0  z0, 1 1      z0 z0, 2 2     z0  z0, 2 2      z2 z2, 0 0     z2  z2, 0 0      z2 z2, 1 1     z2  z2, 1 1      z2 z2, 2 2     z2  z2, 2 2 The computation of the Turing machine on input x  11 consists of nine computation steps. During this computation, the tape head visits exactly six cells. Therefore, the frame for the domino game has nine rows and six columns. This frame is given in Figure 6.7. In Figure 6.8, you nd the solution of the domino game. 

In [3]:
!pip install pdf2image
!pip install pymupdf
!pip install pillow
!pip install pytesseract
!pip install transformers
!pip install torch
!pip install numpy



Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Downloading pdf2image-1.17.0-py3-none-any.whl (11 kB)
Installing collected packages: pdf2image
Successfully installed pdf2image-1.17.0
Collecting pymupdf
  Downloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.25.1-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (20.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.0/20.0 MB[0m [31m67.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.25.1
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13


In [7]:
!pip install torch




In [9]:

import fitz  # PyMuPDF
from pdf2image import convert_from_path
import pytesseract
from transformers import AutoTokenizer, AutoModel
import faiss
import numpy as np
import torch

# 1. PDF'den Metin ve Görüntü Çıkarma
def extract_text_and_images(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    images = []

    for page_num in range(len(doc)):
        page = doc[page_num]
        text += page.get_text()

        # Extract images
        for img_index, img in enumerate(page.get_images(full=True)):
            xref = img[0]
            base_image = doc.extract_image(xref)
            images.append((page_num, base_image["image"]))

    return text, images

# 2. Görsellerin OCR ile Metin Açıklaması
def image_to_text(image_data):
    from PIL import Image
    import io

    image = Image.open(io.BytesIO(image_data))
    text = pytesseract.image_to_string(image)
    return text

# 3. Metin Vektörleştirme
def text_to_vector(text, model_name="sentence-transformers/all-MiniLM-L6-v2"):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModel.from_pretrained(model_name)
    tokens = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        embeddings = model(**tokens).last_hidden_state.mean(dim=1)
    return embeddings.numpy()

# 4. FAISS ile Vektör Veritabanı Oluşturma
def create_faiss_index(vectors):
    dimension = vectors.shape[1]
    index = faiss.IndexFlatL2(dimension)  # L2 distance
    index.add(vectors)
    return index

# 5. RAG İle Arama Fonksiyonu
def search_query(query, index, vectors, texts):
    query_vector = text_to_vector(query)
    D, I = index.search(query_vector, k=5)  # Top 5 results
    return [texts[i] for i in I[0]]

# PDF Dosyasını İşleme
pdf_path = "/content/TheoryOfComputation.pdf"
text, images = extract_text_and_images(pdf_path)

# Görüntü Metinlerini ve Vektörleri Toplama
texts = [text] + [image_to_text(img[1]) for img in images]
vectors = np.vstack([text_to_vector(t) for t in texts])

# FAISS Veritabanı
index = create_faiss_index(vectors)

# Örnek Sorgu
query = "DFA and NFA comparison"
results = search_query(query, index, vectors, texts)
print("Search Results:", results)


Search Results: ['Introduction to Theory of Computation\nAnil Maheshwari\nMichiel Smid\nSchool of Computer Science\nCarleton University\nOttawa\nCanada\n{anil,michiel}@scs.carleton.ca\nAugust 29, 2024\nii\nContents\nContents\nPreface\nvi\n1\nIntroduction\n1\n1.1\nPurpose and motivation\n. . . . . . . . . . . . . . . . . . . . .\n1\n1.1.1\nComplexity theory\n. . . . . . . . . . . . . . . . . . . .\n2\n1.1.2\nComputability theory . . . . . . . . . . . . . . . . . . .\n2\n1.1.3\nAutomata theory . . . . . . . . . . . . . . . . . . . . .\n3\n1.1.4\nThis course\n. . . . . . . . . . . . . . . . . . . . . . . .\n3\n1.2\nMathematical preliminaries\n. . . . . . . . . . . . . . . . . . .\n4\n1.3\nProof techniques\n. . . . . . . . . . . . . . . . . . . . . . . . .\n7\n1.3.1\nDirect proofs\n. . . . . . . . . . . . . . . . . . . . . . .\n8\n1.3.2\nConstructive proofs . . . . . . . . . . . . . . . . . . . .\n9\n1.3.3\nNonconstructive proofs . . . . . . . . . . . . . . . . . .\n10\n1.3.4\nProofs by co

In [12]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import torch
from torchvision import transforms
from transformers import CLIPModel, CLIPProcessor

# 1. MODELLERİ YÜKLEME
# Metin için SentenceTransformer
text_model = SentenceTransformer('all-MiniLM-L6-v2')

# Görsel için CLIP
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Görselleri işlemek için dönüştürücü
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(images=image, return_tensors="pt")
    return inputs['pixel_values']

# 2. EMBEDDING FONKSİYONLARI
# Metin embedding'i
def get_text_embedding(text):
    return text_model.encode(text)

# Görsel embedding'i
def get_image_embedding(image_path):
    image_tensor = preprocess_image(image_path)
    with torch.no_grad():
        image_features = clip_model.get_image_features(image_tensor)
    return image_features.squeeze().numpy()

# Manually reduce the image embedding size to match the text embedding size
def resize_image_embedding(image_embedding, target_dim=384):
    return image_embedding[:target_dim]

# Kombine embedding
def combine_embeddings(text_embedding, image_embedding, alpha=0.5, beta=0.5):
    image_embedding = resize_image_embedding(image_embedding)
    return alpha * text_embedding + beta * image_embedding

# 3. FAISS VE METADATA
embedding_dim = 384  # Updated dimension after resizing
index = faiss.IndexFlatL2(embedding_dim)
metadata_store = {}

# 4. VERİ EKLEME
# Görsel ve metin eklemek için fonksiyon
def add_to_database(id, text, image_path):
    text_embedding = get_text_embedding(text)
    image_embedding = get_image_embedding(image_path)
    combined_embedding = combine_embeddings(text_embedding, image_embedding)

    # Embedding'i FAISS'e ekle
    index.add(np.array([combined_embedding], dtype='float32'))

    # Metadata'yı sakla
    metadata_store[id] = {
        "text": text,
        "image_path": image_path
    }

# Örnek veri ekleme
add_to_database("dfa-example", "This is a DFA diagram.", "/content/page_30_full_page_shape_1.png")
add_to_database("nfa-example", "This is an NFA diagram.", "/content/page_31_full_page_shape_1.png")

# 5. SORGU YAPMA VE CEVAP ÜRETME
# Sorgu fonksiyonu
def query_database(user_query, top_k=1):
    query_embedding = get_text_embedding(user_query)
    distances, indices = index.search(np.array([query_embedding], dtype='float32'), k=top_k)

    results = []
    for idx in indices[0]:
        if idx != -1:
            id = list(metadata_store.keys())[idx]
            metadata = metadata_store[id]
            results.append(metadata)
    return results

# Sorguya göre cevap oluşturma
def generate_response(user_query):
    results = query_database(user_query)
    if not results:
        return "Sorry, I couldn't find relevant information."

    # İlk sonuç için cevap üretimi
    metadata = results[0]
    text = metadata['text']
    image_path = metadata['image_path']
    response = f"Relevant text: {text}\nRelated image path: {image_path}"
    return response

# Kullanıcı sorgusu
user_query = "What is a DFA?"
response = generate_response(user_query)
print(response)


Relevant text: This is a DFA diagram.
Related image path: /content/page_30_full_page_shape_1.png


In [17]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import torch
from transformers import CLIPModel, CLIPProcessor, pipeline

# 1. MODELLERİ YÜKLEME
# Metin için SentenceTransformer
text_model = SentenceTransformer('all-MiniLM-L6-v2')

# Görsel için CLIP
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Özetleme için BART modelini yükleme
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Görselleri işlemek için dönüştürücü
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(images=image, return_tensors="pt")
    return inputs['pixel_values']

# 2. EMBEDDING FONKSİYONLARI
# Metin embedding'i
def get_text_embedding(text):
    return text_model.encode(text)

# Görsel embedding'i
def get_image_embedding(image_path):
    image_tensor = preprocess_image(image_path)
    with torch.no_grad():
        image_features = clip_model.get_image_features(image_tensor)
    return image_features.squeeze().numpy()

# Manually reduce the image embedding size to match the text embedding size
def resize_image_embedding(image_embedding, target_dim=384):
    return image_embedding[:target_dim]

# Kombine embedding
def combine_embeddings(text_embedding, image_embedding, alpha=0.5, beta=0.5):
    image_embedding = resize_image_embedding(image_embedding)
    return alpha * text_embedding + beta * image_embedding

# 3. FAISS VE METADATA
embedding_dim = 384  # Updated dimension after resizing
index = faiss.IndexFlatL2(embedding_dim)
metadata_store = {}

# 4. METİN ÖZETLEME
def summarize_text(text):
    # Metni özetleme
    summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# 5. VERİ EKLEME
# Görsel ve metin eklemek için fonksiyon
def add_to_database(id, file_path, image_path):
    # Uzun metni özetle
    with open(file_path, 'r') as file:
        text = file.read()
    summarized_text = summarize_text(text)
    print(f"ozet: {summarize_text}")
    # Metin embedding'ini al
    text_embedding = get_text_embedding(summarized_text)

    # Görsel embedding'ini al
    image_embedding = get_image_embedding(image_path)

    # Kombine embedding'i oluştur
    combined_embedding = combine_embeddings(text_embedding, image_embedding)

    # Embedding'i FAISS'e ekle
    index.add(np.array([combined_embedding], dtype='float32'))

    # Metadata'yı sakla
    metadata_store[id] = {
        "text": summarized_text,
        "image_path": image_path
    }

# Örnek veri ekleme
# Add database entries for the numbers with the updated text and image file paths
add_to_database("example-35", "/content/text/35_full_page.txt", "/content/img/page_35_full_page_shape_1.png")
add_to_database("example-37", "/content/text/37_full_page.txt", "/content/img/page_37_full_page_shape_1.png")
add_to_database("example-38", "/content/text/38_full_page.txt", "/content/img/page_38_full_page_shape_1.png")
add_to_database("example-43", "/content/text/43_full_page.txt", "/content/img/page_43_full_page_shape_1.png")
add_to_database("example-45", "/content/text/45_full_page.txt", "/content/img/page_45_full_page_shape_1.png")
add_to_database("example-46", "/content/text/46_full_page.txt", "/content/img/page_46_full_page_shape_1.png")
add_to_database("example-53", "/content/text/53_full_page.txt", "/content/img/page_53_full_page_shape_1.png")
add_to_database("example-55", "/content/text/55_full_page.txt", "/content/img/page_55_full_page_shape_1.png")
add_to_database("example-57", "/content/text/57_full_page.txt", "/content/img/page_57_full_page_shape_1.png")
add_to_database("example-59", "/content/text/59_full_page.txt", "/content/img/page_59_full_page_shape_1.png")
add_to_database("example-66", "/content/text/66_full_page.txt", "/content/img/page_66_full_page_shape_1.png")
add_to_database("example-67", "/content/text/67_full_page.txt", "/content/img/page_67_full_page_shape_1.png")
add_to_database("example-68", "/content/text/68_full_page.txt", "/content/img/page_68_full_page_shape_1.png")
add_to_database("example-69", "/content/text/69_full_page.txt", "/content/img/page_69_full_page_shape_1.png")
'''
add_to_database("example-78", "/content/text/78_full_page.txt", "/content/img/page_78_full_page_shape_1.png")
add_to_database("example-82", "/content/text/82_full_page.txt", "/content/img/page_82_full_page_shape_1.png")
add_to_database("example-91", "/content/text/91_full_page.txt", "/content/img/page_91_full_page_shape_1.png")
add_to_database("example-94", "/content/text/94_full_page.txt", "/content/img/page_94_full_page_shape_1.png")
add_to_database("example-110", "/content/text/110_full_page.txt", "/content/img/page_110_full_page_shape_1.png")
'''
# 6. SORGU YAPMA VE CEVAP ÜRETME
# Sorgu fonksiyonu
def query_database(user_query, top_k=1):
    query_embedding = get_text_embedding(user_query)
    distances, indices = index.search(np.array([query_embedding], dtype='float32'), k=top_k)

    results = []
    for idx in indices[0]:
        if idx != -1:
            id = list(metadata_store.keys())[idx]
            metadata = metadata_store[id]
            results.append(metadata)
    return results

# Sorguya göre cevap oluşturma
def generate_response(user_query):
    results = query_database(user_query)
    if not results:
        return "Sorry, I couldn't find relevant information."

    # İlk sonuç için cevap üretimi
    metadata = results[0]
    text = metadata['text']
    image_path = metadata['image_path']
    response = f"Relevant text: {text}\nRelated image path: {image_path}"
    return response

# Kullanıcı sorgusu
user_query = "What is a DFA?"
response = generate_response(user_query)
print(response)


Device set to use cpu


ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
ozet: <function summarize_text at 0x7be4b6638af0>
Relevant text: The state diagram of N is as follows. We will show how to convert this NFA N to a DFA M that accepts the same language. Following the proof of Theorem 2.5.1, M is speciﬁed by M = (Q′, Σ, δ′, q′, F ′)
Related image path: /content/img/page_53_full_page_shape_1.png


In [20]:
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
# 1. T5 Modeli ve Tokenizer'ı Yükleme
t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")

# 2. CLIP Modeli ve Processor'ı Yükleme (Görsel Özellik Çıkartmak için)
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# 3. Görseli İşleme (CLIP ile)
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(images=image, return_tensors="pt")
    return inputs['pixel_values']

def get_image_features(image_path):
    image_tensor = preprocess_image(image_path)
    with torch.no_grad():
        image_features = clip_model.get_image_features(image_tensor)
    return image_features.squeeze().numpy()

# 4. T5 Modeline Metinle Açıklama Üretme
def generate_text_description(image_path, text_input):
    image_features = get_image_features(image_path)

    # Görsel ve metni birleştirerek açıklama oluşturmak
    input_text = f"Describe the following image based on this information: {text_input}"

    # T5 modeline metin tokenizasyonu
    input_ids = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

    # T5 ile açıklama üretme
    output = t5_model.generate(input_ids, max_length=150, num_beams=4, early_stopping=True)

    # Sonucu çözme ve yazdırma
    description = t5_tokenizer.decode(output[0], skip_special_tokens=True)
    return description

# 5. Örnek Kullanım
image_path = "/content/download.png"  # Görsel dosyasının yolu
text_input = "This is an image of a DFA (Deterministic Finite Automaton)."  # Metin açıklaması

description = generate_text_description(image_path, response)
print(description)

True


In [22]:
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image
import torch
from transformers import CLIPModel, CLIPProcessor, pipeline, T5ForConditionalGeneration, T5Tokenizer

# 1. MODELLERİ YÜKLEME
# Metin için SentenceTransformer
text_model = SentenceTransformer('all-MiniLM-L6-v2')

# Görsel için CLIP
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Özetleme için BART modelini yükleme
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# T5 Modeli ve Tokenizer'ı Yükleme
t5_model = T5ForConditionalGeneration.from_pretrained("t5-base")
t5_tokenizer = T5Tokenizer.from_pretrained("t5-base")

# 2. GÖRSEL İŞLEME
def preprocess_image(image_path):
    image = Image.open(image_path).convert("RGB")
    inputs = clip_processor(images=image, return_tensors="pt")
    return inputs['pixel_values']

# 3. EMBEDDING FONKSİYONLARI
# Metin embedding'i
def get_text_embedding(text):
    return text_model.encode(text)

# Görsel embedding'i
def get_image_embedding(image_path):
    image_tensor = preprocess_image(image_path)
    with torch.no_grad():
        image_features = clip_model.get_image_features(image_tensor)
    return image_features.squeeze().numpy()

# Manually reduce the image embedding size to match the text embedding size
def resize_image_embedding(image_embedding, target_dim=384):
    return image_embedding[:target_dim]

# Kombine embedding
def combine_embeddings(text_embedding, image_embedding, alpha=0.5, beta=0.5):
    image_embedding = resize_image_embedding(image_embedding)
    return alpha * text_embedding + beta * image_embedding

# 4. FAISS VE METADATA
embedding_dim = 384  # Updated dimension after resizing
index = faiss.IndexFlatL2(embedding_dim)
metadata_store = {}

# 5. METİN ÖZETLEME
def summarize_text(text):
    # Metni özetleme
    summary = summarizer(text, max_length=150, min_length=30, do_sample=False)
    return summary[0]['summary_text']

# 6. VERİ EKLEME
def add_to_database(id, file_path, image_path):
    # Uzun metni özetle
    with open(file_path, 'r') as file:
        text = file.read()
    summarized_text = summarize_text(text)

    # Metin embedding'ini al
    text_embedding = get_text_embedding(summarized_text)

    # Görsel embedding'ini al
    image_embedding = get_image_embedding(image_path)

    # Kombine embedding'i oluştur
    combined_embedding = combine_embeddings(text_embedding, image_embedding)

    # Embedding'i FAISS'e ekle
    index.add(np.array([combined_embedding], dtype='float32'))

    # Metadata'yı sakla
    metadata_store[id] = {
        "text": summarized_text,
        "image_path": image_path
    }

# 7. SORGU YAPMA VE CEVAP ÜRETME
def query_database(user_query, top_k=1):
    query_embedding = get_text_embedding(user_query)
    distances, indices = index.search(np.array([query_embedding], dtype='float32'), k=top_k)

    results = []
    for idx in indices[0]:
        if idx != -1:
            id = list(metadata_store.keys())[idx]
            metadata = metadata_store[id]
            results.append(metadata)
    return results

# Kullanıcı sorgusuna dayalı açıklama üretme
def generate_text_description(image_path, text_input):
    image_features = get_image_embedding(image_path)
    input_text = f"Describe the following image based on this information: {text_input}"

    # T5 modeline metin tokenizasyonu
    input_ids = t5_tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

    # T5 ile açıklama üretme
    output = t5_model.generate(input_ids, max_length=150, num_beams=4, early_stopping=True)

    # Sonucu çözme ve yazdırma
    description = t5_tokenizer.decode(output[0], skip_special_tokens=True)
    return description

# Kullanıcı sorgusu ve veri ekleme örneği
user_query = "What is a DFA?"
response = generate_response(user_query)
print(response)

# Örnek Veri Ekleyip Sorgulama
add_to_database("example-35", "/content/text/35_full_page.txt", "/content/img/page_35_full_page_shape_1.png")
add_to_database("example-35", "/content/text/35_full_page.txt", "/content/img/page_35_full_page_shape_1.png")
add_to_database("example-37", "/content/text/37_full_page.txt", "/content/img/page_37_full_page_shape_1.png")
add_to_database("example-38", "/content/text/38_full_page.txt", "/content/img/page_38_full_page_shape_1.png")
add_to_database("example-43", "/content/text/43_full_page.txt", "/content/img/page_43_full_page_shape_1.png")
add_to_database("example-45", "/content/text/45_full_page.txt", "/content/img/page_45_full_page_shape_1.png")
add_to_database("example-46", "/content/text/46_full_page.txt", "/content/img/page_46_full_page_shape_1.png")
add_to_database("example-53", "/content/text/53_full_page.txt", "/content/img/page_53_full_page_shape_1.png")
add_to_database("example-55", "/content/text/55_full_page.txt", "/content/img/page_55_full_page_shape_1.png")
add_to_database("example-57", "/content/text/57_full_page.txt", "/content/img/page_57_full_page_shape_1.png")
description = generate_text_description("/content/img/page_35_full_page_shape_1.png", response)
print(description)


Device set to use cpu


Sorry, I couldn't find relevant information.
the following image based on this information: Sorry, I couldn't find relevant information.
