In [8]:

import re
from collections import defaultdict

class InvertedIndex:
    def __init__(self):
        self.index = defaultdict(list)

    def add_document(self, doc_id, content):
        words = re.findall(r'\w+', content.lower())
        for word in words:
            self.index[word].append(doc_id)

    def search(self, query):
        query_terms = query.lower().split()
        if all(term in self.index for term in query_terms):
            result = set(self.index[query_terms[0]])
            for term in query_terms[1:]:
                result.intersection_update(self.index[term])
            return list(result)
        else:
            return []

    def print_term_document_matrix(self, documents):
        print("Term-Document Matrix:")
        for term, doc_ids in self.index.items():
            row = [1 if doc_id in doc_ids else 0 for doc_id in documents.keys()]
            print(f"{term}: {row}")

    def print_inverted_index(self):
        print("Inverted Index:")
        for term, doc_ids in self.index.items():
            print(f"{term}: {doc_ids}")

if __name__ == "__main__":
    # Sample documents
    documents = {
        1: "This is the first document.",
        2: "This document is the second document.",
        3: "And this is the third one.",
        4: "Is this the first document?"
    }

    # Create an inverted index
    index = InvertedIndex()
    for doc_id, content in documents.items():
        index.add_document(doc_id, content)

    # Print Term-Document Matrix and Inverted Index
    index.print_term_document_matrix(documents)
    print("")
    index.print_inverted_index()
    print("")

    # Perform a search
    query = input("Enter your search query: ")
    results = index.search(query)

    if results:
        print("Matching documents:")
        for doc_id in results:
            print(f"Document {doc_id}: {documents[doc_id]}")
    else:
        print("No matching documents found.")


Term-Document Matrix:
this: [1, 1, 1, 1]
is: [1, 1, 1, 1]
the: [1, 1, 1, 1]
first: [1, 0, 0, 1]
document: [1, 1, 0, 1]
second: [0, 1, 0, 0]
and: [0, 0, 1, 0]
third: [0, 0, 1, 0]
one: [0, 0, 1, 0]

Inverted Index:
this: [1, 2, 3, 4]
is: [1, 2, 3, 4]
the: [1, 2, 3, 4]
first: [1, 4]
document: [1, 2, 2, 4]
second: [2]
and: [3]
third: [3]
one: [3]

Enter your search query: document
Matching documents:
Document 1: This is the first document.
Document 2: This document is the second document.
Document 4: Is this the first document?
