In [None]:
import math
from collections import Counter

def term_frequency(document):
    """Return the relative frequency of each word in a tokenized document.

    Args:
        document: A sized sequence of hashable tokens (e.g. a list of words).

    Returns:
        A dict mapping each distinct token to its occurrence count divided
        by the total number of tokens in ``document``. An empty document
        yields an empty dict.
    """
    occurrences = Counter(document)
    size = len(document)

    frequencies = {}
    # An empty document never enters the loop, so no division by zero occurs.
    for word, count in occurrences.items():
        frequencies[word] = count / size
    return frequencies

def inverse_document_frequency(documents):
    """Compute the inverse document frequency of every word in a corpus.

    The IDF of a word is ``log10(N / df)``, where ``N`` is the number of
    documents and ``df`` is the number of documents that contain the word
    at least once. A word present in every document therefore scores 0.0.

    Args:
        documents: A sized sequence of tokenized documents, each an
            iterable of hashable tokens.

    Returns:
        A dict mapping each distinct word to its IDF. Keys appear in order
        of first appearance in the corpus. An empty corpus yields an empty
        dict.
    """
    total_documents = len(documents)

    # Count, in a single pass, how many documents contain each word.
    # dict.fromkeys() de-duplicates within a document (a repeated word must
    # count once per document) while keeping first-appearance order, so the
    # resulting key order is deterministic across runs.
    document_counts = Counter(
        word for doc in documents for word in dict.fromkeys(doc)
    )

    return {
        word: math.log10(total_documents / doc_count)
        for word, doc_count in document_counts.items()
    }

def main():
    """Print TF-IDF scores for a small hard-coded corpus of three documents."""
    corpus = [
        ["this", "is", "a", "sample", "document"],
        ["another", "sample", "document", "for", "testing"],
        ["just", "one", "more", "sample", "document"],
    ]

    # IDF depends on the whole corpus, so it is computed once and shared.
    idf_by_word = inverse_document_frequency(corpus)

    # Weight each document's term frequencies by the corpus-wide IDF.
    scores_per_document = []
    for tokens in corpus:
        tf_by_word = term_frequency(tokens)
        weighted = {}
        for word, frequency in tf_by_word.items():
            weighted[word] = frequency * idf_by_word[word]
        scores_per_document.append(weighted)

    # Print one section per document, numbered from 1, followed by a blank line.
    for number, scores in enumerate(scores_per_document, start=1):
        print(f"TF-IDF for document {number}")
        for word in scores:
            print(f"{word} : {scores[word]}")
        print()

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()


TF-IDF for document 1
this : 0.09542425094393249
is : 0.09542425094393249
a : 0.09542425094393249
sample : 0.0
document : 0.0

TF-IDF for document 2
another : 0.09542425094393249
sample : 0.0
document : 0.0
for : 0.09542425094393249
testing : 0.09542425094393249

TF-IDF for document 3
just : 0.09542425094393249
one : 0.09542425094393249
more : 0.09542425094393249
sample : 0.0
document : 0.0

