In [2]:
import nltk
from nltk.tokenize import word_tokenize
from collections import Counter

def calculate_term_frequency(file_path, *, top_n=5, encoding="utf-8"):
    """
    Load a text file, tokenize it, and calculate the term frequency (TF) of each word.

    The text is split with NLTK's ``word_tokenize``. Only purely alphabetic
    tokens are kept, and they are lower-cased before counting. The ``top_n``
    most frequent words are printed and the full counts are returned.

    Failures are reported on stdout instead of being raised, so the function
    is safe to call from a script or notebook cell.

    :param file_path: Path to the text file
    :param top_n: Number of most frequent tokens to print (integer, default 5)
    :param encoding: Encoding used to decode the file (default ``"utf-8"``)
    :return: ``Counter`` mapping each word to its count, or ``None`` if the
        file could not be read or processed
    """
    try:
        # An explicit encoding keeps decoding independent of the platform
        # locale (without it, Windows falls back to a legacy code page).
        with open(file_path, 'r', encoding=encoding) as file:
            text = file.read()

        tokens = word_tokenize(text)

        # Drop punctuation/numbers and fold case so "Data" and "data" match.
        tf = Counter(token.lower() for token in tokens if token.isalpha())

        print(f"Top {top_n} Most Frequent Tokens:")
        for word, count in tf.most_common(top_n):
            print(f"{word}: {count}")

        return tf

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        # Script-level boundary: report anything else (decode errors, missing
        # tokenizer data, ...) rather than crash the caller.
        print(f"An error occurred: {e}")
    return None

# Test the program
if __name__ == "__main__":
    # word_tokenize needs the Punkt sentence-tokenizer data. Older NLTK
    # releases load the pickled 'punkt' package, while newer releases load
    # the pickle-free 'punkt_tab' package, so fetch both to work on either.
    for resource in ("punkt", "punkt_tab"):
        nltk.download(resource)

    file_path = r"C:\Users\vavil\Downloads\Sample--1.txt"  # Use your provided file path
    calculate_term_frequency(file_path)


Top 5 Most Frequent Tokens:
data: 3
science: 3
is: 2
learning: 2
fun: 1


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vavil\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
