In [1]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

# Ensure the tokenizer models needed by word_tokenize are available locally.
# Older NLTK releases load the pickled 'punkt' models, while NLTK >= 3.8.2
# loads the 'punkt_tab' resource instead, so fetch both to work on either.
for _nltk_resource in ('punkt', 'punkt_tab'):
    nltk.download(_nltk_resource)

def load_text_file(file_path):
    """Read the file at ``file_path`` as UTF-8 text and return its whole content."""
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

def tokenize_text(text):
    """Split ``text`` into tokens with NLTK's ``word_tokenize`` and return them."""
    tokens = word_tokenize(text)
    return tokens

def get_most_common_words(tokens, n=10):
    """Return the ``n`` most frequent entries of ``tokens`` as (token, count) pairs.

    The counting is delegated to NLTK's ``FreqDist``.
    """
    return FreqDist(tokens).most_common(n)

if __name__ == "__main__":
    # Path to the text file
    file_path = "Output for Sentiment Analysis.txt"  # Replace with the path to your text file
    # Number of words to report. Shared by the query and the heading so the
    # two cannot drift apart when this value is changed.
    top_n = 10

    try:
        # Load and process the text
        text = load_text_file(file_path)
        tokens = tokenize_text(text)

        # Keep only purely alphabetic tokens: str.isalpha() rejects punctuation
        # and any token containing digits. Lower-case the survivors so that
        # counting is case-insensitive.
        tokens = [token.lower() for token in tokens if token.isalpha()]

        if not tokens:
            # Nothing survived the filter; say so instead of printing a bare heading.
            print(f"No alphabetic words found in '{file_path}'.")
        else:
            # Get and display the most common words
            common_words = get_most_common_words(tokens, n=top_n)
            print(f"{top_n} Most Common Words:")
            for word, count in common_words:
                print(f"{word}: {count}")

    except FileNotFoundError:
        print(f"Error: The file '{file_path}' was not found.")
    except Exception as e:
        # Top-level boundary of the script: report any other failure
        # (e.g. decoding errors or a missing NLTK resource) as a short message.
        print(f"An error occurred: {e}")


10 Most Common Words:
package: 7
downloading: 4
to: 4
c: 4
is: 3
already: 3
stopwords: 2
punkt: 2
wordnet: 2
text: 2


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\navad\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
