# In [4]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.probability import FreqDist

def eda_for_text(file_path, top_n=10, encoding="utf-8"):
    """
    Perform exploratory data analysis (EDA) on text data.

    Reads the file, tokenizes it with ``word_tokenize``, keeps only the
    purely alphabetic tokens (lower-cased), and prints the ``top_n`` most
    frequent words with their counts. Failures are reported on stdout
    instead of being raised to the caller.

    :param file_path: Path to the text file
    :param top_n: Number of most frequent words to print (default 10)
    :param encoding: Text encoding used to read the file (default UTF-8)
    :return: None
    """
    try:
        # Pass the encoding explicitly: without it open() falls back to the
        # platform locale, so the same file can decode differently (or fail)
        # from one machine to another.
        with open(file_path, 'r', encoding=encoding) as file:
            text = file.read()

        # Tokenize the text
        tokens = word_tokenize(text)

        # Keep alphabetic tokens only (drops punctuation and numbers) and
        # lower-case them so "Data" and "data" are counted together
        words = [word.lower() for word in tokens if word.isalpha()]

        # Compute the frequency distribution of words
        freq_dist = FreqDist(words)

        # Display the most common words
        print(f"{top_n} Most Common Words:")
        for word, count in freq_dist.most_common(top_n):
            print(f"{word}: {count}")

    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
    except Exception as e:
        # Deliberate best-effort boundary: report any other failure
        # (decoding error, missing tokenizer data, ...) without crashing
        print(f"An error occurred: {e}")

# Script entry point: run the analysis on a sample file
if __name__ == "__main__":
    # Make sure the 'punkt' tokenizer data is available before tokenizing
    nltk.download("punkt")

    # Location of the sample text file to analyse
    file_path = r"C:\Users\vavil\Downloads\sample-1.txt"

    eda_for_text(file_path)


# Output:
# 10 Most Common Words:
# data: 2
# science: 2
# is: 1
# amazing: 1
# involves: 1
# machine: 1
# learning: 1
# and: 1
# artificial: 1
# intelligence: 1
#
# [nltk_data] Downloading package punkt to
# [nltk_data]     C:\Users\vavil\AppData\Roaming\nltk_data...
# [nltk_data]   Package punkt is already up-to-date!
