### 1. Importing required libraries

In [1]:
import pickle
import re
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import math

### 2. Preprocessing blog post

In [None]:
def preprocess_text(text, use_ngrams=False, n_gram_range=(1, 1)):
    """Turn raw text into a list of cleaned tokens, optionally as n-grams.

    Non-string input is treated as an empty string. The text is lowercased,
    every character in ``string.punctuation`` and every run of digits is
    removed, and the result is split with NLTK's ``word_tokenize``. English
    stopwords and tokens of length 1 are then discarded.

    Args:
        text: The raw document; anything that is not a ``str`` counts as "".
        use_ngrams: When true, return n-grams built from the cleaned tokens
            instead of the cleaned tokens themselves.
        n_gram_range: ``(min_n, max_n)`` inclusive range of n-gram sizes.
            Sizes below 1 contribute nothing. Only read when ``use_ngrams``
            is true.

    Returns:
        A list of strings. With n-grams enabled, the grams are grouped by
        size in ascending order; grams of size 2 or more are the member
        tokens joined with "_".
    """
    cleaned = text.lower() if isinstance(text, str) else ""
    cleaned = re.sub(f'[{re.escape(string.punctuation)}]', '', cleaned)
    cleaned = re.sub(r'\d+', '', cleaned)

    raw_tokens = word_tokenize(cleaned)
    english_stopwords = set(stopwords.words('english'))
    kept = [
        token
        for token in raw_tokens
        if len(token) > 1 and token not in english_stopwords
    ]

    if not use_ngrams:
        return kept

    smallest, largest = n_gram_range[0], n_gram_range[1]
    grams = []
    for size in range(smallest, largest + 1):
        if size < 1:
            continue
        # For size 1 the join of a one-token slice is just that token.
        grams.extend(
            "_".join(kept[start:start + size])
            for start in range(len(kept) - size + 1)
        )
    return grams

### 3. Loading trained model

In [None]:
def load_model_params(filename):
    """Unpickle saved model parameters and return them as a 4-tuple.

    The file must contain a pickled mapping with the keys "vocabulary",
    "class_priors", "word_probabilities" and "total_words_in_class". A
    confirmation line is printed once the file has been read.

    Note: unpickling can execute arbitrary code, so only load files from a
    trusted source.

    Args:
        filename: Path of the pickle file to read.

    Returns:
        ``(vocabulary, class_priors, word_probabilities,
        total_words_in_class)`` taken from the loaded mapping.
    """
    wanted_keys = (
        "vocabulary",
        "class_priors",
        "word_probabilities",
        "total_words_in_class",
    )
    with open(filename, "rb") as model_file:
        params = pickle.load(model_file)
    print(f"Model parameters loaded from {filename}")
    return tuple(params[key] for key in wanted_keys)


### 4. Predicting own blog post

In [None]:
def predict_single_blog(doc_tokens, vocabulary, class_priors, word_probabilities, total_words_in_class, smoothing_alpha=1.0):
    """Return the class with the highest log-posterior for one document.

    For every class the score starts at ``log(prior)`` and adds
    ``log(probability)`` for each document token found in ``vocabulary``.
    Tokens outside the vocabulary are ignored. When a class has no stored
    probability for an in-vocabulary token, the fallback
    ``smoothing_alpha / total_words_in_class[c]`` is used instead.

    Args:
        doc_tokens: Iterable of tokens for the document. It is consumed
            exactly once, so a one-shot iterator is scored against every
            class.
        vocabulary: Container supporting ``in`` for known tokens.
        class_priors: Mapping of class label to prior probability.
        word_probabilities: Mapping of class label to a mapping of token to
            probability.
        total_words_in_class: Mapping of class label to a word total, used
            only for the fallback probability.
        smoothing_alpha: Numerator of the fallback probability.

    Returns:
        The label with the strictly greatest score (the first one wins a
        tie), or ``None`` when ``class_priors`` is empty.

    Raises:
        ValueError: If a prior or a token probability is not positive
            (``math.log`` rejects it).
        ZeroDivisionError: If the fallback is actually needed for a class
            whose word total is 0.
    """
    # The vocabulary filter does not depend on the class, so apply it once.
    known_tokens = [token for token in doc_tokens if token in vocabulary]

    best_class = None
    max_log_posterior = -float('inf')

    for c, prior_prob in class_priors.items():
        log_posterior = math.log(prior_prob)

        # Computed lazily: ``dict.get(key, default)`` would evaluate the
        # division on every token, and raise for a zero total even when the
        # stored probability exists and the fallback is never used.
        fallback_prob = None

        for word_token in known_tokens:
            word_prob = word_probabilities[c].get(word_token)
            if word_prob is None:
                if fallback_prob is None:
                    fallback_prob = smoothing_alpha / total_words_in_class[c]
                word_prob = fallback_prob
            log_posterior += math.log(word_prob)

        if log_posterior > max_log_posterior:
            max_log_posterior = log_posterior
            best_class = c

    return best_class

In [None]:

# Path of the pickle file holding the saved model parameters.
model_filename = "blog_categorization.pkl"


# Load the model, preprocess one sample blog post, and print its predicted
# category. Any failure along the way is reported with a message, not raised.
try:
    vocabulary_loaded, class_priors_loaded, word_probabilities_loaded, total_words_in_class_loaded = load_model_params(model_filename)
    print("Model parameters loaded successfully!")

    # Sample blog post to classify.
    new_blog_text = """Dharahara, also known as the Bhimsen Tower, is a historical tower located in the heart of Kathmandu, Nepal. It was originally built in 1832 by the then Prime Minister Bhimsen Thapa during the reign of Queen Lalit Tripura Sundari. The tower stood as a symbol of Nepal’s architectural heritage and resilience.
The original Dharahara was a nine-storey (61.88 meters tall) white cylindrical tower, designed in the Mughal–European fusion style. It had spiral staircases leading to the top, where a balcony offered panoramic views of the Kathmandu Valley. At the very top, there was a bronze mast that served as a lightning rod.
Unfortunately, the tower suffered destruction multiple times due to earthquakes. The first Dharahara collapsed in the 1934 earthquake but was later reconstructed. The most devastating collapse occurred during the April 25, 2015 earthquake, when the tower crumbled, killing and injuring many visitors who were inside."""

    # Preprocessing options: emit unigrams and bigrams.
    # NOTE(review): presumably these must match the settings used when the
    # model was trained — confirm against the training pipeline.
    USE_NGRAMS_PREDICT = True 
    NGRAM_RANGE_PREDICT = (1, 2) 

    print(f"\nNew blog content: '{new_blog_text}'")

    processed_new_blog_tokens = preprocess_text(new_blog_text, use_ngrams=USE_NGRAMS_PREDICT, n_gram_range=NGRAM_RANGE_PREDICT)
    print(f"Processed tokens for new blog: {processed_new_blog_tokens[:10]}...") # Show first few tokens

   
    # Score the tokens against every class (smoothing_alpha keeps its default).
    prediction_for_new_blog = predict_single_blog(
        processed_new_blog_tokens,
        vocabulary_loaded,
        class_priors_loaded,
        word_probabilities_loaded,
        total_words_in_class_loaded
    )

    print(f"\nPredicted category for the new blog: {prediction_for_new_blog}")

except FileNotFoundError:
    # The model file does not exist at model_filename.
    print(f"Error: Model parameters file '{model_filename}' not found. Please ensure you have run the training pipeline and saved the model parameters.")
except Exception as e:
    # Catch-all for this cell: any other error from loading, preprocessing
    # or prediction is printed instead of propagating.
    print(f"An error occurred while loading or predicting: {e}")

Model parameters loaded from blog_categorization.pkl
Model parameters loaded successfully!

New blog content: 'Dharahara, also known as the Bhimsen Tower, is a historical tower located in the heart of Kathmandu, Nepal. It was originally built in 1832 by the then Prime Minister Bhimsen Thapa during the reign of Queen Lalit Tripura Sundari. The tower stood as a symbol of Nepal’s architectural heritage and resilience.
The original Dharahara was a nine-storey (61.88 meters tall) white cylindrical tower, designed in the Mughal–European fusion style. It had spiral staircases leading to the top, where a balcony offered panoramic views of the Kathmandu Valley. At the very top, there was a bronze mast that served as a lightning rod.
Unfortunately, the tower suffered destruction multiple times due to earthquakes. The first Dharahara collapsed in the 1934 earthquake but was later reconstructed. The most devastating collapse occurred during the April 25, 2015 earthquake, when the tower crumbled, k