In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from scipy.fft import fft

def compute_fft_features(window):
    # Convert to grayscale if the window is not already
    if len(window.shape) == 3:
        window = cv2.cvtColor(window, cv2.COLOR_BGR2GRAY)
    
    # Flatten the window to 1D array for FFT computation
    window_flat = window.flatten()
    
    # Compute FFT and take the first N coefficients (e.g., 198)
    fft_features = np.abs(fft(window_flat))[:198]  # Use absolute value of FFT and keep first N coefficients
    
    return fft_features

# Adjust these paths according to your dataset structure
image_folder = r'C:\Users\Dell\Downloads\color_equlsize_jpg'
labels_df = pd.read_excel(r'C:\Users\Dell\Downloads\line_gt 6.xlsx') 
# Initialize a dictionary to store FFT feature sequences for each image
word_sequences = {}

for index, row in labels_df.iterrows():
    image_name = row['image name']
    character_sequence = row['gt']
    print(image_folder, image_name)
    
    # Load the corresponding image
    image_path = os.path.join(image_folder, image_name)
    image = cv2.imread(image_path)
    
    if image is None:
        print(f"Image {image_name} could not be loaded.")
        continue

    image_width = image.shape[1]  # Get image width
    
    # Define the sliding window parameters
    window_width = 140  # Width of the sliding window in pixels
    step_size = 20     # Step size of the sliding window in pixels

    # Calculate the number of windows
    num_windows = (image_width - window_width) // step_size + 1

    # Create an empty list to hold the sequence of FFT features for this word
    word_fft_sequence = []

    # Create sliding windows and compute FFT features for each window
    for i in range(num_windows):
        # Calculate the pixel range covered by the current window
        window_start = i * step_size
        window_end = window_start + window_width
        
        # Extract the window from the image
        window = image[:, window_start:window_end]  # Assume height is all rows, width is the window
        
        # Compute FFT features for the window
        fft_features = compute_fft_features(window)
        word_fft_sequence.append(fft_features)
        
    # Store the FFT feature sequence for the current word/image
    word_sequences[image_name] = np.vstack(word_fft_sequence)  # Stack the list of feature arrays to form a matrix


C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_1.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_2.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_3.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_4.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_5.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_6.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_7.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_8.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_039_line_9.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_1.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_2.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_3.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_4.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_5.jpg
C:\Users\Dell\Downloads\color_equlsize_jpg MaI849_038_line_6.jpg
C:\Users\Dell\Downloads\c

In [2]:
# Initialize lists for sequences and their corresponding lengths
sequences = []
lengths = []
labels = []

# For each image, add its FFT feature sequence and the corresponding label (word)
for image_name, fft_sequence in word_sequences.items():  # Assuming FFT sequences are stored in word_sequences
    # Get the corresponding label (word) from the DataFrame
    label = labels_df.loc[labels_df['image name'] == image_name, 'gt'].values[0]
    
    # Append the FFT sequence and its length
    sequences.append(fft_sequence)
    lengths.append(len(fft_sequence))
    labels.append(label)  # Store the label for each image

# Convert sequences to a single numpy array
X = np.concatenate(sequences)  # Flatten the list of sequences

# Convert lengths to numpy array (required by hmmlearn)
model_lengths = np.array(lengths)

# Print the prepared data
print("Sequences shape:", X.shape)
print("Lengths:", model_lengths)
print("Labels:", labels[:5])  # Display the first 5 labels


Sequences shape: (26442, 198)
Lengths: [339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339
 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339
 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339
 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339 339
 339 339 339 339 339 339]
Labels: [' അവരൊധാ കഴിച്ചു കൊള്ളുവാൻ തക്കവണ്ണം എച്ചും നീരും കൊടുത്തു ചെരമാന്\u200d പെരു', 'മാളെന്നരാജാവിനഅണ കലിസ്വർഗ്ഗ സന്ദേഹ പ്രാപ്യ പ ചെരമാന്\u200d പെരുമാളെന്ന രാജാ', 'വിൻറെ ഗുണാധിക്യംമുപ്പത്തിയാറുവർഷം കാലം പാണു ', 'ബ്രാഹ്മണരപരദേശത്ത ചെന്നതുമില്ലപെരുമാളെ കണ്ടതുമില്ല എന്നുകൽപ്പി', 'ച്ച കൃഷ്ണരായ രാ മലയാളം അടക്കുവാൻ പടക്കൂട്ടുകഎല്ലോ ചൈക്കാത്ത അതി']


In [3]:
from hmmlearn import hmm

# Initialize the HMM model
n_states = 71  # Number of hidden states in the HMM
model = hmm.GaussianHMM(n_components=n_states, covariance_type="diag", n_iter=1000)

# Train the model
model.fit(X, model_lengths)

print("HMM training complete.")

HMM training complete.


In [4]:
from joblib import dump

# Save the model to a file
dump(model, 'line_sequences_71_hmm_mode_11-11-2024_1000.joblib')

['line_sequences_71_hmm_mode_11-11-2024_1000.joblib']

In [5]:
import cv2
import numpy as np

# Function to compute FFT features
def compute_fft_features(window, target_feature_size=198):
    fft_features = np.fft.fft2(window)
    fft_magnitude = np.abs(fft_features).flatten()
    
    # Optionally trim or pad the features to match the target size
    if len(fft_magnitude) > target_feature_size:
        fft_magnitude = fft_magnitude[:target_feature_size]
    else:
        fft_magnitude = np.pad(fft_magnitude, (0, target_feature_size - len(fft_magnitude)), mode='constant')
    
    return fft_magnitude

# Function to predict sequence of states for an image
def predict_sequence(model, image_path, window_width=140, step_size=20, target_feature_size=198):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")
    
    image_width = image.shape[1]
    num_windows = (image_width - window_width) // step_size + 1
    fft_sequence = []

    # Generate FFT features for each window
    for i in range(num_windows):
        window_start = i * step_size
        window_end = window_start + window_width
        window = image[:, window_start:window_end]
        fft_features = compute_fft_features(window, target_feature_size)
        fft_sequence.append(fft_features)

    # Predict the sequence of states
    fft_sequence = np.array(fft_sequence)
    predicted_states = model.predict(fft_sequence)
    
    return predicted_states


In [6]:
import os
import numpy as np
import pandas as pd
import cv2
from hmmlearn import hmm
import joblib

# Function to compute FFT features
def compute_fft_features(window, target_feature_size=198):
    fft_features = np.fft.fft2(window)
    fft_magnitude = np.abs(fft_features).flatten()
    
    if len(fft_magnitude) > target_feature_size:
        fft_magnitude = fft_magnitude[:target_feature_size]
    else:
        fft_magnitude = np.pad(fft_magnitude, (0, target_feature_size - len(fft_magnitude)), mode='constant')
    
    return fft_magnitude

# Function to predict sequence of states for an image
def predict_sequence(model, image_path, window_width=140, step_size=20, target_feature_size=198):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")
    
    image_width = image.shape[1]
    num_windows = (image_width - window_width) // step_size + 1
    fft_sequence = []

    # Generate FFT features for each window
    for i in range(num_windows):
        window_start = i * step_size
        window_end = window_start + window_width
        window = image[:, window_start:window_end]
        fft_features = compute_fft_features(window, target_feature_size)
        fft_sequence.append(fft_features)

    # Predict the sequence of states
    fft_sequence = np.array(fft_sequence)
    predicted_states = model.predict(fft_sequence)
    
    return predicted_states

# Load the trained HMM model
model_path = r'line_sequences_71_hmm_mode_11-11-2024_1000.joblib'
if os.path.exists(model_path):
    model = joblib.load(model_path)
    print("Loaded HMM model from file.")
else:
    raise ValueError(f"Model file not found at {model_path}. Please ensure the model file exists.")

# Read the Excel file containing character sequences
df = pd.read_excel(r'C:\Users\Dell\Downloads\line_gt 6.xlsx')

# Extract the 'gt' column containing character sequences
character_sequences = df['gt'].tolist()

# Set to hold unique characters
unique_characters = set()

# Loop through each sequence and add characters to the set
for sequence in character_sequences:
    unique_characters.update(sequence)

# Ensure the number of unique characters matches the number of states (71)
if len(unique_characters) != 71:
    print(f"Warning: Number of unique characters ({len(unique_characters)}) does not match the number of states (71).")
else:
    print(f"Unique characters match the number of states (71).")

# Sort the unique characters to create a consistent mapping
sorted_characters = sorted(unique_characters)

# Create the mapping of state index to character
state_to_char = {i: char for i, char in enumerate(sorted_characters)}

# Directory containing the images
image_folder = r'C:\Users\Dell\Downloads\color_equlsize_jpg'

# Get all image files in the folder (adjust pattern as needed)
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
states = []

# Process each image in the folder
for image_file in image_files:
    # Get the full path of the image
    image_path = os.path.join(image_folder, image_file)
    
    # Predict the sequence of states for the image
    predicted_states = predict_sequence(model, image_path)
    predicted_states = np.array(predicted_states)

    # Decode the predicted states into characters using the state-to-character mapping
    decoded_sequence = ''
    for state in predicted_states:
        # Ensure the state is within the valid range (0-70)
        if 0 <= state < len(state_to_char):  # Check for valid state range (0 to 70)
            decoded_sequence += state_to_char[state]
        else:
            # If state is out of range, silently add a placeholder (no warnings)
            decoded_sequence += '?'  # Placeholder for unknown states
    
    # Append the results
    states.append([image_file, decoded_sequence])
    print(f"Processed {image_file}: {decoded_sequence}")

# Convert data to a DataFrame
output_df = pd.DataFrame(states, columns=['Image Name', 'Decoded Sequence'])

# Save the DataFrame to an Excel file
output_df.to_excel(r'C:\Users\Dell\Desktop\predicted_states_line_fft4_decoded.xlsx', index=False)


Loaded HMM model from file.
Processed MaI12_Page100_line_1.jpg: ഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉ
Processed MaI12_Page100_line_2.jpg: ഞഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓഉഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമ    ഉഉഉഉഉഉഉമജജജജഓഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജദമ ഉഉഉഉഉഉഉഉഉഉഉഉമ    ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓല    ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓല         ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമ           ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉ
Processed MaI12_Page100_line_3.jpg: ൗജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജജജജജജമ     ഓല   ഓഓൗമ       മജജജജജജമ       ഓഞഖഖൾൾൗജജജജജജഓഉമജജജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഓല   ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉ

In [7]:
import os
import numpy as np
import pandas as pd
import cv2
from hmmlearn import hmm
import joblib

# Function to compute FFT features
def compute_fft_features(window, target_feature_size=198):
    fft_features = np.fft.fft2(window)
    fft_magnitude = np.abs(fft_features).flatten()
    
    if len(fft_magnitude) > target_feature_size:
        fft_magnitude = fft_magnitude[:target_feature_size]
    else:
        fft_magnitude = np.pad(fft_magnitude, (0, target_feature_size - len(fft_magnitude)), mode='constant')
    
    return fft_magnitude

# Function to predict sequence of states for an image
def predict_sequence(model, image_path, window_width=140, step_size=20, target_feature_size=198):
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        raise ValueError(f"Image at {image_path} could not be loaded.")
    
    image_width = image.shape[1]
    num_windows = (image_width - window_width) // step_size + 1
    fft_sequence = []

    # Generate FFT features for each window
    for i in range(num_windows):
        window_start = i * step_size
        window_end = window_start + window_width
        window = image[:, window_start:window_end]
        fft_features = compute_fft_features(window, target_feature_size)
        fft_sequence.append(fft_features)

    # Predict the sequence of states
    fft_sequence = np.array(fft_sequence)
    predicted_states = model.predict(fft_sequence)

    # Calculate the log probability of the sequence under the model
    log_probability = model.score(fft_sequence)
    print(f"Log Probability for the image {image_path}: {log_probability}")
    
    return predicted_states, log_probability

# Load the trained HMM model
model_path = r'line_sequences_71_hmm_mode_11-11-2024_1000.joblib'
if os.path.exists(model_path):
    model = joblib.load(model_path)
    print("Loaded HMM model from file.")
else:
    raise ValueError(f"Model file not found at {model_path}. Please ensure the model file exists.")

# Read the Excel file containing character sequences
df = pd.read_excel(r'C:\Users\Dell\Downloads\line_gt 6.xlsx')

# Extract the 'gt' column containing character sequences
character_sequences = df['gt'].tolist()

# Set to hold unique characters
unique_characters = set()

# Loop through each sequence and add characters to the set
for sequence in character_sequences:
    unique_characters.update(sequence)

# Ensure the number of unique characters matches the number of states (71)
if len(unique_characters) != 71:
    print(f"Warning: Number of unique characters ({len(unique_characters)}) does not match the number of states (71).")
else:
    print(f"Unique characters match the number of states (71).")

# Sort the unique characters to create a consistent mapping
sorted_characters = sorted(unique_characters)

# Create the mapping of state index to character
state_to_char = {i: char for i, char in enumerate(sorted_characters)}

# Directory containing the images
image_folder = r'C:\Users\Dell\Downloads\color_equlsize_jpg'

# Get all image files in the folder (adjust pattern as needed)
image_files = [f for f in os.listdir(image_folder) if f.endswith('.jpg')]
states = []

# Process each image in the folder
for image_file in image_files:
    # Get the full path of the image
    image_path = os.path.join(image_folder, image_file)
    
    # Predict the sequence of states for the image and get the log probability
    predicted_states, log_probability = predict_sequence(model, image_path)
    predicted_states = np.array(predicted_states)

    # Decode the predicted states into characters using the state-to-character mapping
    decoded_sequence = ''
    for state in predicted_states:
        # Ensure the state is within the valid range (0-70)
        if 0 <= state < len(state_to_char):  # Check for valid state range (0 to 70)
            decoded_sequence += state_to_char[state]
        else:
            # If state is out of range, silently add a placeholder (no warnings)
            decoded_sequence += '?'  # Placeholder for unknown states
    
    # Append the results
    states.append([image_file, decoded_sequence, log_probability])
    print(f"Processed {image_file}: {decoded_sequence}, Log Probability: {log_probability}")

# Convert data to a DataFrame
output_df = pd.DataFrame(states, columns=['Image Name', 'Decoded Sequence', 'Log Probability'])

# Save the DataFrame to an Excel file
output_df.to_excel(r'C:\Users\Dell\Desktop\predicted_states_line_fft4_decoded_with_log_probabilities.xlsx', index=False)


Loaded HMM model from file.
Log Probability for the image C:\Users\Dell\Downloads\color_equlsize_jpg\MaI12_Page100_line_1.jpg: -1158213.3419966223
Processed MaI12_Page100_line_1.jpg: ഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉ, Log Probability: -1158213.3419966223
Log Probability for the image C:\Users\Dell\Downloads\color_equlsize_jpg\MaI12_Page100_line_2.jpg: -1194407.1805363991
Processed MaI12_Page100_line_2.jpg: ഞഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓഉഓഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമ    ഉഉഉഉഉഉഉമജജജജഓഉഉഉഉഉഉഉഉഉമജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജജദമ ഉഉഉഉഉഉഉഉഉഉഉഉമ    ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓല    ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഓല         ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉമ           ഉഉഉഉഉഉഉഉഉഉഉഉഉഉഉ