In [1]:
import tensorflow as tf
import numpy as np
import xml.etree.ElementTree as ET

In [2]:
def preprocess_strokes(strokes, max_length=100):
    """
    Normalize and pad strokes to ensure a consistent input shape.
    - Normalizes (X, Y) coordinates.
    - Pads/truncates strokes to max_length.
    """
    all_points = np.concatenate(strokes, axis=0) if strokes else np.array([[0, 0]])
    
    # Normalize X and Y to [0,1] range
    min_vals = np.min(all_points, axis=0)
    max_vals = np.max(all_points, axis=0)
    norm_strokes = [(s - min_vals) / (max_vals - min_vals + 1e-5) for s in strokes]
    
    # Flatten strokes and pad/truncate to fixed size
    flat_strokes = np.concatenate(norm_strokes, axis=0)[:max_length]
    pad_length = max_length - len(flat_strokes)
    padded_strokes = np.pad(flat_strokes, ((0, pad_length), (0, 0)), mode='constant')
    
    return padded_strokes
import os
def load_dataset(inkml_dirs, num_files=1000, max_length=100):
    samples = []
    labels = []
    charset = set()

    for root_dir in inkml_dirs:
        if num_files == 'None' :
            files = [f for f in os.listdir(root_dir) if f.endswith('.inkml')]
        else:
            files = [f for f in os.listdir(root_dir) if f.endswith('.inkml')][:num_files]  # Limit files
            
        for file in files:
            path = os.path.join(root_dir, file)
            strokes, label = parse_inkml(path)
            
            if strokes and label:
                processed_strokes = preprocess_strokes(strokes, max_length)
                samples.append(processed_strokes)
                labels.append(label)
                charset.update(label)

    # Create character mappings
    char2idx = {c: i + 1 for i, c in enumerate(sorted(charset))}
    char2idx['<pad>'] = 0
    idx2char = {v: k for k, v in char2idx.items()}

    return samples, labels, char2idx, idx2char

In [3]:
model1 = tf.keras.models.load_model('exp2.h5')
model0 = tf.keras.models.load_model('final_model.h5')
model2 = tf.keras.models.load_model('exp3.h5')



In [4]:
def parse_inkml(file_path):
    """
    Parse an InkML file and return:
    - strokes: a list of numpy arrays (each is a stroke of shape [n_points, 2])
    - label: the LaTeX label (ground truth) if available
    """
    tree = ET.parse(file_path)
    root = tree.getroot()
    
    # Handling namespaces (InkML files typically use a default InkML namespace)
    ns = {'ink': 'http://www.w3.org/2003/InkML'}
    
    # Get annotation (if present)
    label = None
    for annotation in root.findall('ink:annotation', ns):
        # Choose normalizedLabel if available; fallback to label
        if annotation.get('type') == 'normalizedLabel':
            label = annotation.text.strip()
            break
        elif annotation.get('type') == 'label':
            label = annotation.text.strip()
    
    strokes = []
    # Each <trace> element contains a stroke
    for trace in root.findall('ink:trace', ns):
        trace_data = trace.text.strip()
        points = []
        # InkML usually separates points by commas and coordinates by space
        for point_str in trace_data.split(','):
            # Remove extra spaces and split by whitespace
            coords = point_str.strip().split()
            if len(coords) >= 2:
                x, y = float(coords[0]), float(coords[1])
                points.append([x, y])
        if points:
            strokes.append(np.array(points))
    
    return strokes, label

In [5]:
num_test_files = 15000   # Get value from slider
test_samples, test_labels, test_char2idx, test_idx2char = load_dataset(['./data/mathwriting-2024/mathwriting-2024/test'], num_files=num_test_files)
print(f"Loaded {len(test_samples)} samples")

Loaded 7644 samples


In [6]:
# Convert strokes (samples) into a padded sequence format
import tensorflow as tf

def preprocess(samples, labels, char2idx):
    # max_seq_length = max(len(s) for s in samples)
    target_seq_length = 110  # Find the longest stroke sequence
    padded_samples = np.array([
        np.pad(s, ((0, max(0, target_seq_length - len(s))), (0, 0)), mode='constant')[:target_seq_length]
        for s in samples
    ])
    
    # Ensure padded_samples has shape (batch_size, 110, features)
    padded_samples = np.pad(padded_samples, ((0, 0), (0, 0), (0, 1)), mode='constant')

    # Convert labels into numerical format using char2idx
    numerical_labels = [[char2idx[c] for c in label] for label in labels]

    # Pad label sequences
    max_label_length = max(
    max(len(label) for label in labels),
    max(len(label) for label in test_labels))
    # Pad label sequences
    padded_labels = tf.keras.preprocessing.sequence.pad_sequences(
        numerical_labels, maxlen=max_label_length, padding='post'
    )
    # padded_labels = tf.keras.preprocessing.sequence.pad_sequences(numerical_labels, padding='post')

    # Convert to TensorFlow datasets for efficient loading
    # BATCH_SIZE = 32           

    # train_dataset = tf.data.Dataset.from_tensor_slices((padded_samples, padded_labels))
    # train_dataset = train_dataset.shuffle(10).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)
    # return train_dataset
    return padded_samples, padded_labels


In [7]:
test_X, test_y = preprocess(test_samples, test_labels, test_char2idx)

In [8]:
print(test_X.shape,test_y.shape)

(7644, 110, 3) (7644, 173)


In [9]:
import numpy as np
start_token = 0  # Define the start token (adjust based on your dataset)
test_decoder_inputs = np.pad(test_y[:, :-1], ((0, 0), (1, 0)), constant_values=start_token)


# convert an image to inkml format

In [12]:
import cv2
import numpy as np
import xml.etree.ElementTree as ET

def extract_strokes_with_timestamps(image_path):
    # Load image in grayscale
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    
    # Find contours (these represent strokes)
    contours, _ = cv2.findContours(img, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
    
    strokes = []
    timestamp = 0  # Initialize timestamp
    
    # Get image dimensions for normalization
    h, w = img.shape

    for contour in contours:
        stroke = []
        for point in contour:
            x, y = int(point[0][0]), int(point[0][1])
            
            # Normalize x and y coordinates to [0, 1]
            norm_x = round(x / w, 8)
            norm_y = round(y / h, 8)
            
            stroke.append((norm_x, norm_y, timestamp))
            timestamp += 1  # Increment timestamp for each point
        strokes.append(stroke)
    
    return strokes

def strokes_to_inkml(strokes, output_file="output.inkml"):
    root = ET.Element("ink", xmlns="http://www.w3.org/2003/InkML")
    trace_group = ET.SubElement(root, "traceGroup")
    
    for i, stroke in enumerate(strokes):
        trace = ET.SubElement(trace_group, "trace", id=str(i))
        trace.text = " ".join(f"{x:.4f},{y:.4f},{t}" for x, y, t in stroke)
    
    tree = ET.ElementTree(root)
    tree.write(output_file, encoding="utf-8", xml_declaration=True)
    print(f"InkML saved to {output_file}")


In [13]:
# Example usage
image_path = "C:/Users/HP/Pictures/test.png"  # Replace with your image path
strokes = extract_strokes_with_timestamps(image_path)
print(strokes)  # Output: [(x1, y1, t1)
# strokes_to_inkml(strokes)

[[(0.94021739, 0.40111421, 0), (0.94202899, 0.39832869, 1), (0.94384058, 0.39832869, 2), (0.94565217, 0.39832869, 3), (0.94746377, 0.39832869, 4), (0.94927536, 0.39832869, 5), (0.95108696, 0.39832869, 6), (0.95289855, 0.40111421, 7), (0.95289855, 0.40389972, 8), (0.95108696, 0.40668524, 9), (0.94927536, 0.40668524, 10), (0.94746377, 0.40668524, 11), (0.94565217, 0.40668524, 12), (0.94384058, 0.40668524, 13), (0.94202899, 0.40947075, 14), (0.94021739, 0.40947075, 15), (0.9384058, 0.40947075, 16), (0.9365942, 0.41225627, 17), (0.93478261, 0.41225627, 18), (0.93297101, 0.41225627, 19), (0.93115942, 0.41225627, 20), (0.92934783, 0.41225627, 21), (0.92753623, 0.41225627, 22), (0.92572464, 0.41225627, 23), (0.92391304, 0.41225627, 24), (0.92210145, 0.41225627, 25), (0.92028986, 0.41225627, 26), (0.91847826, 0.41225627, 27), (0.91666667, 0.41504178, 28), (0.91485507, 0.41504178, 29), (0.91304348, 0.41504178, 30), (0.91123188, 0.4178273, 31), (0.90942029, 0.4178273, 32), (0.9076087, 0.4178273,

In [14]:
def preprocess_extracted_strokes(strokes, target_seq_length=110, char2idx=None):
    """
    Process extracted strokes from an image to match the format used during training.
    
    Args:
        strokes: List of strokes where each stroke is a list of (x, y, timestamp) tuples
        target_seq_length: The fixed sequence length expected by the model
        char2idx: Character to index mapping (only needed for labels)
        
    Returns:
        Processed strokes in the expected model input format
    """
    # Flatten strokes into a single sequence of (x, y, timestamp) points
    flattened_strokes = []
    for stroke in strokes:
        flattened_strokes.extend(stroke)
    
    # Convert to numpy array and extract features
    stroke_array = np.array(flattened_strokes)
    
    # If the stroke array is empty, create a default one
    if len(stroke_array) == 0:
        stroke_array = np.zeros((1, 3))
    
    # Take only the first target_seq_length points or pad if needed
    if len(stroke_array) > target_seq_length:
        processed_strokes = stroke_array[:target_seq_length]
    else:
        # Pad with zeros to reach target_seq_length
        padding = np.zeros((target_seq_length - len(stroke_array), 3))
        processed_strokes = np.vstack([stroke_array, padding])
    
    # Add the extra feature dimension to match your training format
    processed_strokes = np.pad(processed_strokes, ((0, 0), (0, 1)), mode='constant')
    
    # Reshape to match the expected batch input shape
    processed_strokes = np.expand_dims(processed_strokes, axis=0)
    
    return processed_strokes

# Example usage with your extracted strokes
def predict_from_image(image_path, model, idx2char, target_seq_length=110, max_time_steps=128):
    """
    Extract strokes from an image and make a prediction using your trained model
    
    Args:
        image_path: Path to the image
        model: Your trained model
        char2idx: Character to index mapping
        idx2char: Index to character mapping
        target_seq_length: The fixed sequence length expected by the model
        max_time_steps: Maximum time steps for decoder input
        
    Returns:
        Predicted text
    """
    # Extract strokes from image
    strokes = extract_strokes_with_timestamps(image_path)
    
    # Preprocess the strokes to match model's expected input format
    processed_strokes = preprocess_extracted_strokes(strokes, target_seq_length)
    
    # Create decoder input (assuming you're using the same approach as in training)
    start_token = 0
    decoder_input = np.array([[start_token]])  # Initial token to start decoding
    
    # Create padding to match MAX_TIME_STEPS if necessary
    if max_time_steps > 1:
        padding = np.zeros((1, max_time_steps - 1))
        decoder_input = np.hstack([decoder_input, padding])
    
    # Make prediction
    prediction = model.predict([processed_strokes, decoder_input])
    
    # Convert prediction to text
    # This depends on your model output format, but typically:
    predicted_indices = np.argmax(prediction, axis=-1)[0]
    
    # Stop at end token or padding
    text = ""
    for idx in predicted_indices:
        if idx == 0 or idx not in idx2char:  # Assuming 0 is padding or end token
            break
        text += idx2char[idx]
    
    return text


In [32]:
import numpy as np

def preprocess_extracted_strokes(strokes, target_seq_length=110):
    """
    Process extracted strokes from an image to match the format used during training.
    
    Args:
        strokes: List of strokes where each stroke is a list of (x, y, timestamp) tuples
        target_seq_length: The fixed sequence length expected by the model
        
    Returns:
        Processed strokes in the expected model input format
    """
    # Flatten strokes into a single sequence of (x, y, timestamp) points
    flattened_strokes = []
    for stroke in strokes:
        flattened_strokes.extend(stroke)
    
    # Convert to numpy array and extract features
    if len(flattened_strokes) == 0:
        # Handle empty strokes
        stroke_array = np.zeros((1, 3))
    else:
        stroke_array = np.array(flattened_strokes)
    
    # Extract x, y coordinates and timestamps
    # Normalize coordinates if needed (similar to training preprocessing)
    # You might need to adjust this based on your original preprocessing
    
    # Take only the first target_seq_length points or pad if needed
    if len(stroke_array) > target_seq_length:
        processed_strokes = stroke_array[:target_seq_length, :]
    else:
        # Pad with zeros to reach target_seq_length
        padding = np.zeros((target_seq_length - len(stroke_array), 3))
        processed_strokes = np.vstack([stroke_array, padding])
    
    # Important: Create shape (batch_size, seq_length, 3) 
    # The error says model expects shape=(None, None, 3), so no need for extra feature dimension
    processed_strokes = processed_strokes[:, :3]  # Ensure we only have 3 features
    
    # Reshape to match the expected batch input shape
    processed_strokes = np.expand_dims(processed_strokes, axis=0)
    
    return processed_strokes

import numpy as np

def predict_from_image(image_path, model, idx2char, target_seq_length=110, max_time_steps=128):
    """
    Extract strokes from an image and make a prediction using your trained model.
    
    Args:
        image_path: Path to the image
        model: Your trained model
        idx2char: Index-to-character mapping
        target_seq_length: The fixed sequence length expected by the model
        max_time_steps: Maximum time steps for decoder input
        
    Returns:
        Predicted text
    """
    
    # Extract strokes from the image
    strokes = extract_strokes_with_timestamps(image_path)
    
    # Preprocess the strokes to match model's expected input format
    processed_strokes = preprocess_extracted_strokes(strokes, target_seq_length)
    
    # Print shapes for debugging
    print(f"Processed strokes shape: {processed_strokes.shape}")
    
    # Create decoder input with correct initialization
    start_token = 0
    decoder_input = np.full((1, max_time_steps), start_token, dtype=np.int32)  # Use np.full() for proper initialization
    
    print(f"Decoder input shape: {decoder_input.shape}")
    
    # Make prediction
    try:
        prediction = model.predict([processed_strokes, decoder_input])
        print(f"Prediction shape: {prediction.shape}")
    except Exception as e:
        print(f"Error during prediction: {e}")
        # Debug model input shapes
        for i, inp in enumerate(model.inputs):
            print(f"Model input {i} shape: {inp.shape}")
        raise
    
    return prediction
    
    # # Convert prediction to text
    # predicted_indices = np.argmax(prediction, axis=-1)[0]
    
    # # Stop at end token or padding
    # text = ""
    # for idx in predicted_indices:
    #     if idx == 0 or idx not in idx2char:  # Assuming 0 is padding or end token
    #         break
    #     text += idx2char[idx]
    
    # return text

def full_prediction_workflow(image_path, model,idx2char):
    """
    Complete workflow from image to text prediction
    """
    # Extract input shapes from model for debugging
    print("Model expects:")
    for i, inp in enumerate(model.inputs):
        print(f"Input {i} shape: {inp.shape}")
    
    # Perform the prediction
    try:
        prediction = predict_from_image(image_path, model,idx2char=idx2char)
        print(f"Predicted text: {prediction}")
        return prediction
    
    except Exception as e:
        print(f"Error in prediction workflow: {e}")
        
        # # Let's try a more flexible approach by creating inputs based on model's expected shapes
        # try:
        #     print("Attempting alternative approach...")
        #     # Create dummy inputs with shapes that match the model's expectations
        #     inputs = []
        #     for i, inp in enumerate(model.inputs):
        #         shape = [1 if dim is None else dim for dim in inp.shape.as_list()]
        #         if i == 0:  # Assume first input is strokes
        #             # Extract strokes and reshape according to expected dimensions
        #             strokes = extract_strokes_with_timestamps(image_path)
        #             processed = preprocess_extracted_strokes(strokes)
        #             # Reshape if necessary
        #             if len(shape) > 1 and shape[1] is None:
        #                 shape[1] = processed.shape[1]
        #             inputs.append(processed.reshape(shape))
        #         else:
        #             # For other inputs (like decoder inputs), create zeros with expected shape
        #             dummy_input = np.zeros(shape)
        #             if i == 1:  # Assume second input is decoder input
        #                 dummy_input[0, 0] = 0  # Start token
        #             inputs.append(dummy_input)
        #         print(f"Created input {i} with shape {inputs[-1].shape}")
            
        #     # Try prediction with these inputs
        #     prediction = model.predict(inputs)
        #     print(f"Alternative prediction shape: {prediction.shape}")
            
        # #     # Handle result
        # #     idx2char = {idx: char for char, idx in char2idx.items()}
        # #     predicted_indices = np.argmax(prediction, axis=-1)[0]
        # #     text = ""
        # #     for idx in predicted_indices:
        # #         if idx == 0 or idx not in idx2char:
        # #             break
        # #         text += idx2char[idx]
        # #     print(f"Alternative predicted text: {text}")
        # #     return text
        # except Exception as e2:
        #     print(f"Alternative approach failed: {e2}")
        #     raise e

In [22]:
import json
file_path = "EXP1_char2idx.json"
with open(file_path, "r") as file:
    loaded_dict = json.load(file)

print("Loaded Dictionary:", loaded_dict)

Loaded Dictionary: {'1': ' ', '2': '!', '3': '#', '4': '%', '5': '&', '6': '(', '7': ')', '8': '*', '9': '+', '10': ',', '11': '-', '12': '.', '13': '/', '14': '0', '15': '1', '16': '2', '17': '3', '18': '4', '19': '5', '20': '6', '21': '7', '22': '8', '23': '9', '24': ':', '25': ';', '26': '<', '27': '=', '28': '>', '29': '?', '30': 'A', '31': 'B', '32': 'C', '33': 'D', '34': 'E', '35': 'F', '36': 'G', '37': 'H', '38': 'I', '39': 'J', '40': 'K', '41': 'L', '42': 'M', '43': 'N', '44': 'O', '45': 'P', '46': 'Q', '47': 'R', '48': 'S', '49': 'T', '50': 'U', '51': 'V', '52': 'W', '53': 'X', '54': 'Y', '55': 'Z', '56': '[', '57': '\\', '58': ']', '59': '^', '60': '_', '61': 'a', '62': 'b', '63': 'c', '64': 'd', '65': 'e', '66': 'f', '67': 'g', '68': 'h', '69': 'i', '70': 'j', '71': 'k', '72': 'l', '73': 'm', '74': 'n', '75': 'o', '76': 'p', '77': 'q', '78': 'r', '79': 's', '80': 't', '81': 'u', '82': 'v', '83': 'w', '84': 'x', '85': 'y', '86': 'z', '87': '{', '88': '|', '89': '}', '0': '<pa

In [35]:
# Usage example
image_path = "C:/Users/HP/Pictures/test.png"
prediction = full_prediction_workflow(image_path, model2, loaded_dict)

Model expects:
Input 0 shape: (None, None, 3)
Input 1 shape: (None, None)
Processed strokes shape: (1, 110, 3)
Decoder input shape: (1, 128)
Prediction shape: (1, 128, 100)
Predicted text: [[[0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]
  [0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]
  [0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]
  ...
  [0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]
  [0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]
  [0.00611791 0.02234418 0.00490657 ... 0.00455681 0.00505696 0.00514279]]]


In [36]:
predicted_token_ids = np.argmax(prediction, axis=-1) 
print(predicted_token_ids)

[[57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57
  57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57
  57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57
  57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57
  57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57 57
  57 57 57 57 57 57 57 57]]


In [19]:
def convert_predictions_to_latex(predictions, idx2char):
    """
    Convert a sequence of predicted token IDs into a LaTeX equation.

    Args:
        predictions (list or np.array): List of predicted token IDs.
        idx2char (dict): Dictionary mapping token IDs to LaTeX tokens.

    Returns:
        str: LaTeX formatted equation.
    """
    latex_tokens = [idx2char.get(token, "") for token in predictions if token in idx2char]
    latex_expression = " ".join(latex_tokens)  # Join tokens with space for readability
    return f"\\[{latex_expression}\\]"  # Wrap with LaTeX math mode delimiters

In [20]:
latex_equation = convert_predictions_to_latex(predicted_token_ids, idx2char=loaded_dict)
latex_equation

TypeError: unhashable type: 'numpy.ndarray'

# Benchmark testing

: 