# Binary Beats: Predicting Morse code gaps with ML


###### Importing all the necessary libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.tokenize import word_tokenize
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import re
import os.path
import random
import copy
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

###### File Path:

there are two methods employed for cleaning the data read from a file located at "D:\Projects\Morse Code\The Origin.rtf."

##### Word separation using 2 methods.

###### method 1: Counting word using space separation
Replaced '/n' the next line tag with space and split the sentences with space.

###### method 2: Word Tokenization
Words are separated using the NLTK library


In [2]:
##### Part 1 to separate the words 

file_path = "D:/Projects/Morse Code1/The Origin.rtf"
f = open(file_path,"r")

# method 1
data = f.read()
words1 = data. replace('\n',' ').split(" ")
len(words1)

# trail 2
# final method
words = word_tokenize(data)
print(len(words1),len(words))
f.close()

2622 2895


The word separation is better with method 2, using word tokenization.

###### Data Cleaning:

###### Method : Removing Escape Sequences

Once the file was loaded, a few issues were encountered while reading the data. It contained tags along with sequence characters. A regular expression pattern `r'\\[a-zA-Z]+|\[a-z]+'` was defined to match sequences of characters that either start with a backslash (\) followed by one or more letters (uppercase or lowercase), or sequences that start with an opening bracket ([) followed by one or more lowercase letters. This was used to remove the unnecessary words and clean the data.

In [1]:
# cleaning of the data:

file_path = "D:/Projects/Morse Code1/The Origin.rtf"
f = open(file_path,"r")

data = f.read()
print('Original Data:')
print(data[100:500],'\n')
pattern = r'\\[a-zA-Z]+|\[a-z]+'
cleaned_data = re.sub(pattern, '',data)
print('Cleaned Data: ')
print(cleaned_data[102 :500])
print(len(cleaned_data))



Original Data:
Calibri;}}
{\*\generator Riched20 10.0.22621}\viewkind4\uc1 
\pard\sa200\sl276\slmult1\f0\fs22\lang9 CHAPTER 1 \par
\par
\par
professor Robert langdon gazed up at the forty-foot-tall dog sitting in the \par
plaza. The animal\rquote s fur was a living carpet of grass and fragrant flowers. \par
\par
I\rquote m trying to love you, he thought. I truly am. \par
\par
Langdon pondered the creature a bit  

Cleaned Data: 
Robert langdon gazed up at the forty-foot-tall dog sitting in the 
plaza. The animal s fur was a living carpet of grass and fragrant flowers. 

I m trying to love you, he thought. I truly am. 

Langdon pondered the creature a bit longer and then continued along a 
suspended walkway, descending a sprawling terrace of stairs whose uneven 
treads were intended to jar the arriving visitor from his u
13682


###### Morse Code Conversion and File Writing:

A text string from cleaned data is converted to Morse code. The text is iterated character by character, and each character is replaced with its corresponding Morse code representation based on the predefined morse_code dictionary. The resulting Morse code is then stored in the variable morse_text.
sole.

In [None]:
# final word tokenization and create a df
morse_text = ''
morse_code = {
    'a': '.-', 'b': '-...', 'c': '-.-.', 'd': '-..', 'e': '.', 'f': '..-.', 'g': '--.', 'h': '....', 'i': '..', 'j': '.---',
    'k': '-.-', 'l': '.-..', 'm': '--', 'n': '-.', 'o': '---', 'p': '.--.', 'q': '--.-', 'r': '.-.', 's': '...', 't': '-',
    'u': '..-', 'v': '...-', 'w': '.--', 'x': '-..-', 'y': '-.--', 'z': '--..',
    
    '0': '-----', '1': '.----', '2': '..---', '3': '...--', '4': '....-', '5': '.....',
    '6': '-....', '7': '--...', '8': '---..', '9': '----.',
    
    '.': '.-.-.-', ',': '--..--', '?': '..--..', '!': '-.-.--', ':': '---...', ';': '-.-.-.', "'": ".----.",
    '"': ".-..-",
    '+': ".-.-.", '-': "-....-", '/': "-..-.", '=': "-...-", '(': "-.--.", ')': "-.--.-", '&': ".-...",
    ';': "-.-.-.", '=': "-...-", '@': ".--.-.", " ":"   "
}
for i in cleaned_data:
    if i.lower() in morse_code:
        morse_text += morse_code[i.lower()]
        
filename = 'Morse code converted.txt'
directory = "D:/Projects/Morse Code/"
file_path = os.path.join(directory, filename)
file = open(file_path, 'w')
file.write(morse_text)
file.close()


print(morse_text)

.----.----..---.......--------..--------.....--...----------   -.-..-.-....-....-...-.-.-..----   -.-..-.-....-....-...-.-.-.   .-...-.-......-....--------   .---------.-.-.------.-.-.-..---..----......---.----....-.----   ..-------------..-----...-.....---------..---..-------.   -.-......-.--.-..-.   .----   .--..-.---..-........---.-.   .-.----.....-.-   .-...--.--.-..----.   --..---...-..   ..-.--.   .--   -.....   ..-.---.-.--.---....-..-.--------....--.-.-...-..   -..-----.   .....--..-.--.   ..-.   -.....   .--..-...---...-.-.-.-   -.....   .--...--.-.-..   ...   ..-...-.-.   .--.-...   .-   .-.......-..-.--.   -.-..-.-..--..-   ---..-.   --..-..-......   .--.-..   ..-..-..---..-..--.-   ..-..-..---.--..-.....-.-.-   ..   --   -.-.-.--..-.--.   ----   .-..---...-.   -.-----..---..--   .....   -....---..---.....-.-.-.-   ..   -.-...-.-..-.--   .---.-.-.-   .-...--.--.-..----.   .--.----.-....-..-..   -.....   -.-..-...--..-.-..   .-   -.....-   .-..----.--...-.   .--.-..   -.....-

This code is effectively reading the Morse code from the specified file, and the content of the Morse code is now available in the morse_code_text variable for further processing or display.

In [None]:
text_file_path = "D:/Projects/Morse Code/Morse code converted.txt"
f = open(text_file_path,'r')
morse_code_text = f.read()
morse_code_text

'.----.----..---.......--------..--------.....--...----------   -.-..-.-....-....-...-.-.-..----   -.-..-.-....-....-...-.-.-.   .-...-.-......-....--------   .---------.-.-.------.-.-.-..---..----......---.----....-.----   ..-------------..-----...-.....---------..---..-------.   -.-......-.--.-..-.   .----   .--..-.---..-........---.-.   .-.----.....-.-   .-...--.--.-..----.   --..---...-..   ..-.--.   .--   -.....   ..-.---.-.--.---....-..-.--------....--.-.-...-..   -..-----.   .....--..-.--.   ..-.   -.....   .--..-...---...-.-.-.-   -.....   .--...--.-.-..   ...   ..-...-.-.   .--.-...   .-   .-.......-..-.--.   -.-..-.-..--..-   ---..-.   --..-..-......   .--.-..   ..-..-..---..-..--.-   ..-..-..---.--..-.....-.-.-   ..   --   -.-.-.--..-.--.   ----   .-..---...-.   -.-----..---..--   .....   -....---..---.....-.-.-.-   ..   -.-...-.-..-.--   .---.-.-.-   .-...--.--.-..----.   .--.----.-....-..-..   -.....   -.-..-...--..-.-..   .-   -.....-   .-..----.--...-.   .--.-..   -.....

###### Introducing Data Corruption for Testing:

The corrupt_data function corrupts a global Morse code text by randomly replacing a specified percentage of its characters with '?', excluding spaces, and returns the corrupted text and a list of the original replaced characters. It prints the count of corrupted characters.

In [None]:
# removing some part of the data

import random

def corrupt_data(percent):
    global morse_code_text
    morse_code_text_final = morse_code_text
    length = len(morse_code_text)
    count = int(length * percent)
    #print(length, count)
    #print(morse_code_text)
    morse_code_text = list(morse_code_text)
    #print(morse_code_text)
    for i in range(count):
        random_number = random.randint(length//3, length - 1)
        if morse_code_text[random_number] == ' ':
            continue
        else:
            morse_code_text[random_number] = '?'
    morse_code_text = ''.join(morse_code_text)
    final_label = []

    for i in range(len(morse_code_text)):
        if morse_code_text[i] == '?':
            final_label.append(morse_code_text_final[i])
    print(morse_code_text.count('?'))
    return morse_code_text, final_label

morse_code_text = morse_text

In [None]:
pip install python-docx


Note: you may need to restart the kernel to use updated packages.




In [None]:
'''corrupt_percent = [0.01, 0.02, 0.03, 0.05,0.1,0.5,0.7]

for percent in corrupt_percent:
    morse_code_text, final_label = corrupt_data(percent)
    print(morse_code_text.count('?'))
    print(morse_code_text)'''

"corrupt_percent = [0.01, 0.02, 0.03, 0.05,0.1,0.5,0.7]\n\nfor percent in corrupt_percent:\n    morse_code_text, final_label = corrupt_data(percent)\n    print(morse_code_text.count('?'))\n    print(morse_code_text)"

In [None]:
morse_code_text

'.----.----..---.......--------..--------.....--...----------   -.-..-.-....-....-...-.-.-..----   -.-..-.-....-....-...-.-.-.   .-...-.-......-....--------   .---------.-.-.------.-.-.-..---..----......---.----....-.----   ..-------------..-----...-.....---------..---..-------.   -.-......-.--.-..-.   .----   .--..-.---..-........---.-.   .-.----.....-.-   .-...--.--.-..----.   --..---...-..   ..-.--.   .--   -.....   ..-.---.-.--.---....-..-.--------....--.-.-...-..   -..-----.   .....--..-.--.   ..-.   -.....   .--..-...---...-.-.-.-   -.....   .--...--.-.-..   ...   ..-...-.-.   .--.-...   .-   .-.......-..-.--.   -.-..-.-..--..-   ---..-.   --..-..-......   .--.-..   ..-..-..---..-..--.-   ..-..-..---.--..-.....-.-.-   ..   --   -.-.-.--..-.--.   ----   .-..---...-.   -.-----..---..--   .....   -....---..---.....-.-.-.-   ..   -.-...-.-..-.--   .---.-.-.-   .-...--.--.-..----.   .--.----.-....-..-..   -.....   -.-..-...--..-.-..   .-   -.....-   .-..----.--...-.   .--.-..   -.....

###### Creating a Hash Table for Morse Code Patterns:

The create_hash_table function constructs a hash table from Morse code sequences, capturing patterns of a specified length by recording their indices, occurrence frequency, and counts of subsequent dots and dashes, aiding in pattern analysis.

In [None]:
#final
def create_hash_table(input_pattern, window_size):
    hash_table = {}
    pattern_list = []

    for i in range(len(input_pattern) - window_size + 1):
        pattern = input_pattern[i:i + window_size]
        pattern_list.append(pattern)

    for idx, pattern in enumerate(pattern_list):
        if pattern in hash_table:
            hash_table[pattern]['indices'].append(idx)
            hash_table[pattern]['count'] += 1
            
            next_char_index = idx + window_size
            if next_char_index < len(input_pattern):
                next_char = input_pattern[next_char_index]
                hash_table[pattern]['dot_count'] += (next_char == '.')
                hash_table[pattern]['dash_count'] += (next_char == '-')
        else:
            hash_table[pattern] = {'indices': [idx], 'count': 1, 'dot_count': 0, 'dash_count': 0}

            next_char_index = idx + window_size
            if next_char_index < len(input_pattern):
                next_char = input_pattern[next_char_index]
                hash_table[pattern]['dot_count'] += (next_char == '.')
                hash_table[pattern]['dash_count'] += (next_char == '-')

    return hash_table


###### Predicting Question Mark Labels Using Context:

The predict_question_mark function uses a hash table to predict replacements for '?' in Morse code by comparing dot and dash counts within a context window, returning a list of predicted characters. It enhances Morse code interpretation by intelligently inferring missing or corrupted characters.

In [None]:
#final
def predict_question_mark(input_pattern, window_size):
    hash_table = create_hash_table(input_pattern, window_size)
    predicted_label = []
    
    for i in range(len(input_pattern)):
        if input_pattern[i] == '?':
            preceding_pattern = input_pattern[i - window_size:i]

            if preceding_pattern in hash_table:
                dot_count = hash_table[preceding_pattern]['dot_count']
                dash_count = hash_table[preceding_pattern]['dash_count']

                predicted_char = '.' if dot_count >= dash_count else '-'
                predicted_label.append(predicted_char)

    return predicted_label


###### Evaluating Accuracy of Predictive Model Across Corrupted Data:


This code evaluates a predictive model's accuracy on corrupted Morse code data, iterating over varying corruption levels (corrupt_percent) and window sizes (z) for hash tables. For each combination, it corrupts data, predicts missing characters, and records accuracy, organizing results into a 2D list for analysis of the model's performance across conditions..

In [None]:
z = [2,3,4,5]
corrupt_percent = [0.01, 0.02, 0.03, 0.05,0.1]
accuracies = []
cf_matrices = []

for percent in corrupt_percent:
    morse_code_text, final_label = corrupt_data(percent)
    #print('len of final label:',len(final_label))
    
    for window_size in z:
        hash_table = create_hash_table(morse_code_text, window_size)
        predicted_label = predict_question_mark(morse_code_text, window_size)
        accuracies.append(round(accuracy_score(final_label, predicted_label), 2))
        
# Reshape accuracies list for plotting
accuracies = [accuracies[i:i + len(z)] for i in range(0, len(accuracies), len(z))]


282
850
1685
3014
5335


In [None]:
accuracies

[[0.62, 0.64, 0.64, 0.64],
 [0.4, 0.4, 0.4, 0.4],
 [0.3, 0.3, 0.3, 0.3],
 [0.26, 0.26, 0.26, 0.26],
 [0.26, 0.26, 0.26, 0.26]]

**Generating Morse Code Frequency Features and Creating DataFrame:**

The code introduces some structure into Morse code sequences by replacing a character with '?', calculates features like dot/dash counts and ratios, and compiles these into a DataFrame for analysis. This structured approach aids in examining Morse code patterns and understanding their quantitative attributes for research or analysis.

In [None]:
def replace_single_random_index_with_question(morse_list):
    morse_list_copy = copy.deepcopy(morse_list)
    for i in range(len(morse_list_copy)):
        random_index = random.randint(0, len(morse_list_copy[i]) - 1)
        morse_list_copy[i] = morse_list_copy[i][:random_index] + '?' + morse_list_copy[i][random_index + 1:]

    return morse_list_copy


def morse_frequency_features(morse_code, og):
    dot_count = morse_code.count('.')
    dash_count = morse_code.count('-')
    sequence_length = len(morse_code)

    dot_ratio = dot_count / sequence_length
    dash_ratio = dash_count / sequence_length

    if dot_count + dash_count != 0:
        average_length = sequence_length / (dot_count + dash_count)
    else:
        average_length = 0

    total_length = sequence_length

    index_of_question_mark = morse_code.find('?')
    
    if 0 <= index_of_question_mark < len(og):
        sign_at_same_index_in_og = og[index_of_question_mark]
    else:
        sign_at_same_index_in_og = None

    return [dot_count, dash_count, dot_ratio, dash_ratio, average_length, total_length, index_of_question_mark, sign_at_same_index_in_og]

column_names = ['Dot_Count', 'Dash_Count', 'Dot_Ratio', 'Dash_Ratio', 'Average_Length', 'Total_Length', 'Index_of_?', 'Sign_at_Same_Index']
df = pd.DataFrame(columns=column_names)

morse_list = morse_text.split()
modified_morse_list = replace_single_random_index_with_question(morse_list)

for i in range(len(morse_list)):
    numerical_features = morse_frequency_features(modified_morse_list[i], morse_list[i])
    df = df.append(pd.Series(numerical_features, index=column_names), ignore_index=True)

# Print the DataFrame
df


Unnamed: 0,Dot_Count,Dash_Count,Dot_Ratio,Dash_Ratio,Average_Length,Total_Length,Index_of_?,Sign_at_Same_Index
0,20,39,0.333333,0.650000,1.016949,60,18,.
1,18,13,0.562500,0.406250,1.032258,32,14,.
2,17,9,0.629630,0.333333,1.038462,27,20,.
3,15,11,0.555556,0.407407,1.038462,27,1,-
4,22,39,0.354839,0.629032,1.016393,62,39,.
...,...,...,...,...,...,...,...,...
2249,6,6,0.461538,0.461538,1.083333,13,2,-
2250,3,3,0.428571,0.428571,1.166667,7,5,-
2251,1,0,0.500000,0.000000,2.000000,2,1,-
2252,10,11,0.454545,0.500000,1.047619,22,8,-


**Training and Evaluating an SVM Classifier for Morse Code Prediction:**

A linear kernel SVM classifier is trained on a preprocessed dataset split into 75% training and 25% testing sets, using frequency features from Morse code sequences to predict original signs. The model's accuracy, indicating its effectiveness in decoding Morse code patterns, is evaluated and printed.

In [None]:
X = df.drop('Sign_at_Same_Index', axis=1)
y = df['Sign_at_Same_Index']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.75, random_state=42)

# Create an SVM classifier
svm_classifier = SVC(kernel='linear')

# Train the classifier on the training set
svm_classifier.fit(X_train, y_train)

# Make predictions on the test set
predictions = svm_classifier.predict(X_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 60.97%


**Training and Evaluating a Logistic Regression Model for Morse Code Prediction:**

Using Logistic Regression, this code trains a model on 75% of Morse code data for sequence prediction, evaluates its accuracy, and compares its predictive performance with other algorithms. It highlights the model's effectiveness in recognizing Morse code patterns through accuracy metrics.

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.75, random_state=42)

# Create a Logistic Regression model
logistic_regression_model = LogisticRegression()

# Train the model on the training set
logistic_regression_model.fit(X_train, y_train)

# Make predictions on the test set
predictions = logistic_regression_model.predict(X_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 60.85%


In [None]:
morse_code_text = morse_text
morse_code_text, final_label = corrupt_data(0.01)

294


In [None]:
morse_code_text

'.----.----..---.......--------..--------.....--...----------   -.-..-.-....-....-...-.-.-..----   -.-..-.-....-....-...-.-.-.   .-...-.-......-....--------   .---------.-.-.------.-.-.-..---..----......---.----....-.----   ..-------------..-----...-.....---------..---..-------.   -.-......-.--.-..-.   .----   .--..-.---..-........---.-.   .-.----.....-.-   .-...--.--.-..----.   --..---...-..   ..-.--.   .--   -.....   ..-.---.-.--.---....-..-.--------....--.-.-...-..   -..-----.   .....--..-.--.   ..-.   -.....   .--..-...---...-.-.-.-   -.....   .--...--.-.-..   ...   ..-...-.-.   .--.-...   .-   .-.......-..-.--.   -.-..-.-..--..-   ---..-.   --..-..-......   .--.-..   ..-..-..---..-..--.-   ..-..-..---.--..-.....-.-.-   ..   --   -.-.-.--..-.--.   ----   .-..---...-.   -.-----..---..--   .....   -....---..---.....-.-.-.-   ..   -.-...-.-..-.--   .---.-.-.-   .-...--.--.-..----.   .--.----.-....-..-..   -.....   -.-..-...--..-.-..   .-   -.....-   .-..----.--...-.   .--.-..   -.....

In [None]:
#pip install --upgrade tensorflow

In [None]:
y

0       .
1       .
2       .
3       -
4       .
       ..
2249    -
2250    -
2251    -
2252    -
2253    -
Name: Sign_at_Same_Index, Length: 2254, dtype: object

In [None]:
modified_morse_list

['.----.----..---...?...--------..--------.....--...----------',
 '-.-..-.-....-.?..-...-.-.-..----',
 '-.-..-.-....-....-..?-.-.-.',
 '.?...-.-......-....--------',
 '.---------.-.-.------.-.-.-..---..----.?....---.----....-.----',
 '..-------------..-----?..-.....---------..---..-------.',
 '-?-......-.--.-..-.',
 '.?---',
 '.--.?-.---..-........---.-.',
 '.?.----.....-.-',
 '.-...--.--.-.?----.',
 '--..?--...-..',
 '..-.--?',
 '.?-',
 '-...?.',
 '..-.?--.-.--.---....-..-.--------....--.-.-...-..',
 '-.?-----.',
 '?....--..-.--.',
 '..-?',
 '-....?',
 '.--..-.?.---...-.-.-.-',
 '-?....',
 '.-?...--.-.-..',
 '..?',
 '..-.?.-.-.',
 '.--?-...',
 '.?',
 '.-...?...-..-.--.',
 '-.-..-.?..--..-',
 '-?-..-.',
 '--..-..-.....?',
 '.?-.-..',
 '..-..-..---..-..--.?',
 '..-..-..---.--..-.....-.-.?',
 '?.',
 '?-',
 '-?-.-.--..-.--.',
 '?---',
 '.-..---...-?',
 '-.-----?.---..--',
 '..?..',
 '-....---..---.....-.?.-.-',
 '.?',
 '-.-...-.-..-.?-',
 '.---?-.-.-',
 '.-...--.--.-..-?--.',
 '.--.----.-

In [None]:
df.columns

Index(['Dot_Count', 'Dash_Count', 'Dot_Ratio', 'Dash_Ratio', 'Average_Length',
       'Total_Length', 'Index_of_?', 'Sign_at_Same_Index'],
      dtype='object')

In [None]:
modified_morse_list[:3], morse_list[:3]

(['.----.----..---...?...--------..--------.....--...----------',
  '-.-..-.-....-.?..-...-.-.-..----',
  '-.-..-.-....-....-..?-.-.-.'],
 ['.----.----..---.......--------..--------.....--...----------',
  '-.-..-.-....-....-...-.-.-..----',
  '-.-..-.-....-....-...-.-.-.'])

In [None]:
df['Sign_at_Same_Index'][:3]

0    .
1    .
2    .
Name: Sign_at_Same_Index, dtype: object

In [None]:
lstm_df = pd.DataFrame({'Modified_Morse_list':modified_morse_list, 'y': y})
lstm_df.head()

Unnamed: 0,Modified_Morse_list,y
0,.----.----..---...?...--------..--------.....-...,.
1,-.-..-.-....-.?..-...-.-.-..----,.
2,-.-..-.-....-....-..?-.-.-.,.
3,.?...-.-......-....--------,-
4,.---------.-.-.------.-.-.-..---..----.?....--...,.


**Training an RNN for Morse Code Label Prediction:**

he process of developing and training a Recurrent Neural Network (RNN) with TensorFlow's Keras API for Morse code label prediction, involving data preprocessing (like encoding and padding), model construction (embedding, LSTM, and dense layers), and training (using Adam optimizer and binary crossentropy loss) over 10 epochs. The dataset is divided into training (80%) and testing sets, followed by model evaluation on the test set using accuracy metrics. The workflow encompasses the entire pipeline from preprocessing to evaluation, demonstrating the model's predictive performance on Morse code sequences.

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import numpy as np

char_to_num = {'-': 0, '.': 1, '?': 2, ' ': 3} 
# Assuming `lstm_df` is your DataFrame and already loaded
X = lstm_df['Modified_Morse_list'].values
y = lstm_df['y'].values

# Step 1: Data Preprocessing
## Encoding labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

## Encoding Morse code sequences as integers
# This is a simplified example. You may need a custom function to properly encode Morse sequences.
X_encoded = [list(map(ord, list(sequence))) for sequence in X]  # Example encoding: converting characters to their ASCII values
X_padded = pad_sequences(X_encoded, padding='post')  # Padding sequences

# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(X_padded, y_encoded, test_size=0.2, random_state=42)

# Step 2: Model Construction
model = Sequential([
    Embedding(input_dim=256, output_dim=32),  # Adjust input_dim based on your encoding
    LSTM(64),
    Dense(1, activation='sigmoid')
])

# Step 3: Training
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

# Step 4: Evaluation
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc}")


Epoch 1/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 18ms/step - accuracy: 0.5644 - loss: 0.6800 - val_accuracy: 0.5873 - val_loss: 0.6736
Epoch 2/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6103 - loss: 0.6695 - val_accuracy: 0.5873 - val_loss: 0.6737
Epoch 3/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.6233 - loss: 0.6660 - val_accuracy: 0.5873 - val_loss: 0.6736
Epoch 4/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6036 - loss: 0.6720 - val_accuracy: 0.5873 - val_loss: 0.6736
Epoch 5/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6014 - loss: 0.6730 - val_accuracy: 0.5873 - val_loss: 0.6743
Epoch 6/10
[1m46/46[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.6122 - loss: 0.6685 - val_accuracy: 0.5873 - val_loss: 0.6735
Epoch 7/10
[1m46/46[0m [32m━━━━