In [4]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [25]:
cols = ["solver", "time", "recon", "method", "stm", "stps", "etm", "etps", "rotations"]
df = pd.read_csv("solve_data.csv", names=cols)
df.head()

Unnamed: 0,solver,time,recon,method,stm,stps,etm,etps,rotations
0,Kevin Costello III,6.94,R' L y U' R U' R' U2' R' U R y' U2 R U R' y' U...,CFOP,43,6.2,47,6.77,4
1,Austin Moore,11.15,M' u U F' L U x2 U' M' L F' r' U x U M' U R' U...,Roux,52,,54,,2
2,Austin Moore,11.18,r D L U x R' F' r' F' U' M2' U R2 U2' r' U' r ...,Roux,54,,55,,1
3,Austin Moore,11.3,D U l F' U l' U R' U r' U' r U x U M' U R U' R...,Roux,67,,68,,1
4,Austin Moore,8.65,x' U2 F' U l' U R r U' x R' U M' U R' U R2' U ...,Roux,45,,47,,2


In [26]:
# Extract relevant columns
X_text = df['recon'].astype(str).values  # Move sequences
X_numeric = df[['stm', 'stps', 'etm', 'etps', 'rotations']].values  # Other features
y = df['method'].values  # Target (solving method)


In [27]:
# Tokenize move sequences
tokenizer = Tokenizer(char_level=False)  # Moves as words, not letters
tokenizer.fit_on_texts(X_text)
X_text_seq = tokenizer.texts_to_sequences(X_text)
X_text_padded = pad_sequences(X_text_seq, padding='post', maxlen=100)

In [30]:
# Encode the target labels
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [31]:
X_train_text, X_test_text, X_train_num, X_test_num, y_train, y_test = train_test_split(
    X_text_padded, X_numeric, y_encoded, test_size=0.2, random_state=42
)

In [32]:
embedding_dim = 64
lstm_units = 128

model = Sequential([
    Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=embedding_dim, input_length=X_text_padded.shape[1]),
    LSTM(lstm_units, return_sequences=True),
    LSTM(lstm_units),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(len(np.unique(y_encoded)), activation='softmax')  # Multi-class classification
])

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train_text, y_train, epochs=10, batch_size=32, validation_data=(X_test_text, y_test))

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test_text, y_test)
print(f"Test Accuracy: {test_acc:.4f}")



Epoch 1/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 71ms/step - accuracy: 0.9268 - loss: 0.4474 - val_accuracy: 0.9369 - val_loss: 0.2951
Epoch 2/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 69ms/step - accuracy: 0.9300 - loss: 0.3311 - val_accuracy: 0.9369 - val_loss: 0.2941
Epoch 3/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 70ms/step - accuracy: 0.9319 - loss: 0.3218 - val_accuracy: 0.9369 - val_loss: 0.2950
Epoch 4/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 69ms/step - accuracy: 0.9355 - loss: 0.3024 - val_accuracy: 0.9369 - val_loss: 0.2958
Epoch 5/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 69ms/step - accuracy: 0.9318 - loss: 0.3169 - val_accuracy: 0.9369 - val_loss: 0.2940
Epoch 6/10
[1m234/234[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 70ms/step - accuracy: 0.9338 - loss: 0.3123 - val_accuracy: 0.9369 - val_loss: 0.2960
Epoch 7/10
[1m2