# Importa bibliotecas

In [None]:
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, Input, Masking, SimpleRNN
from keras.models import Sequential
from keras.preprocessing.sequence import pad_sequences
from keras.utils import set_random_seed
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder

# Preparando dados

In [None]:
# Load every CSV that matches the pattern and stack them into a single frame.
csv_pattern = '/mnt/d/dados_surdos/CSVs/dados_pessoa2_*.csv'

# glob returns matches in arbitrary, filesystem-dependent order; sorting keeps
# the row order of the concatenated frame reproducible between runs/machines.
csv_files = sorted(glob.glob(csv_pattern))

# Fail early with an actionable message instead of pandas' generic
# "No objects to concatenate" when nothing matches the pattern.
if not csv_files:
    raise FileNotFoundError(f"No CSV files matched the pattern: {csv_pattern}")

# One DataFrame per file; built with a comprehension so the name `df` is only
# ever bound to the final concatenated frame.
dfs = [pd.read_csv(csv_file) for csv_file in csv_files]

df = pd.concat(dfs, ignore_index=True)
df

In [None]:
# Containers for the features (X_raw) and the labels (y_raw), plus the
# grouping of the frame by word and repetition.
X_raw, y_raw = [], []
grouped = df.groupby(by=['word', 'repetition'])

In [None]:
# Rescale the feature columns to values between 0 and 1.
# The first 63 columns of df are taken as the landmark features.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/validation/test split performed later — confirm this is intended.
landmark_cols = df.columns[:63].tolist()
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[landmark_cols])
df[landmark_cols] = scaled_values

In [None]:
# Build one sequence of frames per (word, repetition) group.
#
# The output lists are (re)initialised here so that re-running this cell does
# not append a second copy of every sequence, and the grouping is computed
# from df at this point so the sequences are taken from the frame as it is now
# rather than through a GroupBy object created in an earlier cell.
X_raw = []
y_raw = []

for (word, _rep), group in df.groupby(['word', 'repetition']):
    # 2-D array: one row per frame in the group, one column per landmark feature.
    X_raw.append(group[landmark_cols].values)
    # The word is the class label of the whole sequence.
    y_raw.append(word)

In [None]:
# Stratified split into train, validation and test sets.
# Step 1 holds out 30% of the sequences for testing; step 2 takes 28.5% of the
# remainder (roughly 20% of the total) for validation, leaving roughly 50% for
# training. Both steps share the same seed.
SPLIT_SEED = 42

X_temp, X_test, y_temp, y_test = train_test_split(
    X_raw,
    y_raw,
    test_size=0.3,
    random_state=SPLIT_SEED,
    stratify=y_raw,
)

X_train, X_val, y_train, y_val = train_test_split(
    X_temp,
    y_temp,
    test_size=0.285,
    random_state=SPLIT_SEED,
    stratify=y_temp,
)

In [None]:
# Report the share of the full dataset that ended up in each split.
for split_name, split_data in (
    ("Treino", X_train),
    ("Teste", X_test),
    ("Validacao", X_val),
):
    print(f"{split_name} - {(len(split_data)/len(X_raw))*100}%")

X_raw é uma lista de sequências de frames, onde:

- cada item da lista representa um vídeo.

- cada vídeo é representado como um array 2D de shape (T, 63), onde:

    - T = número de frames (time steps) do vídeo (varia de vídeo para vídeo)

    - 63 = número de features por frame (21 pontos da mão × 3 coordenadas)

In [None]:
# Padding: bring every sequence to a common length.
# The target length is the longest sequence found in the training split.
max_len = max(map(len, X_train))

# Padding is added after the last frame ('post') and values are stored as float32.
# NOTE(review): pad_sequences also truncates sequences longer than maxlen, by
# default from the start (truncating='pre') — confirm that suits validation/test
# sequences that are longer than the longest training sequence.
pad_kwargs = dict(maxlen=max_len, padding='post', dtype='float32')

X_train = pad_sequences(X_train, **pad_kwargs)
X_val = pad_sequences(X_val, **pad_kwargs)
X_test = pad_sequences(X_test, **pad_kwargs)

X_train.shape,X_val.shape,X_test.shape

In [None]:
# Display the shape of the training array.
X_train.shape

In [None]:
# One-hot encode the labels. The encoder is fitted on the training labels
# only; the same fitted encoder is then applied to validation and test.
def _as_column(labels):
    """Return the labels as a 2-D array with a single column."""
    return np.array(labels).reshape(-1, 1)


label_encoder = OneHotEncoder(sparse_output=False)
y_train = label_encoder.fit_transform(_as_column(y_train))
y_val = label_encoder.transform(_as_column(y_val))
y_test = label_encoder.transform(_as_column(y_test))

y_train.shape,y_test.shape,y_val.shape

# Criando o modelo

In [None]:
# Number of output classes (words/labels). Read from the width of the one-hot
# encoded training labels so the classifier's output size always matches the
# data instead of relying on a hard-coded 26.
num_classes = y_train.shape[1]

# Intended mini-batch size for training.
batch_size = 32

In [None]:
# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling are repeatable from one run to the next.
set_random_seed(42)

# Features per frame, read from the training array instead of hard-coding 63.
n_features = X_train.shape[-1]

# Layers are kept in named variables so they can be inspected individually.
mask = Masking(mask_value=0.0)  # time steps whose features all equal 0.0 are skipped
SRNN = SimpleRNN(128)
dense = Dense(num_classes, activation='softmax')  # one probability per class

# The input shape is declared with an explicit Input object; passing
# `input_shape=` to the first layer is discouraged by current Keras versions.
model = Sequential([
    Input(shape=(max_len, n_features)),
    mask,
    SRNN,
    dense,
])

# categorical_crossentropy pairs with the one-hot encoded labels.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)
model.summary()

# Treinamento

In [None]:
# Train the model, monitoring loss/accuracy on the validation split each epoch.
epochs = 50

history = model.fit(
    X_train, y_train,
    epochs=epochs,
    # Use the batch size defined with the other hyperparameters instead of a
    # duplicated literal, so changing it there actually affects training.
    batch_size=batch_size,
    validation_data=(X_val, y_val),
    verbose=1
)

# Avaliação do modelo

In [None]:
# Score the trained model on the held-out test split (no progress output)
# and report the resulting accuracy with four decimals.
test_metrics = model.evaluate(X_test, y_test, verbose=0)
test_loss, test_accuracy = test_metrics
print(f"\naccuracy: {test_accuracy:.4f}")

In [None]:
# Training history: accuracy on the left panel, loss on the right panel,
# each showing the training curve and the validation curve.
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (training key, validation key, panel title, y-axis label)
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss'),
]

for ax, (train_key, val_key, title, y_label) in zip(axes, panels):
    ax.plot(history.history[train_key])
    ax.plot(history.history[val_key])
    ax.set_title(title)
    ax.set_ylabel(y_label)
    ax.set_xlabel('Epoch')
    ax.legend(['Train', 'Validation'], loc='upper left')

plt.tight_layout()
plt.show()