In [1]:
!gdown --id 1KLn3NLLv2rng2vV_8Ys3Yn53iahv9shd

Downloading...
From (original): https://drive.google.com/uc?id=1KLn3NLLv2rng2vV_8Ys3Yn53iahv9shd
From (redirected): https://drive.google.com/uc?id=1KLn3NLLv2rng2vV_8Ys3Yn53iahv9shd&confirm=t&uuid=2945bca2-e802-4894-8e6b-e83293c73ef3
To: /content/Suicide_Detection.csv
100% 167M/167M [00:04<00:00, 37.0MB/s]


In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, GlobalMaxPooling1D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 1. Load Dataset
dataset_path = "Suicide_Detection.csv"
df = pd.read_csv(dataset_path)

# 2. Data Preprocessing
texts = df['text'].astype(str).values
labels = df['class'].values

# Encode labels to integers
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Tokenize texts
max_vocab_size = 20000
max_sequence_length = 100
tokenizer = Tokenizer(num_words=max_vocab_size, oov_token="<OOV>")
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(padded_sequences, labels_encoded, test_size=0.2, random_state=42)

# 3. Model Building
embedding_dim = 128

input_layer = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(input_dim=max_vocab_size, output_dim=embedding_dim)(input_layer)
# Use unroll=True for LSTM compatibility with TFLite
lstm_layer = LSTM(64, return_sequences=True, unroll=True)(embedding_layer)
global_pooling = GlobalMaxPooling1D()(lstm_layer)
output_layer = Dense(1, activation='sigmoid')(global_pooling)

model = Model(inputs=input_layer, outputs=output_layer)

# 4. Compile Model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 5. Train Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=5,
    batch_size=128
)

# 6. Save Model
model.save("suicide_detection_model.keras")
print("Model saved as suicide_detection_model.keras")

# 7. Convert to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]  # Apply default optimizations
tflite_model = converter.convert()

# Save the TFLite model
with open("suicide_detection_model.tflite", "wb") as f:
    f.write(tflite_model)

print("Model converted and saved as suicide_detection_model.tflite")

Epoch 1/5
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.8862 - loss: 0.2811Epoch 2/5
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 9ms/step - accuracy: 0.9449 - loss: 0.1504 - val_accuracy: 0.9376 - val_loss: 0.1627
Epoch 3/5
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 9ms/step - accuracy: 0.9565 - loss: 0.1220 - val_accuracy: 0.9389 - val_loss: 0.1646
Epoch 4/5
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - accuracy: 0.9633 - loss: 0.1028 - val_accuracy: 0.9387 - val_loss: 0.1723
Epoch 5/5
[1m1451/1451[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 9ms/step - accuracy: 0.9714 - loss: 0.0821 - val_accuracy: 0.9369 - val_loss: 0.1821
Model saved as suicide_detection_model.keras
Saved artifact at '/tmp/tmp0fvdajod'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 100), dtype=tf.float32, name=

In [3]:
def predict_sentence_tflite(interpreter, tokenizer, sentence, max_sequence_length):
    """
    Fungsi untuk memprediksi label dari kalimat input menggunakan model TFLite.

    Args:
        interpreter: TensorFlow Lite interpreter yang sudah dimuat.
        tokenizer: Tokenizer yang digunakan saat pelatihan.
        sentence: Kalimat input (string) yang ingin diprediksi.
        max_sequence_length: Panjang maksimum urutan (sama seperti saat melatih model).

    Returns:
        Prediksi label ('suicide' atau 'non suicide') untuk kalimat input.
    """
    # Preprocess input sentence
    sequence = tokenizer.texts_to_sequences([sentence])
    padded_sequence = pad_sequences(sequence, maxlen=max_sequence_length, padding='post')

    # Set input tensor
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    interpreter.allocate_tensors()
    interpreter.set_tensor(input_details[0]['index'], padded_sequence.astype(np.float32))

    # Run inference
    interpreter.invoke()

    # Get the prediction result
    prediction = interpreter.get_tensor(output_details[0]['index'])[0][0]

    # Interpret prediction
    label = 'suicide' if prediction >= 0.5 else 'non suicide'
    return label

# 9. Load TFLite Model for Inference
interpreter = tf.lite.Interpreter(model_path="suicide_detection_model.tflite")

In [4]:
sentence = "life is wonderful, i like being alive"
predicted_label = predict_sentence_tflite(interpreter, tokenizer, sentence, max_sequence_length)
print(f"Kalimat: \"{sentence}\"")
print(f"Prediksi: {predicted_label}")

Kalimat: "life is wonderful, i like being alive"
Prediksi: non suicide
