In [1]:
%%capture
!pip -q install pyngrok
!pip install transformers
!pip install tf-keras

## Load model và define các hàm

In [4]:
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
import tensorflow as tf
import numpy as np

# Hàm convert label sang title
def convert_label_to_title(label):
    convert_dict = {
        0: "SỨC KHỎE",
        1: "GIÁO DỤC",
        2: "THỂ THAO",
        3: "PHÁP LUẬT",
        4: "KHOA HỌC",
        5: "DU LỊCH",
        6: "GIẢI TRÍ",
        7: "KINH DOANH"
    }
    return convert_dict[label]

# Tải model và tokenizer
checkpoint = 'minnehwg/vnexpress-title-classification'
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
tokenizer = AutoTokenizer.from_pretrained('distilbert-base-multilingual-cased')

# Hàm predict
def predict_sentence(model, tokenizer, sentence):
    input_data = tokenizer(sentence, return_tensors='tf', padding=True, truncation=True)
    logits = model(input_data['input_ids'], attention_mask=input_data['attention_mask']).logits
    probabilities = tf.nn.softmax(logits, axis=1)
    predicted_class = tf.argmax(logits, axis=1).numpy()[0]
    highest_probability = probabilities.numpy()[0, predicted_class]
    title = convert_label_to_title(predicted_class)
    return title, probabilities.numpy(), highest_probability

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/961 [00:00<?, ?B/s]

tf_model.h5:   0%|          | 0.00/541M [00:00<?, ?B/s]

All model checkpoint layers were used when initializing TFDistilBertForSequenceClassification.

All the layers of TFDistilBertForSequenceClassification were initialized from the model checkpoint at minnehwg/vnexpress-title-classification.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertForSequenceClassification for predictions without further training.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/466 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/996k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.96M [00:00<?, ?B/s]

## Tạo thư mục templates và viết html css
render_templates của flask yêu cầu các file html các thứ chứa trong một thư mục tên là templates, ta cần tạo ra nó trên môi trường colab này.

### Tạo thư mục templates

In [5]:
!mkdir templates

Viết file index.html trong thư mục, đây sẽ là giao diện chính

In [6]:
%%writefile templates/index.html
<!DOCTYPE html>
<html>
<head>
    <title>Predict Article Category</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 0;
            background-color: #f4f4f9;
            color: #333;
            text-align: center;
        }
        h1 {
            background-color: #4CAF50;
            color: white;
            padding: 20px;
        }
        form {
            background: white;
            padding: 20px;
            margin: 20px auto;
            width: 50%;
            border-radius: 8px;
            box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
        }
        label {
            font-weight: bold;
        }
        input[type="text"] {
            width: 80%;
            padding: 10px;
            margin-top: 10px;
            margin-bottom: 20px;
            border: 1px solid #ccc;
            border-radius: 4px;
        }
        button {
            background-color: #4CAF50;
            color: white;
            padding: 10px 20px;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            font-size: 16px;
        }
        button:hover {
            background-color: #45a049;
        }
        .result {
            background: #fff;
            margin: 20px auto;
            padding: 20px;
            width: 50%;
            border-radius: 8px;
            box-shadow: 0px 4px 6px rgba(0, 0, 0, 0.1);
        }
        .result h2 {
            color: #4CAF50;
        }
    </style>
</head>
<body>
    <h1>Article Title Prediction</h1>
    <form action="/predict" method="POST">
        <label for="title">Enter Article Title:</label><br>
        <input type="text" id="title" name="title" placeholder="Type your article title here..." required><br>
        <button type="submit">Predict</button>
    </form>

    {% if prediction %}
    <div class="result">
        <h2>Prediction Result</h2>
        <p><b>Input Title:</b> {{ input_title }}</p>
        <p><b>Predicted Category:</b> {{ prediction }}</p>
        <p><b>Confidence:</b> {{ probability }}</p>
    </div>
    {% endif %}
</body>
</html>


Writing templates/index.html


## Chạy shell này và nhấn vào link bên dưới
nó sẽ direct ra một trang khác, và bấm visit sẽ đến được flask app



In [None]:
from flask import Flask, request, render_template
from pyngrok import ngrok, conf

from google.colab import userdata


app = Flask(__name__)
conf.get_default().auth_token = userdata.get('ngrok')

public_url = ngrok.connect(8888).public_url
print(" * ngrok tunnel \"{}\" -> \"http://127.0.0.1:{}/\"".format(public_url, 5000))

app.config["BASE_URL"] = public_url

@app.route("/", methods=["GET"])
def index():
    return render_template("index.html")

@app.route("/predict", methods=["POST"])
def predict():
    # Lấy tiêu đề từ form
    sentence = request.form.get("title")  # Lấy giá trị người dùng nhập từ input có name="title"

    # Gọi hàm predict_sentence
    title, probabilities, probability = predict_sentence(model, tokenizer, sentence)

    # Trả kết quả về giao diện
    return render_template(
        "index.html",
        input_title=sentence,  # Truyền tiêu đề người dùng nhập
        prediction=title,  # Truyền kết quả dự đoán
        probability=round(probability * 100, 2),  # Hiển thị % confidence (làm tròn 2 chữ số)
    )

if __name__ =='__main__':
    app.run(debug=False, host='0.0.0.0', port=8888, use_reloader=False)

 * ngrok tunnel "https://0873-34-74-253-251.ngrok-free.app" -> "http://127.0.0.1:5000/"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:8888
 * Running on http://172.28.0.12:8888
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [23/Jan/2025 09:23:42] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [23/Jan/2025 09:23:43] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [23/Jan/2025 09:23:50] "POST /predict HTTP/1.1" 200 -
