In [None]:
import gradio as gr
import torch
import re
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Device index handed to the summarization pipeline below:
# 0 when CUDA is available, otherwise -1.
if torch.cuda.is_available():
    device_for_summary = 0
else:
    device_for_summary = -1


In [None]:
# Hugging Face checkpoint used by the summarization pipeline.
MODEL_NAME = "vinai/bartpho-syllable"

# Best-effort load: on any failure the error is printed and both `summarizer`
# and `tokenizer` are set to None, which `summarize_text` checks for.
try:
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=False)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
    summarizer = pipeline(
        task="summarization",
        model=model,
        tokenizer=tokenizer,
        device=device_for_summary,
    )
except Exception as load_error:
    print(f"L·ªói t·∫£i m√¥ h√¨nh BARTpho-syllable: {load_error}")
    summarizer = tokenizer = None

Device set to use cpu


In [None]:
def summarize_text(full_text):
    """Summarize a passage with the module-level ``summarizer`` pipeline.

    Args:
        full_text: The text to summarize. ``None`` and whitespace-only
            strings are treated as empty input.

    Returns:
        str: The summary taken from the first pipeline result (read from the
        ``'summary_text'`` key, falling back to ``'generated_text'``), or a
        fixed status message when the pipeline is not loaded or there is
        nothing to summarize.
    """
    if not summarizer:
        return "M√¥ h√¨nh t√≥m t·∫Øt ch∆∞a s·∫µn s√†ng."
    # Guard against None as well as blank strings so callers that forward an
    # empty UI field do not crash on `.strip()`.
    if full_text is None or not full_text.strip():
        return "Kh√¥ng c√≥ n·ªôi dung ƒë·ªÉ t√≥m t·∫Øt."

    # Deterministic beam search. `temperature` is intentionally not passed:
    # it is a sampling-only parameter, so with do_sample=False it has no
    # effect and transformers flags it during generation-config validation.
    result = summarizer(
        full_text,
        max_length=100,
        min_length=30,
        truncation=True,
        num_beams=4,
        no_repeat_ngram_size=3,
        repetition_penalty=2.0,
        length_penalty=1.0,
        do_sample=False,
        early_stopping=True,
    )

    # The output key depends on the pipeline task, so accept either one.
    first = result[0]
    key = 'summary_text' if 'summary_text' in first else 'generated_text'
    return first[key]

In [None]:
# 4) TRY THE MODEL ON A SAMPLE PASSAGE
################################################################################
# Multi-line sample passage that the next cell passes to `summarize_text`.
# NOTE(review): the literal below looks mis-encoded (UTF-8 text decoded with a
# legacy single-byte codec) - confirm the notebook is stored and read as UTF-8.
# NOTE(review): a later cell rebinds `text`, so re-running cells out of order
# changes what gets summarized.
text = """
H√¥m nay ch√∫ng ta h·ªçp ƒë·ªÉ b√†n v·ªÅ k·∫ø ho·∫°ch tri·ªÉn khai d·ª± √°n qu√Ω t·ªõi. 
ƒê·∫ßu ti√™n, nh√≥m k·ªπ thu·∫≠t s·∫Ω c·∫ßn ho√†n th√†nh vi·ªác n√¢ng c·∫•p h·ªá th·ªëng v√†o cu·ªëi th√°ng n√†y. 
ƒêi·ªÅu n√†y r·∫•t quan tr·ªçng ƒë·ªÉ ƒë·∫£m b·∫£o kh√¥ng ·∫£nh h∆∞·ªüng ƒë·∫øn ti·∫øn ƒë·ªô chung. 
Ti·∫øp theo, nh√≥m kinh doanh ƒë√£ li√™n h·ªá v·ªõi m·ªôt s·ªë ƒë·ªëi t√°c v√† h·ªç c·∫ßn ph·∫£n h·ªìi s·ªõm ƒë·ªÉ k·ªãp k√Ω k·∫øt h·ª£p ƒë·ªìng. 
Ngo√†i ra, v·∫•n ƒë·ªÅ ng√¢n s√°ch c≈©ng c·∫ßn ƒë∆∞·ª£c xem x√©t v√¨ c√≥ m·ªôt s·ªë kho·∫£n chi ch∆∞a ƒë∆∞·ª£c ph√™ duy·ªát. 
Ch√∫ng ta c·∫ßn ƒë∆∞a ra quy·∫øt ƒë·ªãnh s·ªõm ƒë·ªÉ ƒë·∫£m b·∫£o ngu·ªìn l·ª±c ƒë·∫ßy ƒë·ªß. 
Cu·ªëi c√πng, t√¥i ƒë·ªÅ xu·∫•t t·ªï ch·ª©c m·ªôt cu·ªôc h·ªçp nh·ªè v√†o tu·∫ßn sau ƒë·ªÉ ki·ªÉm tra ti·∫øn ƒë·ªô t·ª´ng nh√≥m tr∆∞·ªõc khi c√≥ b√°o c√°o ch√≠nh th·ª©c. 
N·∫øu kh√¥ng c√≤n √Ω ki·∫øn g√¨ th√™m, ch√∫ng ta k·∫øt th√∫c cu·ªôc h·ªçp t·∫°i ƒë√¢y.
"""

In [None]:
# Summarize the sample passage, then print the original and the summary,
# each under its own heading.
summary = summarize_text(text)
for heading, body in (
    ("\nüìå VƒÉn b·∫£n g·ªëc:", text),
    ("\nüìå VƒÉn b·∫£n t√≥m t·∫Øt:", summary),
):
    print(heading)
    print(body)


üìå VƒÉn b·∫£n g·ªëc:

H√¥m nay ch√∫ng ta h·ªçp ƒë·ªÉ b√†n v·ªÅ k·∫ø ho·∫°ch tri·ªÉn khai d·ª± √°n qu√Ω t·ªõi. 
ƒê·∫ßu ti√™n, nh√≥m k·ªπ thu·∫≠t s·∫Ω c·∫ßn ho√†n th√†nh vi·ªác n√¢ng c·∫•p h·ªá th·ªëng v√†o cu·ªëi th√°ng n√†y. 
ƒêi·ªÅu n√†y r·∫•t quan tr·ªçng ƒë·ªÉ ƒë·∫£m b·∫£o kh√¥ng ·∫£nh h∆∞·ªüng ƒë·∫øn ti·∫øn ƒë·ªô chung. 
Ti·∫øp theo, nh√≥m kinh doanh ƒë√£ li√™n h·ªá v·ªõi m·ªôt s·ªë ƒë·ªëi t√°c v√† h·ªç c·∫ßn ph·∫£n h·ªìi s·ªõm ƒë·ªÉ k·ªãp k√Ω k·∫øt h·ª£p ƒë·ªìng. 
Ngo√†i ra, v·∫•n ƒë·ªÅ ng√¢n s√°ch c≈©ng c·∫ßn ƒë∆∞·ª£c xem x√©t v√¨ c√≥ m·ªôt s·ªë kho·∫£n chi ch∆∞a ƒë∆∞·ª£c ph√™ duy·ªát. 
Ch√∫ng ta c·∫ßn ƒë∆∞a ra quy·∫øt ƒë·ªãnh s·ªõm ƒë·ªÉ ƒë·∫£m b·∫£o ngu·ªìn l·ª±c ƒë·∫ßy ƒë·ªß. 
Cu·ªëi c√πng, t√¥i ƒë·ªÅ xu·∫•t t·ªï ch·ª©c m·ªôt cu·ªôc h·ªçp nh·ªè v√†o tu·∫ßn sau ƒë·ªÉ ki·ªÉm tra ti·∫øn ƒë·ªô t·ª´ng nh√≥m tr∆∞·ªõc khi c√≥ b√°o c√°o ch√≠nh th·ª©c. 
N·∫øu kh√¥ng c√≤n √Ω ki·∫øn g√¨ th√™m, ch√∫ng ta k·∫øt th√∫c cu·ªôc h·ªçp t·∫°i ƒë√¢y.


üìå VƒÉn b·∫£n t√≥m t·∫Øt:
H√¥m nay ch√∫n

In [None]:
# if __name__ == "__main__":
#     app = build_gradio_app()
#     app.queue()
#     app.launch(server_name="0.0.0.0", server_port=7860, share=False)

ERROR:    [Errno 10048] error while attempting to bind on address ('0.0.0.0', 7860): only one usage of each socket address (protocol/network address/port) is normally permitted


OSError: Cannot find empty port in range: 7860-7860. You can specify a different port by setting the GRADIO_SERVER_PORT environment variable or passing the `server_port` parameter to `launch()`.

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Standalone check of the ViT5 news-summarization checkpoint.
# NOTE(review): this cell rebinds the module-level `tokenizer`, `model` and
# `text` names used by earlier cells; re-run those cells before reusing them.

# Pick the device at runtime instead of hard-coding CUDA, so the cell also
# runs on CPU-only PyTorch builds (where `model.cuda()` raises
# "Torch not compiled with CUDA enabled").
vit5_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

tokenizer = AutoTokenizer.from_pretrained("VietAI/vit5-large-vietnews-summarization")
model = AutoModelForSeq2SeqLM.from_pretrained("VietAI/vit5-large-vietnews-summarization")
model.to(vit5_device)

sentence = "VietAI l√† t·ªï ch·ª©c phi l·ª£i nhu·∫≠n v·ªõi s·ª© m·ªánh ∆∞∆°m m·∫ßm t√†i nƒÉng v·ªÅ tr√≠ tu·ªá nh√¢n t·∫°o v√† x√¢y d·ª±ng m·ªôt c·ªông ƒë·ªìng c√°c chuy√™n gia trong lƒ©nh v·ª±c tr√≠ tu·ªá nh√¢n t·∫°o ƒë·∫≥ng c·∫•p qu·ªëc t·∫ø t·∫°i Vi·ªát Nam."
# The input is wrapped with a "vietnews: " prefix and an explicit " </s>"
# end-of-sequence marker before tokenization.
text =  "vietnews: " + sentence + " </s>"
encoding = tokenizer(text, return_tensors="pt")
# Inputs must live on the same device as the model.
input_ids = encoding["input_ids"].to(vit5_device)
attention_masks = encoding["attention_mask"].to(vit5_device)
outputs = model.generate(
    input_ids=input_ids, attention_mask=attention_masks,
    max_length=256,
    early_stopping=True
)
# Decode and print each generated sequence without special tokens.
for output in outputs:
    line = tokenizer.decode(output, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    print(line)


  from .autonotebook import tqdm as notebook_tqdm


AssertionError: Torch not compiled with CUDA enabled