In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' # Suppress TF INFO/WARNING

# Try to force CPU before TF initializes fully
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # Should hide GPUs from TF

import tensorflow as tf

# After import, double-check and explicitly set visible devices to CPU only
try:
    tf.config.set_visible_devices([], 'GPU') # Tell TF to use no GPUs
    physical_devices = tf.config.list_physical_devices('GPU')
    if not physical_devices:
        print("SUCCESS: TensorFlow is configured to use CPU only (no GPUs visible).")
    else:
        print(f"WARNING: TensorFlow still sees GPUs: {physical_devices}. CPU forcing might not be fully effective.")
except RuntimeError as e:
    # This can happen if GPUs were already initialized or if there's a context issue
    print(f"RuntimeError during set_visible_devices: {e}. Will proceed assuming CPU if no GPU ops fail.")
except Exception as e_config:
    print(f"An unexpected error occurred during tf.config.set_visible_devices: {e_config}")


# Now, the rest of your imports
import transformers
from transformers import AutoTokenizer, TFAutoModelForSeq2SeqLM, pipeline
import yake

# Version checks (optional here, but good for confirmation)
if hasattr(tf, 'keras') and hasattr(tf.keras, '__version__'):
    print(f"tf.keras.__version__: {tf.keras.__version__}")
if hasattr(tf, 'version') and hasattr(tf.version, 'VERSION'):
    print(f"tf.version.VERSION: {tf.version.VERSION}")

print(f"Transformers version: {transformers.__version__}")

tf.version.VERSION: 2.13.0
Transformers version: 4.52.4


In [2]:
# --- Text Summarization (TensorFlow) ---

summarizer_model_name = "t5-small" # T5 models work well with TensorFlow too

# Load tokenizer (same as before) and TF model
try:
    summarizer_tokenizer = AutoTokenizer.from_pretrained(summarizer_model_name)
    # Use TFAutoModelForSeq2SeqLM for TensorFlow-specific Keras model
    summarizer_model_tf = TFAutoModelForSeq2SeqLM.from_pretrained(summarizer_model_name)
    print(f"TensorFlow Summarizer model '{summarizer_model_name}' loaded successfully.")
except Exception as e:
    print(f"Error loading TensorFlow summarizer model: {e}")
    summarizer_tokenizer = None
    summarizer_model_tf = None

# Using the Hugging Face pipeline with TensorFlow
try:
    # Specify framework="tf" for TensorFlow pipeline
    summarization_pipeline_tf = pipeline("summarization", model=summarizer_model_name, tokenizer=summarizer_model_name, framework="tf")
    print(f"TensorFlow Summarization pipeline for '{summarizer_model_name}' created successfully.")
except Exception as e:
    print(f"Error creating TensorFlow summarization pipeline: {e}")
    summarization_pipeline_tf = None

All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.


TensorFlow Summarizer model 't5-small' loaded successfully.


All PyTorch model weights were used when initializing TFT5ForConditionalGeneration.

All the weights of TFT5ForConditionalGeneration were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFT5ForConditionalGeneration for predictions without further training.
Device set to use 0


TensorFlow Summarization pipeline for 't5-small' created successfully.


In [3]:
def summarize_text_direct_tf(text, model, tokenizer, max_length=150, min_length=30, num_beams=4):
    """
    Summarizes text using a pre-trained TensorFlow Seq2Seq model and tokenizer directly.
    """
    if model is None or tokenizer is None:
        return "Summarizer TF model not loaded."

    if "t5" in model.name.lower(): # Accessing model name might be slightly different for TF models
         preprocess_text = "summarize: " + text.strip().replace("\n", " ")
    else:
         preprocess_text = text.strip().replace("\n", " ")

    # Tokenize for TensorFlow: ensure return_tensors="tf"
    inputs = tokenizer(preprocess_text, return_tensors="tf", max_length=1024, truncation=True)

    # Generate summary using TensorFlow model's generate method
    summary_ids = model.generate(
        inputs.input_ids, # Pass input_ids tensor
        max_length=max_length,
        min_length=min_length,
        num_beams=num_beams,
        early_stopping=True
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Example Usage (Direct TF Method)
sample_text_long = """
Artificial intelligence (AI) is rapidly transforming various industries, from healthcare to finance and entertainment.
Machine learning, a subset of AI, enables systems to learn from data and make predictions or decisions without being explicitly programmed.
Deep learning, a further specialization, utilizes neural networks with many layers to analyze complex patterns in large datasets.
Natural Language Processing (NLP) allows computers to understand, interpret, and generate human language, powering applications like chatbots and machine translation.
Computer vision, another AI field, focuses on enabling machines to interpret and understand visual information from the world, such as images and videos.
The ethical implications of AI, including bias in algorithms and job displacement, are critical areas of ongoing discussion and research.
As AI technology continues to advance, its integration into daily life is expected to grow, offering both opportunities and challenges.
Developing robust and responsible AI systems is paramount for harnessing its full potential while mitigating risks.
Many companies are investing heavily in AI research and development to gain a competitive edge.
"""

if summarizer_model_tf and summarizer_tokenizer:
    summary1_tf = summarize_text_direct_tf(sample_text_long, summarizer_model_tf, summarizer_tokenizer)
    print("--- Summary (Direct TF Method) ---")
    print(summary1_tf)
else:
    print("Skipping direct TF summarization as model/tokenizer failed to load.")

--- Summary (Direct TF Method) ---
machine learning enables systems to learn from data and make predictions or decisions without being explicitly programmed. deep learning, a subset of AI, enables systems to learn from data and make predictions or decisions without being explicitly programmed.


In [4]:
def summarize_text_pipeline_tf(text, pipeline_instance, max_length=150, min_length=30):
    """
    Summarizes text using the Hugging Face TensorFlow summarization pipeline.
    """
    if pipeline_instance is None:
        return "Summarization TF pipeline not loaded."
    
    try:
        result = pipeline_instance(text, max_length=max_length, min_length=min_length, do_sample=False)
        return result[0]['summary_text']
    except Exception as e:
        return f"Error during TF pipeline summarization: {e}"

# Example Usage (TF Pipeline Method)
if summarization_pipeline_tf:
    summary2_tf = summarize_text_pipeline_tf(sample_text_long, summarization_pipeline_tf)
    print("\n--- Summary (TF Pipeline Method) ---")
    print(summary2_tf)
else:
    print("Skipping TF pipeline summarization as pipeline failed to create.")


--- Summary (TF Pipeline Method) ---
machine learning enables systems to learn from data and make predictions or decisions without being explicitly programmed . natural language processing (NLP) allows computers to understand, interpret, and generate human language, powering applications like chatbots and machine translation .


In [5]:
# --- Keyword/Tag Extraction (using YAKE!) ---

# YAKE! parameters
language = "en"
max_ngram_size = 3
deduplication_threshold = 0.9
num_of_keywords = 10

kw_extractor = yake.KeywordExtractor(lan=language, 
                                     n=max_ngram_size, 
                                     dedupLim=deduplication_threshold, 
                                     top=num_of_keywords, 
                                     features=None)

def extract_tags_yake(text, extractor):
    if extractor is None:
        return ["YAKE! extractor not initialized."]
    try:
        keywords_with_scores = extractor.extract_keywords(text)
        keywords = [kw[0] for kw in keywords_with_scores]
        return keywords
    except Exception as e:
        return [f"Error during YAKE! keyword extraction: {e}"]

# Example Usage (YAKE!)
tags_tf = extract_tags_yake(sample_text_long, kw_extractor) # Renamed variable for clarity
print("\n--- Extracted Tags (YAKE!) ---")
print(tags_tf)


--- Extracted Tags (YAKE!) ---
['Artificial intelligence', 'transforming various industries', 'finance and entertainment', 'rapidly transforming', 'healthcare to finance', 'Natural Language Processing', 'Language Processing', 'Machine learning', 'Deep learning', 'Artificial']


In [6]:
def summarize_and_tag_tf(text, summarizer_tf_pipe, tag_extractor):
    summary = "Could not summarize."
    tags_list = ["Could not extract tags."]

    if summarizer_tf_pipe:
        summary = summarize_text_pipeline_tf(text, summarizer_tf_pipe) # Use TF pipeline function
    
    if tag_extractor:
        tags_list = extract_tags_yake(text, tag_extractor)
        
    return {
        "original_text_length": len(text.split()),
        "summary": summary,
        "summary_length": len(summary.split()),
        "tags": tags_list
    }

# Example of combined function
if summarization_pipeline_tf and kw_extractor:
    analysis_result_tf = summarize_and_tag_tf(sample_text_long, summarization_pipeline_tf, kw_extractor)
    print("\n--- Combined Analysis (TF) ---")
    print(f"Original Text Length (words): {analysis_result_tf['original_text_length']}")
    print(f"Summary: {analysis_result_tf['summary']}")
    print(f"Summary Length (words): {analysis_result_tf['summary_length']}")
    print(f"Tags: {analysis_result_tf['tags']}")
else:
    print("Skipping combined TF analysis due to issues with pipeline or extractor.")


--- Combined Analysis (TF) ---
Original Text Length (words): 167
Summary: machine learning enables systems to learn from data and make predictions or decisions without being explicitly programmed . natural language processing (NLP) allows computers to understand, interpret, and generate human language, powering applications like chatbots and machine translation .
Summary Length (words): 39
Tags: ['Artificial intelligence', 'transforming various industries', 'finance and entertainment', 'rapidly transforming', 'healthcare to finance', 'Natural Language Processing', 'Language Processing', 'Machine learning', 'Deep learning', 'Artificial']
