In [1]:
import helpers
# Course bootstrap. Per the output printed by this cell, it adds the notebook
# directory to the Python path, enables autoreload, configures logging and
# pandas display, loads variables from the .env file, and adjusts MLflow settings.
# NOTE(review): `__vsc_ipynb_file__` looks like a global injected by the VS Code
# notebook frontend — confirm; if it is undefined in another frontend this line
# raises NameError.
helpers.initialize(notebook_path=__vsc_ipynb_file__)

✅ Added notebook directory to Python path:
   /workspaces/llmops-course/modules/prompt_engineering/solutions
   You can now import modules from this directory
🔄 Initializing Course environment...
🔁 Autoreload enabled: modules will reload automatically when changed
📝 Logging configured
📊 Pandas display settings configured for better output
🔍 Looking for .env file at: /workspaces/llmops-course/.env
✅ Successfully loaded environment variables from /workspaces/llmops-course/.env
📋 Loaded variables: GEMINI_API_KEY=****sbqo
⚙️ Disabled MLflow system metrics logging
📔 Disabled MLflow notebook display (avoids VSCode bugs)

🎉 All systems go! Your Course environment is ready for learning!


In [None]:
import mlflow
from rouge_score import rouge_scorer
from textsummarizer import TextSummarizer

def rouge_metrics(reference, prediction):
    """Return the ROUGE-1 F-measure of ``prediction`` scored against ``reference``.

    A ``RougeScorer`` limited to the ``rouge1`` metric, with stemming enabled,
    is created on every call.
    """
    metric = 'rouge1'
    unigram_scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    return unigram_scorer.score(reference, prediction)[metric].fmeasure

with mlflow.start_run(run_name="Rouge Scorer"):
    # Run with a plain "summarize" prompt; the summary is scored with ROUGE-1
    # against the source article and everything is logged to MLflow.
    path = 'assets/articles_full_length/ai_relationships.html'
    # Read through a context manager so the file handle is closed right away
    # instead of being left for the garbage collector.
    with open(path) as article_file:
        text = article_file.read()

    prompt = "Summarize this article: {text}"
    max_output_tokens = 1000

    # Log the run configuration and the raw input alongside the results.
    mlflow.log_param("max_output_tokens", max_output_tokens)
    mlflow.log_param("prompt", prompt)
    mlflow.log_param("path", path)
    mlflow.log_text(text, "original_text.txt")

    summarizer = TextSummarizer(prompt=prompt, max_output_tokens=max_output_tokens)
    summary = summarizer.summarize(text)

    mlflow.log_text(summary, "summary.txt")

    # len() of the summary text, i.e. a character count rather than words/tokens.
    mlflow.log_metric("summary_length", len(summary))

    # The full article text serves as the ROUGE reference.
    rouge_f1 = rouge_metrics(text, summary)
    mlflow.log_metric("rouge1_f1", rouge_f1)

Using default tokenizer.


🏃 View run Rouge Scorer at: http://localhost:5050/#/experiments/0/runs/4c93a859a7284e9b91a15a239863a03f
🧪 View experiment at: http://localhost:5050/#/experiments/0


In [None]:
import mlflow
from rouge_score import rouge_scorer
from textsummarizer import TextSummarizer

def rouge_metrics(reference, prediction):
    """Score ``prediction`` against ``reference`` and return the ROUGE-1 F-measure.

    Builds a stemming ``RougeScorer`` for the single ``rouge1`` metric each
    time it is called.
    """
    metric = 'rouge1'
    unigram_scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    result = unigram_scorer.score(reference, prediction)
    return result[metric].fmeasure

with mlflow.start_run(run_name="Rouge Scorer"):
    # Same pipeline as a plain summarization run, but the prompt asks the model
    # to reuse phrases from the article.
    path = 'assets/articles_full_length/ai_relationships.html'
    # Context manager guarantees the file handle is closed after reading.
    with open(path) as article_file:
        text = article_file.read()

    prompt = "Extract the most important information from this article, covering all key points, main arguments, and conclusions. Include relevant phrases from the original text: {text}"
    max_output_tokens = 1000

    # max_output_tokens was previously assigned but never used; log it and pass
    # it to the summarizer so the configured limit actually applies to the run.
    mlflow.log_param("max_output_tokens", max_output_tokens)
    mlflow.log_param("prompt", prompt)
    mlflow.log_param("path", path)
    mlflow.log_text(text, "original_text.txt")

    summarizer = TextSummarizer(prompt=prompt, max_output_tokens=max_output_tokens)
    summary = summarizer.summarize(text)

    mlflow.log_text(summary, "summary.txt")

    # len() of the summary text, i.e. a character count rather than words/tokens.
    mlflow.log_metric("summary_length", len(summary))

    # The full article text serves as the ROUGE reference.
    rouge_f1 = rouge_metrics(text, summary)
    mlflow.log_metric("rouge1_f1", rouge_f1)

Using default tokenizer.


🏃 View run Rouge Scorer at: http://localhost:5050/#/experiments/0/runs/83b45e9f770346f8b876e5c3d767d547
🧪 View experiment at: http://localhost:5050/#/experiments/0


In [12]:
import mlflow
from rouge_score import rouge_scorer
from textsummarizer import TextSummarizer

def rouge_metrics(reference, prediction):
    """Compute the ROUGE-1 F-measure for ``prediction`` relative to ``reference``.

    The scorer is constructed per call with only ``rouge1`` enabled and
    ``use_stemmer=True``.
    """
    metric = 'rouge1'
    unigram_scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    rouge1 = unigram_scorer.score(reference, prediction)[metric]
    return rouge1.fmeasure

with mlflow.start_run(run_name="Link Summary Basic"):
    # Short link-preview prompt (50-75 words); the result is logged to MLflow
    # together with its length and ROUGE-1 F1 against the source page.
    path = 'assets/articles_full_length/ai_relationships.html'
    # Context manager guarantees the file handle is closed after reading.
    with open(path) as article_file:
        text = article_file.read()

    prompt = """Create a brief preview of the web page content in 50-75 words. Focus on what a reader would want to know before clicking:
{text}"""
    max_output_tokens = 150

    # max_output_tokens was previously assigned but never used; log it and pass
    # it to the summarizer so the configured limit actually applies to the run.
    mlflow.log_param("max_output_tokens", max_output_tokens)
    mlflow.log_param("prompt", prompt)
    mlflow.log_param("path", path)
    mlflow.log_text(text, "original_text.txt")

    summarizer = TextSummarizer(prompt=prompt, max_output_tokens=max_output_tokens)
    summary = summarizer.summarize(text)

    mlflow.log_text(summary, "summary.txt")

    # len() of the summary text, i.e. a character count rather than words/tokens.
    mlflow.log_metric("summary_length", len(summary))

    # The full page text serves as the ROUGE reference.
    rouge_f1 = rouge_metrics(text, summary)
    mlflow.log_metric("rouge1_f1", rouge_f1)

Using default tokenizer.


🏃 View run Link Summary Basic at: http://localhost:5050/#/experiments/0/runs/90aa7e8263164113be058bd14e4fc053
🧪 View experiment at: http://localhost:5050/#/experiments/0


In [None]:
import mlflow
from rouge_score import rouge_scorer
from textsummarizer import TextSummarizer

def rouge_metrics(reference, prediction):
    """Return the ROUGE-1 F-measure of ``prediction`` scored against ``reference``.

    A ``RougeScorer`` limited to the ``rouge1`` metric, with stemming enabled,
    is created on every call.
    """
    metric = 'rouge1'
    unigram_scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    return unigram_scorer.score(reference, prediction)[metric].fmeasure

with mlflow.start_run(run_name="Link Summary Optimized"):
    # Structured four-item preview prompt; the result is logged to MLflow
    # together with its length and ROUGE-1 F1 against the source page.
    path = 'assets/articles_full_length/ai_relationships.html'
    # Context manager guarantees the file handle is closed after reading.
    with open(path) as article_file:
        text = article_file.read()

    prompt = """Extract the essence of this web content in exactly 4 bullet points (total under 100 words):

    1. 🔍 **Focus:** [Main topic/purpose in 5-7 words]
    2. 📰 **Content:** [Key information in 5-7 words]
    3. 🎯 **Tone:** [Style/perspective in 5-7 words]
    4. 👥 **Value:** [User benefit in 5-7 words]

    Use exact phrases from the original where possible to increase content overlap. Keep descriptions under 7 words each. Maintain the exact formatting with emoji, bold categories, and brief descriptions:
    {text}"""

    max_output_tokens = 200

    # max_output_tokens was previously assigned but never used; log it and pass
    # it to the summarizer so the configured limit actually applies to the run.
    mlflow.log_param("max_output_tokens", max_output_tokens)
    mlflow.log_param("prompt", prompt)
    mlflow.log_param("path", path)
    mlflow.log_text(text, "original_text.txt")

    summarizer = TextSummarizer(prompt=prompt, max_output_tokens=max_output_tokens)
    summary = summarizer.summarize(text)

    mlflow.log_text(summary, "summary.txt")

    # len() of the summary text, i.e. a character count rather than words/tokens.
    mlflow.log_metric("summary_length", len(summary))

    # The full page text serves as the ROUGE reference.
    rouge_f1 = rouge_metrics(text, summary)
    mlflow.log_metric("rouge1_f1", rouge_f1)

Using default tokenizer.


🏃 View run Link Summary Optimized at: http://localhost:5050/#/experiments/0/runs/e462d309536948d885b47101d01d9330
🧪 View experiment at: http://localhost:5050/#/experiments/0


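Improve the ROUGE score: the next run loosens the word limits in the prompt so that a longer summary can overlap more with the source text.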

In [None]:
import mlflow
from rouge_score import rouge_scorer
from textsummarizer import TextSummarizer

def rouge_metrics(reference, prediction):
    """Score ``prediction`` against ``reference`` and return the ROUGE-1 F-measure.

    Builds a stemming ``RougeScorer`` for the single ``rouge1`` metric each
    time it is called.
    """
    metric = 'rouge1'
    unigram_scorer = rouge_scorer.RougeScorer([metric], use_stemmer=True)
    result = unigram_scorer.score(reference, prediction)
    return result[metric].fmeasure

with mlflow.start_run(run_name="Link Summary Optimized"):
    # Variant of the structured four-item preview prompt with looser word
    # limits (15-20 words for the first two items, under 200 words in total).
    path = 'assets/articles_full_length/ai_relationships.html'
    # Context manager guarantees the file handle is closed after reading.
    with open(path) as article_file:
        text = article_file.read()

    # The closing instruction used to say "Keep descriptions under 7 words each",
    # which contradicted the 15-20 word limits above; it now defers to the
    # per-item limits.
    prompt = """Extract the essence of this web content in exactly 4 bullet points (total under 200 words):

    1. 🔍 **Focus:** [Main topic/purpose in 15-20 words]
    2. 📰 **Content:** [Key information in 15-20 words]
    3. 🎯 **Tone:** [Style/perspective in 5-7 words]
    4. 👥 **Value:** [User benefit in 5-7 words]

    Use exact phrases from the original where possible to increase content overlap. Keep each description within its stated word limit. Maintain the exact formatting with emoji, bold categories, and brief descriptions:
    {text}"""

    max_output_tokens = 300

    # max_output_tokens was previously assigned but never used; log it and pass
    # it to the summarizer so the configured limit actually applies to the run.
    mlflow.log_param("max_output_tokens", max_output_tokens)
    mlflow.log_param("prompt", prompt)
    mlflow.log_param("path", path)
    mlflow.log_text(text, "original_text.txt")

    summarizer = TextSummarizer(prompt=prompt, max_output_tokens=max_output_tokens)
    summary = summarizer.summarize(text)

    mlflow.log_text(summary, "summary.txt")

    # len() of the summary text, i.e. a character count rather than words/tokens.
    mlflow.log_metric("summary_length", len(summary))

    # The full page text serves as the ROUGE reference.
    rouge_f1 = rouge_metrics(text, summary)
    mlflow.log_metric("rouge1_f1", rouge_f1)