# Text Summarization Using LLMs

In [None]:
import validmind as vm

# Initialize the ValidMind client with the tracking host, credentials and
# project. The "..." values are placeholders to replace with real ones.
vm.init(
    api_host="https://api.prod.validmind.ai/api/v1/tracking",
    api_key="...",
    api_secret="...",
    project="...",
)

## Load the sample dataset

In [None]:
# Let's first install the `datasets` library from Hugging Face
%pip install -q datasets

In [None]:
# Import the sample dataset from the library
from validmind.datasets.nlp import cnn_dailymail

# Report which columns the demo dataset module exposes.
print(
    f"Loaded demo dataset with: \n\n\t• Target column: '{cnn_dailymail.target_column}' "
    f"\n\t• Input text column: {cnn_dailymail.text_column} "
    f"\n\t• Prediction columns: '{cnn_dailymail.t5_prediction}', '{cnn_dailymail.gpt_35_prediction_column}'"
)

# Load the training and test dataframes from the "offline" source ("100k" size).
train_df, test_df = cnn_dailymail.load_data(
    source="offline",
    dataset_size="100k",
)

# Preview the first rows of the training dataframe as a sanity check.
cnn_dailymail.display_nice(train_df.head())


## LLM Setup

In [None]:
from validmind.models import FoundationModel, Prompt

In [None]:
import os

import dotenv

# Load variables from a `.env` file (if one is found) into the environment.
dotenv.load_dotenv()

# Fail fast when the OpenAI key is missing *or* empty. An empty value would
# pass an `is None` check and only surface later, on the first API request.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY not found")

In [None]:
from openai import OpenAI

# Shared OpenAI client used by `call_model` below.
model = OpenAI()


def call_model(prompt):
    """Send `prompt` as a single user message and return the reply text.

    Args:
        prompt: Text used as the `content` of the one user message.

    Returns:
        The `message.content` of the first choice in the completion response.
    """
    user_message = {"role": "user", "content": prompt}
    completion = model.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[user_message],
    )
    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Summarization prompt. `{article}` is the only placeholder; the literal is
# written without a leading or trailing newline so no stripping is needed.
prompt_template = """\
You are an AI with expertise in summarizing financial news.
Your task is to provide a concise summary of the specific news article provided below.
Before proceeding, take a moment to understand the context and nuances of the financial terminology used in the article.

Article to Summarize:

```
{article}
```

Please respond with a concise summary of the article's main points.
Ensure that your summary is based on the content of the article and not on external information or assumptions."""

# Names of the placeholders that appear in `prompt_template`.
prompt_variables = ["article"]

In [None]:
# Wrap the training dataframe as a ValidMind dataset.
vm_train_ds = vm.init_dataset(
    dataset=train_df,
    input_id="train_dataset",
    text_column="article",
    target_column="highlights",
)

# Wrap the test dataframe the same way, under its own input id.
vm_test_ds = vm.init_dataset(
    dataset=test_df,
    input_id="test_dataset",
    text_column="article",
    target_column="highlights",
)

# Pair the OpenAI-backed predict function with the summarization prompt.
summarization_prompt = Prompt(template=prompt_template, variables=prompt_variables)
vm_model = FoundationModel(
    predict_fn=call_model,
    prompt=summarization_prompt,
    input_id="llm_model",
)

# Link the `gpt_35_prediction` column to the model on BOTH datasets (test
# first, then train). NOTE(review): the original note calls these predictions
# pre-computed, so `call_model` is presumably not invoked here - confirm.
vm_test_ds.assign_predictions(vm_model, prediction_column="gpt_35_prediction")
vm_train_ds.assign_predictions(vm_model, prediction_column="gpt_35_prediction")

# Show both dataset wrappers.
print(vm_train_ds, vm_test_ds, sep="\n")


## Data Validation

### Data Description

- Text Description
- Common Words 
- Punctuations
- StopWords

In [None]:
# Run the TextDescription test on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.data_validation.nlp.TextDescription",
    inputs={"dataset": vm_test_ds},
)
test.log()

In [None]:
# Run the CommonWords test on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.data_validation.nlp.CommonWords",
    inputs={"dataset": vm_test_ds},
)
test.log()

In [None]:
# Run the Punctuations test on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.data_validation.nlp.Punctuations",
    inputs={"dataset": vm_test_ds},
)
test.log()

In [None]:
# Run the StopWords test on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.data_validation.nlp.StopWords",
    inputs={"dataset": vm_test_ds},
)
test.log()

### Embeddings 

- Cosine Similarity Distribution
- Cluster Distribution
- Descriptive Analytics
- Stability Analysis Keyword
- Stability Analysis Random Noise
- Stability Analysis Synonyms

In [None]:
from transformers import pipeline

# Build a feature-extraction pipeline that uses `bert-base-uncased` for both
# the model and the tokenizer, with truncation turned on.
embedding_model = pipeline(
    "feature-extraction",
    model="bert-base-uncased",
    tokenizer="bert-base-uncased",
    truncation=True,
)

# Register the pipeline with ValidMind so tests can take it as the "model" input.
vm_embedding_model = vm.init_model(
    model=embedding_model,
    input_id="bert_embedding_model",
)

In [None]:
# Assign the embedding model's predictions to the test dataset; unlike the
# LLM case, no existing prediction column is named here.
vm_test_ds.assign_predictions(model=vm_embedding_model)

In [None]:
# Run CosineSimilarityDistribution with the embedding model on the test
# dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.CosineSimilarityDistribution",
    inputs={"dataset": vm_test_ds, "model": vm_embedding_model},
)
test.log()

In [None]:
# Run ClusterDistribution with the embedding model on the test dataset, then
# log the result.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.ClusterDistribution",
    inputs={"dataset": vm_test_ds, "model": vm_embedding_model},
)
test.log()

In [None]:
# Run DescriptiveAnalytics with the embedding model on the test dataset, then
# log the result.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.DescriptiveAnalytics",
    inputs={"dataset": vm_test_ds, "model": vm_embedding_model},
)
test.log()

In [None]:
# Run StabilityAnalysisKeyword on the "article" column with a single keyword
# mapping ("finance" -> "financial"), then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisKeyword",
    inputs={"dataset": vm_test_ds, "model": vm_embedding_model},
    params={
        "text_column": "article",
        "keyword_dict": {"finance": "financial"},
    },
)
test.log()

In [None]:
# Run StabilityAnalysisRandomNoise on the "article" column, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisRandomNoise",
    inputs={"dataset": vm_test_ds, "model": vm_embedding_model},
    params={"text_column": "article"},
)
test.log()

In [None]:
# Run StabilityAnalysisSynonyms on the "article" column, then log the result.
# The parameter key is "probability"; the earlier spelling "probability:"
# (with a stray colon) did not match that name, so 0.1 was never passed as the
# probability setting.
test = vm.tests.run_test(
    "validmind.model_validation.embeddings.StabilityAnalysisSynonyms",
    inputs={
        "dataset": vm_test_ds,
        "model": vm_embedding_model,
    },
    params={
        "text_column": "article",
        "probability": 0.1,
    },
)
test.log()

## Prompt Validation

- Bias
- Clarity
- Conciseness
- Delimitation
- NegativeInstruction
- Specificity

In [None]:

# Run the Bias prompt-validation test with the LLM model and test dataset,
# then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.Bias",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:

# Run the Clarity prompt-validation test with the LLM model and test dataset,
# then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.Clarity",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:

# Run the Conciseness prompt-validation test with the LLM model and test
# dataset, then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.Conciseness",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:

# Run the Delimitation prompt-validation test with the LLM model and test
# dataset, then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.Delimitation",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:

# Run the NegativeInstruction prompt-validation test with the LLM model and
# test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.NegativeInstruction",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:

# Run the Specificity prompt-validation test with the LLM model and test
# dataset, then log the result.
test = vm.tests.run_test(
    "validmind.prompt_validation.Specificity",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Switch for running the whole "prompt_validation" suite in one call. It is
# off, so the suite below is skipped unless this flag is set to True.
run_code = False

if run_code:
    test_suite_results = vm.run_test_suite(
        "prompt_validation",
        inputs={"dataset": vm_test_ds, "model": vm_model},
    )

## Model Validation

### Model Performance Tests

- Token Disparity
- Rouge Metrics
- Rouge Metrics Aggregate
- Bert Score
- Bert Score Aggregate
- Contextual Recall
- Bleu Score
- Meteor Score

In [None]:
# Run TokenDisparity with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.TokenDisparity",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run RougeMetrics with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.RougeMetrics",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run RougeMetricsAggregate with the LLM model on the test dataset, then log
# the result.
test = vm.tests.run_test(
    "validmind.model_validation.RougeMetricsAggregate",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run BertScore with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.BertScore",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run BertScoreAggregate with the LLM model on the test dataset, then log the
# result.
test = vm.tests.run_test(
    "validmind.model_validation.BertScoreAggregate",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run ContextualRecall with the LLM model on the test dataset, then log the
# result.
test = vm.tests.run_test(
    "validmind.model_validation.ContextualRecall",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run BleuScore with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.BleuScore",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run MeteorScore with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.MeteorScore",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

### Bias and Toxicity Tests

- Toxicity Score
- Toxicity Histogram
- Regard Score
- Regard Histogram

In [None]:
# Run ToxicityScore with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.ToxicityScore",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run ToxicityHistogram with the LLM model on the test dataset, then log the
# result.
test = vm.tests.run_test(
    "validmind.model_validation.ToxicityHistogram",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run RegardHistogram with the LLM model on the test dataset, then log the
# result.
test = vm.tests.run_test(
    "validmind.model_validation.RegardHistogram",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()

In [None]:
# Run RegardScore with the LLM model on the test dataset, then log the result.
test = vm.tests.run_test(
    "validmind.model_validation.RegardScore",
    inputs={"dataset": vm_test_ds, "model": vm_model},
)
test.log()