In [None]:

%%capture
!pip install transformers torch pandas nltk tqdm
!git clone https://github.com/xpuria/glimpse-mds.git
!cd glimpse-mds && pip install -e .


import sys
sys.path.append('/content/glimpse-mds')

import pandas as pd
import torch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
import nltk
nltk.download('punkt')
import logging
from pathlib import Path
import json
from tqdm.notebook import tqdm
from rsasumm.rsa_reranker import RSAReranking

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def load_data(num_samples=5, data_path='/content/glimpse-mds/data/all_reviews_2017.csv'):
    """Load the review CSV and keep the reviews of the first ``num_samples`` papers.

    Args:
        num_samples: Number of distinct paper titles to keep, taken in order of
            first appearance once incomplete rows have been dropped.
        data_path: Location of the reviews CSV. The default is the path of the
            Colab checkout, so existing calls behave exactly as before.

    Returns:
        A DataFrame with every review whose ``paper_title`` is among the
        selected titles. Rows missing ``review`` or ``paper_title`` are excluded.
    """
    reviews_raw = pd.read_csv(data_path)
    logger.info(f"Total reviews: {len(reviews_raw)}")

    # Later steps tokenize `review` and group by `paper_title`, so rows missing
    # either column are removed up front.
    reviews_valid = reviews_raw.dropna(subset=['review', 'paper_title'])

    # "Sampling" here is deterministic: the first num_samples distinct titles.
    sampled_papers = reviews_valid['paper_title'].unique()[:num_samples]
    df_sample = reviews_valid[reviews_valid['paper_title'].isin(sampled_papers)]

    logger.info(f"Selected {len(df_sample)} reviews for {len(sampled_papers)} papers")
    return df_sample


def run_rsa_analysis(reviews: list, model, tokenizer, device='cuda'):
    """Score every unique sentence of ``reviews`` with the RSA reranker.

    Args:
        reviews: Review texts; used both as the source texts and, once split
            into sentences, as the candidate pool.
        model: Model handed to ``RSAReranking``.
        tokenizer: Tokenizer handed to ``RSAReranking``.
        device: Device handed to ``RSAReranking``.

    Returns:
        A pair ``(sentence_scores, rsa_data)``. ``sentence_scores`` is a list
        of ``(sentence, score)`` tuples sorted by descending score;
        ``rsa_data`` holds the speaker frame, listener frame and
        consensuality scores returned by ``rerank``.
    """
    # Split reviews into sentences; dict.fromkeys removes duplicates while
    # keeping first-seen order.
    sentences = list(dict.fromkeys(
        sentence
        for review in reviews
        for sentence in nltk.sent_tokenize(review)
    ))

    reranker = RSAReranking(
        model=model,
        tokenizer=tokenizer,
        candidates=sentences,
        source_texts=reviews,
        device=device,
        rationality=3.0,
        batch_size=8
    )

    # rerank() yields eight values; only the speaker/listener frames and the
    # last element (consensuality scores) are used below.
    (
        _best_rsa,
        _best_base,
        speaker_df,
        listener_df,
        _initial_listener,
        _lm_proba_df,
        _initial_cons,
        cons_scores,
    ) = reranker.rerank(t=2)

    # A sentence's score is the mean of its column in speaker_df.
    ranked = sorted(
        ((sentence, float(speaker_df[sentence].mean())) for sentence in sentences),
        key=lambda pair: pair[1],
        reverse=True,
    )

    rsa_data = {
        'speaker_df': speaker_df,
        'listener_df': listener_df,
        'consensuality_scores': cons_scores
    }
    return ranked, rsa_data


def generate_summary(selected_sentences: list, paper_title: str, model, tokenizer, device='cuda'):
    """Generate a summary from the first five selected sentences.

    Args:
        selected_sentences: Sequence of ``(sentence, score)`` pairs; only the
            first five are used and the scores are ignored.
        paper_title: Title placed at the top of the prompt.
        model: Object exposing ``generate(**kwargs)``.
        tokenizer: Callable that encodes the prompt and exposes ``decode``.
        device: Device the encoded prompt is moved to.

    Returns:
        The decoded text of the first generated sequence.
    """
    # One bullet line per sentence, appended to a fixed header.
    bullet_lines = "".join(f"• {text}\n" for text, _ in selected_sentences[:5])
    prompt = f"Paper Title: {paper_title}\n\nReview Summary:\n\n" + bullet_lines

    encoded = tokenizer(
        prompt,
        max_length=1024,
        truncation=True,
        padding=True,
        return_tensors="pt"
    )
    encoded = encoded.to(device)

    generation_kwargs = dict(
        max_length=200,
        min_length=50,
        num_beams=4,
        length_penalty=2.0,
        no_repeat_ngram_size=3,
    )
    # Inference only, so gradient tracking is disabled.
    with torch.no_grad():
        generated = model.generate(**encoded, **generation_kwargs)

    return tokenizer.decode(generated[0], skip_special_tokens=True)


def process_reviews(num_papers=5):
    """Run the RSA pipeline end to end for the first ``num_papers`` papers.

    Loads facebook/bart-large-cnn, ranks each paper's review sentences with
    ``run_rsa_analysis``, generates a summary with ``generate_summary``, logs
    the outcome, and writes everything to ``results/rsa_analysis.json``.

    Args:
        num_papers: Number of papers forwarded to ``load_data``.

    Returns:
        The list of per-paper result dicts (the same structure that is saved).
    """
    checkpoint = "facebook/bart-large-cnn"
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model.to(device)

    review_frame = load_data(num_papers)

    results = []
    for title in tqdm(review_frame['paper_title'].unique()):
        logger.info(f"\nProcessing: {title}")

        # All reviews written for the current paper.
        belongs_to_paper = review_frame['paper_title'] == title
        reviews = review_frame[belongs_to_paper]['review'].tolist()

        sentence_scores, rsa_data = run_rsa_analysis(reviews, model, tokenizer, device)
        final_summary = generate_summary(sentence_scores, title, model, tokenizer, device)

        # Only the five best-scoring sentences are stored.
        top_sentences = [
            {'text': sentence, 'rsa_score': rsa_score}
            for sentence, rsa_score in sentence_scores[:5]
        ]
        consensuality = rsa_data['consensuality_scores'].to_dict()

        result = {
            'paper_title': title,
            'num_reviews': len(reviews),
            'extracted_sentences': top_sentences,
            'final_summary': final_summary,
            'rsa_metrics': {'consensuality_scores': consensuality},
        }
        results.append(result)

        logger.info("\nTop RSA-selected sentences:")
        for rank, entry in enumerate(top_sentences, 1):
            logger.info(f"\n{rank}. RSA Score: {entry['rsa_score']:.3f}")
            logger.info(f"   {entry['text']}")

        logger.info("\nFinal Summary:")
        logger.info(final_summary)

    # Persist every paper's result in a single JSON file.
    output_dir = Path('results')
    output_dir.mkdir(exist_ok=True)
    with open(output_dir / 'rsa_analysis.json', 'w') as handle:
        json.dump(results, handle, indent=2)

    logger.info("\nResults saved to results/rsa_analysis.json")
    return results

# Run the pipeline on five papers, then print per-paper score statistics.
results = process_reviews(num_papers=5)

print("\nSummary Statistics:")
for result in results:
    print(f"\nPaper: {result['paper_title']}")
    print(f"Number of reviews: {result['num_reviews']}")
    # Gather the stored scores once and reuse them for both statistics.
    scores = [entry['rsa_score'] for entry in result['extracted_sentences']]
    print(f"Top RSA score: {scores[0]:.3f}")
    print(f"Average RSA score: {sum(scores)/len(scores):.3f}")

In [None]:
!git clone https://github.com/xpuria/glimpse-mds.git
%cd glimpse-mds

Cloning into 'glimpse-mds'...
remote: Enumerating objects: 351, done.[K
remote: Counting objects: 100% (157/157), done.[K
remote: Compressing objects: 100% (74/74), done.[K
remote: Total 351 (delta 120), reused 111 (delta 83), pack-reused 194 (from 1)[K
Receiving objects: 100% (351/351), 31.65 MiB | 15.83 MiB/s, done.
Resolving deltas: 100% (222/222), done.
/content/glimpse-mds


In [None]:
ls

[0m[01;34mdata[0m/  [01;34mexamples[0m/  [01;34mglimpse[0m/  [01;34mmds[0m/  pyproject.toml  Readme.md  requirements  [01;34mrsasumm[0m/  [01;34mscripts[0m/


In [None]:
pip install datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (

In [None]:
import os
from pathlib import Path
import pandas as pd
from tqdm import tqdm
import torch

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from glimpse.data_loading.data_processing import process_data
from glimpse.src.compute_rsa import compute_rsa
from glimpse.data_loading.generate_abstractive_candidates import generate_summaries

# Input/output locations, all relative to the current working directory.
RAW_DATA_DIR = "data"
PROCESSED_DATA_DIR = "data/processed"
OUTPUT_DIR = "output"
# Checkpoint name used for both the model and the tokenizer.
MODEL_NAME = "facebook/bart-large-cnn"
# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
BATCH_SIZE = 4

# Decoding strategies keyed by name; each value is a dict of generation settings.
# NOTE(review): the keys look like Hugging Face `generate` keyword arguments —
# confirm how generate_summaries consumes them.
GENERATION_CONFIGS = {
    # Sampling only: num_beams is 1 and eight sequences are requested.
    "top_p_sampling": {
        "max_new_tokens": 200,
        "do_sample": True,
        "top_p": 0.95,
        "temperature": 1.0,
        "num_return_sequences": 8,
        "num_beams": 1,
    },
    # Sampling combined with five beams; eight sequences are requested.
    "hybrid_top_p_beam": {
        "max_new_tokens": 200,
        "do_sample": True,
        "top_p": 0.9,
        "num_beams": 5,
        "temperature": 1.0,
        "num_return_sequences": 8,
    },
    # No sampling; num_return_sequences is not set for this strategy.
    "contrastive_search": {
        "max_new_tokens": 200,
        "do_sample": False,
        "penalty_alpha": 0.6,
        "top_k": 50,
        "temperature": 1.0,
    },
}

def main():
    """Generate candidate summaries per decoding strategy and pickle their RSA scores.

    Steps:
      1. Create the processed-data and output directories if needed.
      2. Build the processed CSV from the raw one unless it already exists.
      3. Load the model and tokenizer named by ``MODEL_NAME`` onto ``DEVICE``.
      4. For every entry of ``GENERATION_CONFIGS``: generate summaries, write
         them to ``summaries_<strategy>.csv``, compute RSA scores from that
         file, and pickle them to ``rsa_scores_<strategy>.pk``.

    Returns:
        None. All results are written under ``OUTPUT_DIR``.
    """
    # Local import: the import block of this cell does not bring pickle into scope.
    import pickle

    raw_data_path = os.path.join(RAW_DATA_DIR, "all_reviews_2017.csv")
    processed_data_path = os.path.join(PROCESSED_DATA_DIR, "all_reviews_2017.csv")

    os.makedirs(PROCESSED_DATA_DIR, exist_ok=True)
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # The processed file acts as a cache: preprocessing runs only when it is missing.
    if not os.path.exists(processed_data_path):
        print(f"Processing raw data from {raw_data_path}...")
        process_data(raw_data_path, processed_data_path)
        print(f"Processed dataset saved at {processed_data_path}.")
    else:
        print(f"Processed dataset already exists at {processed_data_path}.")

    dataset = pd.read_csv(processed_data_path)

    print("Loading model and tokenizer...")
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(DEVICE)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    # The strategies live in GENERATION_CONFIGS; the previous name
    # DECODING_CONFIGS was never defined and raised NameError here.
    for strategy, config in GENERATION_CONFIGS.items():
        print(f"\nGenerating summaries with strategy: {strategy}")
        summaries = generate_summaries(
            model=model,
            tokenizer=tokenizer,
            dataset=dataset,
            decoding_config=config,
            batch_size=BATCH_SIZE,
            device=DEVICE,
        )

        summary_file = os.path.join(OUTPUT_DIR, f"summaries_{strategy}.csv")
        # NOTE(review): this assumes one summary per dataset row. Two strategies
        # request num_return_sequences=8; if generate_summaries returns every
        # sequence, the lengths will not match and DataFrame construction will
        # raise — confirm against generate_summaries.
        summary_frame = pd.DataFrame(
            {"id": dataset["id"], "text": dataset["text"], "summary": summaries}
        )
        summary_frame.to_csv(summary_file, index=False)
        print(f"Summaries saved at {summary_file}.")

        print(f"Computing RSA scores for strategy: {strategy}...")
        rsa_output_path = os.path.join(OUTPUT_DIR, f"rsa_scores_{strategy}.pk")
        # Scores are computed from the file just written, not the in-memory frame.
        rsa_results = compute_rsa(pd.read_csv(summary_file), model, tokenizer, DEVICE)

        with open(rsa_output_path, "wb") as f:
            pickle.dump(rsa_results, f)
        print(f"RSA scores saved at {rsa_output_path}.")

if __name__ == "__main__":
    main()

In [None]:
!git clone https://github.com/xpuria/glimpse-mds.git

Cloning into 'glimpse-mds'...
remote: Enumerating objects: 388, done.[K
remote: Counting objects: 100% (111/111), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 388 (delta 93), reused 66 (delta 58), pack-reused 277 (from 1)[K
Receiving objects: 100% (388/388), 31.66 MiB | 15.19 MiB/s, done.
Resolving deltas: 100% (250/250), done.


In [None]:
cd glimpse-mds

/content/glimpse-mds


In [None]:
ls

[0m[01;34mdata[0m/  [01;34mexamples[0m/  [01;34mglimpse[0m/  [01;34mmds[0m/  pyproject.toml  Readme.md  requirements  [01;34mrsasumm[0m/  [01;34mscripts[0m/


In [None]:
!pip install -r requirements

Collecting numpy==1.25.2 (from -r requirements (line 2))
  Downloading numpy-1.25.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.6 kB)
Collecting gradio (from -r requirements (line 5))
  Downloading gradio-5.12.0-py3-none-any.whl.metadata (16 kB)
Collecting datasets (from -r requirements (line 7))
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio->-r requirements (line 5))
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio->-r requirements (line 5))
  Downloading fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio->-r requirements (line 5))
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.5.4 (from gradio->-r requirements (line 5))
  Downloading gradio_client-1.5.4-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio->-r requirements (line 5))
  Downloadin

In [None]:
!python glimpse/data_loading/data_processing.py

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_dataset.rename(columns={"review": "text", "metareview": "gold"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_dataset.rename(columns={"review": "text", "metareview": "gold"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sub_dataset.rename(columns={"review": "text", "metareview": "gold"}, inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.

In [None]:
import nltk
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
device = 'cuda'

In [None]:
!python glimpse/data_loading/hybrid_summerization \
  --dataset_path data/processed/all_reviews_2017.csv \
  --output_dir data/hybrid_candidates \
  --model_name facebook/bart-large-cnn \
  --batch_size 16 \
  --device cuda

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
Loading dataset...
Generating extractive summaries...
100% 1511/1511 [00:00<00:00, 2568.45it/s]
Map: 100% 1511/1511 [00:00<00:00, 22487.53 examples/s]
config.json: 100% 1.58k/1.58k [00:00<00:00, 11.8MB/s]
2025-01-21 23:38:12.483529: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-21 23:38:12.501209: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-21 23:38:12.522740: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one ha

In [None]:
import torch

In [None]:
torch.cuda.is_available()

True

In [None]:
import pandas as pd

# The file name carries a run-specific timestamp, so this path must be updated
# whenever the candidates are regenerated.
# NOTE(review): presumably written by the hybrid summarization step — confirm.
hybrid_summary_path = "data/hybrid_candidates/hybrid_summaries_20250121_234806.csv"
hybrid_summaries = pd.read_csv(hybrid_summary_path)

# Quick look at the first rows to check the loaded columns.
print(hybrid_summaries.head())

   index                                         id  \
0      0  https://openreview.net/forum?id=r1rhWnZkg   
1      0  https://openreview.net/forum?id=r1rhWnZkg   
2      0  https://openreview.net/forum?id=r1rhWnZkg   
3      0  https://openreview.net/forum?id=r1rhWnZkg   
4      0  https://openreview.net/forum?id=r1rhWnZkg   

                                                text  \
0  Summary: The paper presents low-rank bilinear ...   
1  Summary: The paper presents low-rank bilinear ...   
2  Summary: The paper presents low-rank bilinear ...   
3  Summary: The paper presents low-rank bilinear ...   
4  Summary: The paper presents low-rank bilinear ...   

                                                gold  \
0  The program committee appreciates the authors'...   
1  The program committee appreciates the authors'...   
2  The program committee appreciates the authors'...   
3  The program committee appreciates the authors'...   
4  The program committee appreciates the authors'...

In [None]:
# NOTE: this invocation fails. compute_rsa_hybrid.py rejects --max_length; its
# usage message lists --max_source_length and --max_target_length instead.
!python glimpse/src/compute_rsa_hybrid.py \
  --summaries data/hybrid_candidates/hybrid_summaries_20250121_234806.csv \
  --model_name facebook/bart-large-cnn \
  --device cuda \
  --batch_size 8 \
  --max_length 512 \
  --output_dir output

usage: compute_rsa_hybrid.py [-h] [--model_name MODEL_NAME] --summaries SUMMARIES
                             [--output_dir OUTPUT_DIR] [--filter FILTER] [--device DEVICE]
                             [--batch_size BATCH_SIZE] [--max_source_length MAX_SOURCE_LENGTH]
                             [--max_target_length MAX_TARGET_LENGTH] [--rationality RATIONALITY]
                             [--rsa_iterations RSA_ITERATIONS]
compute_rsa_hybrid.py: error: unrecognized arguments: --max_length 512


In [None]:
!python glimpse/src/compute_rsa_hybrid.py \
  --summaries data/hybrid_candidates/hybrid_summaries_20250121_234806.csv \
  --model_name facebook/bart-large-cnn \
  --device cuda \
  --batch_size 8 \
  --rationality 3.0 \
  --rsa_iterations 2 \
  --output_dir output

2025-01-21 23:59:21,134 - INFO - Loading model: facebook/bart-large-cnn
2025-01-21 23:59:21.883773: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-21 23:59:21.901895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-21 23:59:21.922965: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-21 23:59:21.929377: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered

In [None]:
# NOTE(review): --summaries points at data/ here, whereas the earlier
# invocations read the same file name from data/hybrid_candidates/ — confirm the
# file exists at this location on a fresh run.
# --device and --rsa_iterations are omitted, so the script's defaults apply.
!python glimpse/src/compute_rsa_hybrid.py \
  --summaries data/hybrid_summaries_20250121_234806.csv \
  --model_name facebook/bart-large-cnn \
  --batch_size 8 \
  --rationality 3.0 \
  --output_dir output

INFO:__main__:Loading model: facebook/bart-large-cnn
config.json: 100% 1.58k/1.58k [00:00<00:00, 10.0MB/s]
2025-01-22 00:48:13.951780: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-01-22 00:48:13.969045: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-01-22 00:48:13.990426: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-01-22 00:48:13.996894: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS w

In [None]:
ls

[0m[01;34mdata[0m/      [01;34mglimpse[0m/  [01;34moutput[0m/         Readme.md     [01;34mrsasumm[0m/
[01;34mexamples[0m/  [01;34mmds[0m/      pyproject.toml  requirements  [01;34mscripts[0m/


In [None]:
import pickle

# Pickle of reranking results; the name embeds the summaries file stem and the model.
file_path = 'output/hybrid_summaries_20250121_234806-rsa_reranked-facebook-bart-large-cnn.pk'

# pickle.load can execute arbitrary code, so only open pickles produced by
# this pipeline.
with open(file_path, 'rb') as f:
    data = pickle.load(f)


# Inspect the top-level structure of the loaded object.
data.keys()

dict_keys(['results', 'metadata'])

In [None]:
data

{'results': [{'id': ('https://openreview.net/forum?id=r1rhWnZkg',),
   'best_rsa': ['The paper builds a model for Visual Question Answering that outperforms the current state-of-art by 0.42%.'],
   'best_base': ['The paper builds a model for Visual Question Answering that outperforms the current state-of-art by 0.42%.'],
   'speaker_scores': array([[-1.6455873, -2.0643191, -2.2354717, -2.4694748, -1.4201528,
           -2.7908816, -1.6842641]], dtype=float32),
   'initial_scores': array([[-0.79280055, -1.2115322 , -1.382685  , -1.6166879 , -0.567366  ,
           -1.9380947 , -0.8314773 ]], dtype=float32),
   'gold': "The program committee appreciates the authors' response to concerns raised in the reviews. While there are some concerns with the paper that the authors are strongly encouraged to address for the final version of the paper, overall, the work has contributions that are worth presenting at ICLR."}],
 'metadata': {'model': 'facebook/bart-large-cnn',
  'rationality': 3.0,
  '

In [None]:
pip install bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert-score
Successfully installed bert-score-0.3.13


In [None]:
from bert_score import score


# `data` is the object unpickled in an earlier cell. This rebinds `results`,
# a name the first cell also uses for the process_reviews() output.
results = data['results']

# Pair the first `best_rsa` entry of each record with its gold reference.
generated_summaries = [result['best_rsa'][0] for result in results]
gold_references = [result['gold'] for result in results]


# score() returns three values, unpacked as P, R and F1; only F1 is used below.
P, R, F1 = score(generated_summaries, gold_references, lang="en", verbose=True)

# Per-example report: candidate, reference, and F1.
for idx, (gen, gold, f1) in enumerate(zip(generated_summaries, gold_references, F1)):
    print(f"Example {idx+1}:")
    print(f"Generated: {gen}")
    print(f"Gold: {gold}")
    print(f"BERTScore F1: {f1:.4f}\n")

# Mean F1 over all examples, converted to a plain Python number.
average_f1 = F1.mean().item()
print(f"Average BERTScore F1: {average_f1:.4f}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.98 seconds, 1.02 sentences/sec
Example 1:
Generated: The paper builds a model for Visual Question Answering that outperforms the current state-of-art by 0.42%.
Gold: The program committee appreciates the authors' response to concerns raised in the reviews. While there are some concerns with the paper that the authors are strongly encouraged to address for the final version of the paper, overall, the work has contributions that are worth presenting at ICLR.
BERTScore F1: 0.8455

Average BERTScore F1: 0.8455
