In [1]:
!pip install -U datasets
!pip install -U accelerate
!pip install -U transformers
!pip install -U huggingface_hub

Collecting datasets
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting filelock (from datasets)
  Downloading filelock-3.16.0-py3-none-any.whl.metadata (3.0 kB)
Collecting numpy>=1.17 (from datasets)
  Downloading numpy-2.1.1-cp312-cp312-macosx_14_0_arm64.whl.metadata (60 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Using cached pyarrow-17.0.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Using cached pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting requests>=2.32.2 (from datasets)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.66.3 (from datasets)
  Using cached tqdm-4.66.5-py3-none-any.whl.metadata (57 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (12 kB)
Collecting multiprocess (from dat

In [24]:
from datasets import load_dataset

dataset = load_dataset("ilsilfverskiold/tech-keywords-topics-summary")
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__'],
        num_rows: 7196
    })
    validation: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__'],
        num_rows: 635
    })
    test: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__'],
        num_rows: 635
    })
})

In [30]:
dataset['train']['text']

['appsmith - Platform to build admin panels, internal tools, and dashboards. Integrates with 15+ databases and any API. - Organizations build internal applications such as dashboards, database GUIs, admin panels, approval apps, customer support dashboards, and more to help their teams perform day-to-day operations. Appsmith is an open-source tool that enables the rapid development of these internal apps. Read more on our website.InstallationThere are two ways to start using Appsmith:Signup on Appsmith Cloud.Ins',
 'Elon Musk went to camp where kids were encouraged to bully others for food/water',
 'Symfony Event System, Understanding and Implementing Event-driven Architecture - Explore real-world use cases and best practices for efficient software development',
 'Introduction To Swift Programming : Swift Cheat Sheet - A comprehensive Guide on Classes, Protocols, Extensions, Enums, Initializers, and Deinitializers',
 '10 Things You Didn’t Know About Famous Programmers Like Bill Gates an

In [5]:
def show_samples(dataset, num_samples=3, seed=42):
    sample = dataset["train"].shuffle(seed=seed).select(range(num_samples))
    for example in sample:
        print(f"\n'>> Text: {example['text']}'")
        print(f"'>> Keywords: {example['keywords']}'")


show_samples(dataset)


'>> Text: Driverless car users will not be prosecuted for fatal crashes in UK'
'>> Keywords: Driverless Cars, Legal Issues, UK'

'>> Text: Google is embedding inaudible watermarks right into its AI generated music -'
'>> Keywords: Google, AI Music, Watermarks, Audio Technology'

'>> Text: What are your thoughts on Nextjs performance? Do you agree with this chart? - ( by 10up where Nextjs appears lower than WordPress on core vitals. Couldn’t post the image here due to community rules. But appreciate any other studies and thought you have on this matter.'
'>> Keywords: Next.js, Performance, 10up, WordPress'


In [4]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_name = 'facebook/bart-large' # go smaller if you can
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

texts = dataset['train']['text']

# Tokenize all texts and find the maximum length (max for BART is 1024 tokens)
max_token_length = max(len(tokenizer.encode(text, truncation=True)) for text in texts)
print(f"The longest text is {max_token_length} tokens long.")



The longest text is 245 tokens long.


In [5]:
def get_feature(batch):
  encodings = tokenizer(batch['text'], text_target=batch['keywords'],
                        max_length=1024, truncation=True)

  encodings = {'input_ids': encodings['input_ids'],
               'attention_mask': encodings['attention_mask'],
               'labels': encodings['labels']}

  return encodings

dataset_pt = dataset.map(get_feature, batched=True)
dataset_pt

Map: 100%|██████████| 7196/7196 [00:00<00:00, 25095.57 examples/s]
Map: 100%|██████████| 635/635 [00:00<00:00, 22322.28 examples/s]
Map: 100%|██████████| 635/635 [00:00<00:00, 22430.94 examples/s]


DatasetDict({
    train: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 7196
    })
    validation: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 635
    })
    test: Dataset({
        features: ['id', 'source', 'text', 'timestamp', 'reactions', 'engagement', 'url', 'text_length', 'keywords', 'topic', 'summary', '__index_level_0__', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 635
    })
})

In [6]:
columns = ['input_ids', 'labels', 'attention_mask']
dataset_pt.set_format(type='torch', columns=columns)

In [7]:
from transformers import DataCollatorForSeq2Seq

data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

In [8]:
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir = 'bart_tech_keywords', # rename to what you want it to be called
    num_train_epochs=3, # your choice
    warmup_steps = 500,
    per_device_train_batch_size=4, 
    per_device_eval_batch_size=4,
    weight_decay = 0.01,
    logging_steps = 10,
    evaluation_strategy = 'steps',
    eval_steps=50, 
    save_steps=1e6,
    gradient_accumulation_steps=16 
)

trainer = Trainer(model=model, args=training_args, tokenizer=tokenizer, data_collator=data_collator,
                  train_dataset = dataset_pt['train'], eval_dataset = dataset_pt['validation'])

trainer.train()

  batch["labels"] = torch.tensor(batch["labels"], dtype=torch.int64)
  3%|▎         | 10/336 [01:20<37:17,  6.86s/it] 

{'loss': 3.6513, 'grad_norm': 40.85301971435547, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.09}


  6%|▌         | 20/336 [02:41<45:16,  8.60s/it]

{'loss': 3.1626, 'grad_norm': 31.181283950805664, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.18}


  9%|▉         | 30/336 [03:56<35:56,  7.05s/it]

{'loss': 2.3209, 'grad_norm': 20.88770866394043, 'learning_rate': 3e-06, 'epoch': 0.27}


 12%|█▏        | 40/336 [05:01<31:08,  6.31s/it]

{'loss': 1.7205, 'grad_norm': 10.746589660644531, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.36}


 15%|█▍        | 50/336 [06:26<45:20,  9.51s/it]

{'loss': 1.4049, 'grad_norm': 10.60523509979248, 'learning_rate': 5e-06, 'epoch': 0.44}


                                                
 15%|█▍        | 50/336 [07:27<45:20,  9.51s/it] 

{'eval_loss': 1.1728525161743164, 'eval_runtime': 60.2591, 'eval_samples_per_second': 10.538, 'eval_steps_per_second': 2.639, 'epoch': 0.44}


 18%|█▊        | 60/336 [09:01<45:50,  9.97s/it]  

{'loss': 1.3121, 'grad_norm': 7.623536586761475, 'learning_rate': 6e-06, 'epoch': 0.53}


 21%|██        | 70/336 [10:34<40:46,  9.20s/it]

{'loss': 1.2611, 'grad_norm': 10.073939323425293, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.62}


 24%|██▍       | 80/336 [12:34<1:11:13, 16.70s/it]

{'loss': 1.2297, 'grad_norm': 8.070141792297363, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.71}


 27%|██▋       | 90/336 [36:51<17:12:55, 251.93s/it]

{'loss': 1.1682, 'grad_norm': 13.98412036895752, 'learning_rate': 9e-06, 'epoch': 0.8}


 30%|██▉       | 100/336 [1:26:10<6:15:45, 95.53s/it] 

{'loss': 1.1023, 'grad_norm': 8.362699508666992, 'learning_rate': 1e-05, 'epoch': 0.89}


                                                     
 30%|██▉       | 100/336 [1:27:04<6:15:45, 95.53s/it]

{'eval_loss': 1.0794211626052856, 'eval_runtime': 54.5605, 'eval_samples_per_second': 11.638, 'eval_steps_per_second': 2.914, 'epoch': 0.89}


 33%|███▎      | 110/336 [1:28:45<47:32, 12.62s/it]  

{'loss': 1.0717, 'grad_norm': 13.899110794067383, 'learning_rate': 1.1000000000000001e-05, 'epoch': 0.98}


 36%|███▌      | 120/336 [1:30:14<31:15,  8.68s/it]

{'loss': 1.0577, 'grad_norm': 6.83284854888916, 'learning_rate': 1.2e-05, 'epoch': 1.07}


 39%|███▊      | 130/336 [1:31:33<27:28,  8.00s/it]

{'loss': 1.0065, 'grad_norm': 12.647035598754883, 'learning_rate': 1.3000000000000001e-05, 'epoch': 1.16}


 42%|████▏     | 140/336 [1:32:52<25:47,  7.90s/it]

{'loss': 1.0244, 'grad_norm': 7.274231910705566, 'learning_rate': 1.4000000000000001e-05, 'epoch': 1.25}


 45%|████▍     | 150/336 [1:34:43<40:22, 13.02s/it]

{'loss': 0.9659, 'grad_norm': 11.281682014465332, 'learning_rate': 1.5e-05, 'epoch': 1.33}


                                                   
 45%|████▍     | 150/336 [1:36:01<40:22, 13.02s/it]

{'eval_loss': 0.9492010474205017, 'eval_runtime': 78.0204, 'eval_samples_per_second': 8.139, 'eval_steps_per_second': 2.038, 'epoch': 1.33}


 48%|████▊     | 160/336 [1:37:43<32:13, 10.99s/it]  

{'loss': 0.9744, 'grad_norm': 9.96319580078125, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.42}


 51%|█████     | 170/336 [1:39:05<23:31,  8.50s/it]

{'loss': 0.9483, 'grad_norm': 10.527077674865723, 'learning_rate': 1.7000000000000003e-05, 'epoch': 1.51}


 54%|█████▎    | 180/336 [1:40:30<22:40,  8.72s/it]

{'loss': 0.9721, 'grad_norm': 6.891709804534912, 'learning_rate': 1.8e-05, 'epoch': 1.6}


 57%|█████▋    | 190/336 [1:41:53<20:05,  8.26s/it]

{'loss': 0.9142, 'grad_norm': 10.293627738952637, 'learning_rate': 1.9e-05, 'epoch': 1.69}


 60%|█████▉    | 200/336 [1:43:16<18:33,  8.19s/it]

{'loss': 0.9226, 'grad_norm': 12.827656745910645, 'learning_rate': 2e-05, 'epoch': 1.78}


                                                   
 60%|█████▉    | 200/336 [1:44:13<18:33,  8.19s/it]

{'eval_loss': 0.8965096473693848, 'eval_runtime': 57.0921, 'eval_samples_per_second': 11.122, 'eval_steps_per_second': 2.785, 'epoch': 1.78}


 62%|██████▎   | 210/336 [1:45:33<18:04,  8.61s/it]

{'loss': 0.9299, 'grad_norm': 8.831805229187012, 'learning_rate': 2.1e-05, 'epoch': 1.87}


 65%|██████▌   | 220/336 [1:46:57<16:35,  8.58s/it]

{'loss': 0.9575, 'grad_norm': 14.106292724609375, 'learning_rate': 2.2000000000000003e-05, 'epoch': 1.96}


 68%|██████▊   | 230/336 [1:48:24<15:15,  8.63s/it]

{'loss': 0.8384, 'grad_norm': 6.936272144317627, 'learning_rate': 2.3000000000000003e-05, 'epoch': 2.05}


 71%|███████▏  | 240/336 [1:49:48<13:53,  8.68s/it]

{'loss': 0.8157, 'grad_norm': 10.196778297424316, 'learning_rate': 2.4e-05, 'epoch': 2.13}


 74%|███████▍  | 250/336 [1:51:12<12:10,  8.50s/it]

{'loss': 0.8488, 'grad_norm': 7.5270676612854, 'learning_rate': 2.5e-05, 'epoch': 2.22}


                                                   
 74%|███████▍  | 250/336 [1:52:13<12:10,  8.50s/it]

{'eval_loss': 0.8998113870620728, 'eval_runtime': 60.4211, 'eval_samples_per_second': 10.51, 'eval_steps_per_second': 2.632, 'epoch': 2.22}


 77%|███████▋  | 260/336 [1:53:39<11:53,  9.39s/it]

{'loss': 0.8173, 'grad_norm': 8.18798828125, 'learning_rate': 2.6000000000000002e-05, 'epoch': 2.31}


 80%|████████  | 270/336 [1:55:06<09:08,  8.31s/it]

{'loss': 0.8219, 'grad_norm': 7.067136287689209, 'learning_rate': 2.7000000000000002e-05, 'epoch': 2.4}


 83%|████████▎ | 280/336 [1:56:35<08:25,  9.03s/it]

{'loss': 0.8307, 'grad_norm': 6.97703218460083, 'learning_rate': 2.8000000000000003e-05, 'epoch': 2.49}


 86%|████████▋ | 290/336 [1:58:04<06:49,  8.90s/it]

{'loss': 0.8388, 'grad_norm': 9.943134307861328, 'learning_rate': 2.9e-05, 'epoch': 2.58}


 89%|████████▉ | 300/336 [1:59:34<05:28,  9.12s/it]

{'loss': 0.8581, 'grad_norm': 6.239572525024414, 'learning_rate': 3e-05, 'epoch': 2.67}


                                                   
 89%|████████▉ | 300/336 [2:00:37<05:28,  9.12s/it]

{'eval_loss': 0.8912317156791687, 'eval_runtime': 62.9613, 'eval_samples_per_second': 10.086, 'eval_steps_per_second': 2.525, 'epoch': 2.67}


 92%|█████████▏| 310/336 [2:02:21<05:09, 11.91s/it]

{'loss': 0.8445, 'grad_norm': 10.027575492858887, 'learning_rate': 3.1e-05, 'epoch': 2.76}


 95%|█████████▌| 320/336 [2:11:29<19:59, 74.94s/it] 

{'loss': 0.817, 'grad_norm': 4.917409420013428, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.85}


 98%|█████████▊| 330/336 [2:13:27<01:21, 13.51s/it]

{'loss': 0.8061, 'grad_norm': 10.157153129577637, 'learning_rate': 3.3e-05, 'epoch': 2.93}


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
100%|██████████| 336/336 [2:14:45<00:00, 24.07s/it]

{'train_runtime': 8085.979, 'train_samples_per_second': 2.67, 'train_steps_per_second': 0.042, 'train_loss': 1.1810229207788194, 'epoch': 2.99}





TrainOutput(global_step=336, training_loss=1.1810229207788194, metrics={'train_runtime': 8085.979, 'train_samples_per_second': 2.67, 'train_steps_per_second': 0.042, 'total_flos': 3582300094562304.0, 'train_loss': 1.1810229207788194, 'epoch': 2.9883268482490273})

In [16]:
trainer.save_model('tech-keywords-extractor_plp')


Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}


In [70]:
from bert_score import score
predictions = ["hello world", "general kenobi"]
references = ["goodnight moon", "the sun is shining"]
results = score(predictions, references, model_type="distilbert-base-uncased")
print(results)



(tensor([0.7900, 0.5584]), tensor([0.7900, 0.5889]), tensor([0.7900, 0.5732]))


In [8]:
# !huggingface-cli login --token hf_xxxxxxxxxxxx

# check output of fine tuned model
from transformers import pipeline

pipe = pipeline('summarization', model='tech-keywords-extractor_plp')

text_test = "Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT"

# test_text=dataset['test'][0]['text']
# keywords = dataset['test'][0]['keywords']
print("the text: ", text_test)
print("generated keywords: ", pipe(text_test))
# print("orginal keywords : ",keywords)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 128, but your input_length is only 21. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=10)


the text:  Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT
generated keywords:  [{'summary_text': 'Apple, iphone 16, Apple Intelligence, OpenAI, ChatGPT'}]


In [29]:
# Check back on output of base untrained model

pipe = pipeline('summarization', model = 'facebook/bart-large')

text_test = "Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. "

# test_text=dataset['test'][0]['text']
# keywords = dataset['test'][0]['keywords']
print("the text: ", text_test)
print("generated keywords: ", pipe(text_test))
# print("orginal keywords : ",keywords)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 128, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


the text:  Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. 
generated keywords:  [{'summary_text': 'Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. '}]


In [30]:
# Check back on output of base model fine tuned on CNN daily news

pipe = pipeline('summarization', model = 'facebook/bart-large-cnn')

text_test = "Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. "

# test_text=dataset['test'][0]['text']
# keywords = dataset['test'][0]['keywords']
print("the text: ", text_test)
print("generated keywords: ", pipe(text_test))
# print("orginal keywords : ",keywords)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.
Your max_length is set to 142, but your input_length is only 23. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


the text:  Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. 
generated keywords:  [{'summary_text': "Apple is launching iphone 16 with Apple Intelligence. It also uses OpenAI ChatGPT. Apple is launching Apple Intelligence with Apple Artificial Intelligence on the iPhone 16. Apple  also using OpenAI chatGPT on the phone. Apple's new iPhone 16 will be released on September 18."}]


In [None]:
for i in range(0, 50):
    text_test = dataset['test'][i]['text']
    keywords = dataset['test'][i]['keywords']
    print("text: ", text_test)
    print("generated keywords: ", pipe(text_test)[0]['summary_text'])
    print("original keywords: ", keywords)

In [9]:
pipe(dataset['test'][1]['text'])[0]['summary_text']

Your max_length is set to 128, but your input_length is only 12. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)


'Distractions, analytical thinking, Fake News'

In [10]:
dataset['test'][1]['keywords']

'Distractions, Analytical Thinking, Fake News'

In [24]:
# you would replace your own name here
# you do not need to create a repository beforehand
trainer.push_to_hub("wbcmthh42/tech-keywords-extractor_plp")

Non-default generation parameters: {'early_stopping': True, 'num_beams': 4, 'no_repeat_ngram_size': 3, 'forced_bos_token_id': 0, 'forced_eos_token_id': 2}
training_args.bin:   0%|          | 0.00/5.18k [00:00<?, ?B/s]
[A
training_args.bin: 100%|██████████| 5.18k/5.18k [00:00<00:00, 18.3kB/s]

[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A


CommitInfo(commit_url='https://huggingface.co/wbcmthh42/bart_tech_keywords/commit/aaa476e530d52c5ef3c58dd4029c1cdf966f6d6c', commit_message='wbcmthh42/tech-keywords-extractor_plp', commit_description='', oid='aaa476e530d52c5ef3c58dd4029c1cdf966f6d6c', pr_url=None, pr_revision=None, pr_num=None)

In [11]:
!pip install bert-score

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting matplotlib (from bert-score)
  Using cached matplotlib-3.9.2-cp312-cp312-macosx_11_0_arm64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib->bert-score)
  Downloading contourpy-1.3.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (5.4 kB)
Collecting cycler>=0.10 (from matplotlib->bert-score)
  Using cached cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib->bert-score)
  Using cached fonttools-4.53.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (162 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib->bert-score)
  Downloading kiwisolver-1.4.7-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.3 kB)
Collecting pillow>=8 (from matplotlib->bert-score)
  Using cached pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.2 kB)
Collecting pyparsing>=2.3.1 (from matplotlib->bert-score)
  Downloading pyparsing-3.1.4-py3-none-any.whl.metadata 

In [12]:
!bert-score-show --lang en -r "There are two bananas on the table." -c "On the table are two apples." -f out.png

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Matplotlib is building the font cache; this may take a moment.
tokenizer_config.json: 100%|█████████████████| 25.0/25.0 [00:00<00:00, 67.6kB/s]
config.json: 100%|█████████████████████████████| 482/482 [00:00<00:00, 1.85MB/s]
vocab.json: 100%|████████████████████████████| 899k/899k [00:00<00:00, 5.91MB/s]
merges.txt: 100%|████████████████████████████| 456k/456k [00:00<00:00, 2.95MB/s]
tokenizer.json: 100%|██████████████████████| 1.36M/1.36M [00:00<00:00, 8.56MB/s]
model.safetensors: 100%|███████████████████| 1.42G/1.42G [00:52<00:00, 27.1MB/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Saved figure to file:  out.png
Figure(800x700)


In [20]:
from bert_score import score

P, R, F1 = score(pipe(dataset['test'][0]['text'])[0]['summary_text'], dataset['test'][1]['keywords'], lang="en", verbose=True)

Your max_length is set to 128, but your input_length is only 12. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.27it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 499.68it/s]

done in 0.11 seconds, 394.60 sentences/sec





In [48]:
P, R, F1 = score(pipe(dataset['test'][1]['text'])[0]['summary_text'], dataset['test'][1]['keywords'], lang="en", verbose=True)

Your max_length is set to 128, but your input_length is only 12. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  8.64it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 534.58it/s]

done in 0.12 seconds, 368.70 sentences/sec





44

In [41]:
dataset['test'][0]['text']

'Unleash the Edge: Women’s Faux Leather Zip-Up Motorcycle Jacket\u200a—\u200aVintage Grunge Aesthetic Revived - Ride the Rebel Wave: Women’s Faux Leather Zip-Up Motorcycle Jacket\u200a—\u200aA Fusion of Timeless Edge and Vintage Grunge Aesthetic for…'

In [60]:
pipe(dataset['test'][1]['text'])[0]['summary_text']

Your max_length is set to 128, but your input_length is only 12. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=6)


'Distractions, analytical thinking, Fake News'

In [61]:
dataset['test'][1]['keywords']

'Distractions, Analytical Thinking, Fake News'

In [64]:
summary = pipe(dataset['test'][3]['text'])[0]['summary_text']
ground_truth = dataset['test'][3]['keywords']
P, R, F1 = score([summary], [ground_truth], lang="en", verbose=True)

Your max_length is set to 128, but your input_length is only 36. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=18)
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


calculating scores...
computing bert embedding.


100%|██████████| 1/1 [00:00<00:00,  9.24it/s]


computing greedy matching.


100%|██████████| 1/1 [00:00<00:00, 680.34it/s]

done in 0.11 seconds, 8.96 sentences/sec





In [65]:
print(f"System level F1 score: {F1.mean():.3f}")
print(f"System level P score: {P.mean():.3f}")
print(f"System level R score: {R.mean():.3f}")

System level F1 score: 0.963
System level P score: 0.952
System level R score: 0.974


In [79]:
import pandas as pd

results = pd.read_csv('/Users/tayjohnny/Documents/My_MTECH/PLP/bart_tech_keywords_results.csv', header=0)
average_P, average_R, average_F1 = results[['P', 'R', 'F1']].mean()

print(f"System level average P score: {average_P:.3f}")
print(f"System level average R score: {average_R:.3f}")
print(f"System level average F1 score: {average_F1:.3f}")

System level average F1 score: 0.942
System level average P score: 0.939
System level average R score: 0.945
