### **This Python notebook includes all of the following steps**:

1. *Data Preprocessing*
2. *Model Implementation*
3. *Training*
4. *Evaluation*
5. *Saving the Model*

In [None]:
!python --version

Python 3.10.12


# 1. Preprocessing data

In [None]:
#Extracting the dataset
!pip install patool



In [None]:
#source: https://mdiqbalbajmi00786.medium.com/how-to-unzip-a-zip-file-in-jupyter-google-colab-e024c5707e68
import patoolib
patoolib.extract_archive('/content/reproductive_medicine.zip') #Please change to the appropriate folder path

INFO patool: Extracting /content/reproductive_medicine.zip ...
INFO:patool:Extracting /content/reproductive_medicine.zip ...
INFO patool: running /usr/bin/7z x -aou -o./Unpack__4bfcfpp -- /content/reproductive_medicine.zip
INFO:patool:running /usr/bin/7z x -aou -o./Unpack__4bfcfpp -- /content/reproductive_medicine.zip
INFO patool: ... /content/reproductive_medicine.zip extracted to `reproductive_medicine' (local file exists).
INFO:patool:... /content/reproductive_medicine.zip extracted to `reproductive_medicine' (local file exists).


'reproductive_medicine'

In [None]:
# Creating a dataset with a new data structure (CSV file that has title and text as columns)
import os
import json
import pandas as pd

# Folder that is scanned for the article JSON files.
# NOTE(review): the extraction cell above reports the archive being unpacked to
# `reproductive_medicine`, while this path points to a different folder — confirm
# that the JSON articles really live here before running.
folder_path = '/content/assignementdataset'

# Function to extract features from each JSON file
def extract_text_from_json(json_file):
    """Read one article JSON file and return its title and concatenated body text.

    Args:
        json_file: Path to a JSON file. The title is read from the top-level
            'title' key and the body from the 'text' field of every entry in
            data['pdf_parse']['body_text'].

    Returns:
        A (title, body_text) tuple. Missing or null fields yield empty
        strings; body sections are joined with single spaces.
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # The `or` fallbacks cover keys that are present but null, which the
    # default argument of .get() does not handle.
    title = data.get('title') or ''
    sections = (data.get('pdf_parse') or {}).get('body_text') or []
    body_text = " ".join(section.get('text') or '' for section in sections)

    return title, body_text

# Accumulates one {'title', 'text'} record per article
dataset = []

# Walk the folder, skipping every entry that is not a JSON file
for entry in os.listdir(folder_path):
    if not entry.endswith('.json'):
        continue
    article_title, article_body = extract_text_from_json(os.path.join(folder_path, entry))
    dataset.append({'title': article_title, 'text': article_body})

df = pd.DataFrame(dataset)
print(df.head())

# Persist the title/text table as a CSV file (index column omitted)
df.to_csv('articles.csv', index=False)


                                               title  \
0  An interpretable machine learning model for pr...   
1  Point of care rapid test for diagnosis of syph...   
2  Routine ultrasound for fetal assessment before...   
3  Association of endometriosis and adenomyosis w...   
4  Point of care rapid test for diagnosis of syph...   

                                                text  
0  T he goal of ovarian stimulation during in vit...  
1  Syphilis is a complex, curable sexually-transm...  
2  Ultrasound examination of pregnancy before 24 ...  
3  A n impressive amount of data has recently acc...  
4  Syphilis is a complex, curable sexually-transm...  


In [None]:
# Data cleaning
import re

df = pd.read_csv('articles.csv')

def clean_text(text):
    """Strip digits from a string and collapse all whitespace runs to single spaces.

    Non-string input (for example a missing value) is not cleaned and gives None.
    """
    if not isinstance(text, str):
        return None
    without_digits = re.sub(r'\d+', '', text)
    # split() with no argument also trims leading/trailing whitespace
    return " ".join(without_digits.split())

# Applying cleaning to the text column
df['text'] = df['text'].apply(clean_text)

# 2. Model implementation

In [None]:
# Source: https://sbert.net/
from sentence_transformers import SentenceTransformer, util
import torch

# Initializing the Sentence-BERT model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Function for summarizing text
def summarize_text(text, model, top_n=9):
    """Build an extractive summary from the sentences most similar to the whole text.

    Args:
        text: Document to summarize. Non-string values (e.g. missing entries)
            give an empty summary.
        model: Object exposing ``encode(..., convert_to_tensor=True)``; the
            notebook passes a SentenceTransformer.
        top_n: Maximum number of sentences kept in the summary.

    Returns:
        The selected sentences joined by single spaces, highest similarity
        first, or "" when the text contains no sentences.
    """
    if not isinstance(text, str):
        return ""

    # Naive sentence split on '.'; the periods themselves are dropped
    sentences = [part.strip() for part in text.split('.') if part.strip()]

    # Nothing to rank (empty, whitespace-only or dots-only text): return
    # before handing an empty batch to the encoder
    if not sentences:
        return ""

    # Encoding sentences using Sentence-BERT
    embeddings = model.encode(sentences, convert_to_tensor=True)

    # Computing similarity of each sentence with the entire text
    text_embedding = model.encode(text, convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(embeddings, text_embedding)

    # Ranking sentences by similarity and selecting the top_n
    ranked_sentences = sorted(
        [(score.item(), sentence) for score, sentence in zip(similarities, sentences)],
        key=lambda x: x[0],
        reverse=True
    )

    summary = " ".join([sentence for _, sentence in ranked_sentences[:top_n]])
    return summary

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Adding summarized text to the DataFrame
df['summary'] = df['text'].apply(lambda x: summarize_text(x, model))

In [None]:
df.head()

Unnamed: 0,title,text,summary
0,An interpretable machine learning model for pr...,T he goal of ovarian stimulation during in vit...,Although previous studies have established tha...
1,Point of care rapid test for diagnosis of syph...,"Syphilis is a complex, curable sexually-transm...","Syphilis is a complex, curable sexually-transm..."
2,Routine ultrasound for fetal assessment before...,Ultrasound examination of pregnancy before wee...,We included all trials with pregnant women who...
3,Association of endometriosis and adenomyosis w...,A n impressive amount of data has recently acc...,With the aim of attempting to systematize the ...
4,Point of care rapid test for diagnosis of syph...,"Syphilis is a complex, curable sexually-transm...","In , approximately , adverse pregnancy outcome..."


# 3. Training

In [None]:
# Adding a simple binary classifier in the hopes of enhancing the summary results

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

# Preparing data for training and validation

# Generating dummy labels for demonstration (1 for important, 0 for less important)
df['label'] = [1 if i % 2 == 0 else 0 for i in range(len(df))]

# Dropping rows with a missing text or label BEFORE the split, so the training
# split is filtered too and not only the test split
labelled = df.dropna(subset=['text', 'label'])

# Splitting the data into train and test sets
X = labelled['text']
y = labelled['label']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_test = list(X_test)

# Converting the labels (Pandas Series) to float PyTorch tensors, the dtype
# needed by the BCELoss criterion used for training
y_train_tensor = torch.tensor(y_train.values).float()
y_test_tensor = torch.tensor(y_test.values).float()

# Encoding the article texts (not the summaries) with Sentence-BERT, one batched
# call per split. .cpu() keeps the embeddings on the same device as the
# classifier and the label tensors even when the encoder runs on a GPU.
X_train_embeddings = model.encode(list(X_train), convert_to_tensor=True).cpu()
X_test_embeddings = model.encode(X_test, convert_to_tensor=True).cpu()

# Defining a simple classifier for evaluation
class SimpleClassifier(torch.nn.Module):
    """Single linear layer followed by a sigmoid, giving one output per input row."""

    def __init__(self, input_dim):
        super().__init__()
        # One output unit: maps an input_dim-sized vector to a single score
        self.fc = torch.nn.Linear(input_dim, 1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.fc(x)
        return logits.sigmoid()


In [None]:
# Initializing and training the classifier

# Fixing the seed so the random weight initialisation (and therefore the loss
# curve and the evaluation that follows) is the same on every run
torch.manual_seed(42)

input_dim = X_train_embeddings.size(1)
classifier = SimpleClassifier(input_dim)
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.01)


# Training loop: the whole training set is used in every step, one optimizer
# step per epoch
classifier.train()  # the mode does not change inside the loop, so set it once
for epoch in range(100):
    optimizer.zero_grad()
    # squeeze(-1) drops only the output-unit axis; a bare squeeze() would also
    # drop the batch axis when there is a single training sample
    outputs = classifier(X_train_embeddings).squeeze(-1)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    print(f"Epoch {epoch+1}, Loss: {loss.item()}")



Epoch 1, Loss: 0.693763792514801
Epoch 2, Loss: 0.6844701170921326
Epoch 3, Loss: 0.6755738258361816
Epoch 4, Loss: 0.6670149564743042
Epoch 5, Loss: 0.6587382555007935
Epoch 6, Loss: 0.6506887674331665
Epoch 7, Loss: 0.6428143382072449
Epoch 8, Loss: 0.6350765824317932
Epoch 9, Loss: 0.6274531483650208
Epoch 10, Loss: 0.6199377775192261
Epoch 11, Loss: 0.6125344634056091
Epoch 12, Loss: 0.6052528619766235
Epoch 13, Loss: 0.5981022715568542
Epoch 14, Loss: 0.5910886526107788
Epoch 15, Loss: 0.5842128992080688
Epoch 16, Loss: 0.5774708390235901
Epoch 17, Loss: 0.5708550214767456
Epoch 18, Loss: 0.5643566846847534
Epoch 19, Loss: 0.557968258857727
Epoch 20, Loss: 0.5516844987869263
Epoch 21, Loss: 0.545502781867981
Epoch 22, Loss: 0.539422869682312
Epoch 23, Loss: 0.5334452986717224
Epoch 24, Loss: 0.527570903301239
Epoch 25, Loss: 0.5217994451522827
Epoch 26, Loss: 0.5161295533180237
Epoch 27, Loss: 0.5105582475662231
Epoch 28, Loss: 0.5050821900367737
Epoch 29, Loss: 0.4996975064277649

# 4. Evaluation metrics and results

In [None]:
# Evaluation
classifier.eval()

# Inference only, so no gradients are tracked. squeeze(-1) keeps a 1-D
# prediction vector even when the test set holds a single sample.
with torch.no_grad():
    y_pred = classifier(X_test_embeddings).squeeze(-1).round()

# Converting once and reusing the arrays for both metrics
y_true_np = y_test_tensor.numpy()
y_pred_np = y_pred.numpy()

accuracy = accuracy_score(y_true_np, y_pred_np)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_true_np, y_pred_np))


Accuracy: 0.6666666666666666
Classification Report:
               precision    recall  f1-score   support

         0.0       0.71      0.83      0.77         6
         1.0       0.50      0.33      0.40         3

    accuracy                           0.67         9
   macro avg       0.61      0.58      0.58         9
weighted avg       0.64      0.67      0.65         9



In [None]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=c18f1e5886c10d15551974eb281f9b2f7919df55c728f8aebd32e041d4b78d35
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
from rouge_score import rouge_scorer
# Evaluating summaries using ROUGE (comparing it to the original text )
# Using ROUGE-1,ROUGE-2 and ROUGE-L
scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
def evaluate_summary(row):
    """Score a row's summary against its text with ROUGE-1, ROUGE-2 and ROUGE-L.

    Returns a dict of F-measures keyed 'rouge1', 'rouge2' and 'rougeL', or None
    when the 'text' or the 'summary' entry is not a string.
    """
    text = row['text']
    if not isinstance(text, str):
        return None
    summary = row['summary']
    if not isinstance(summary, str):
        return None

    # The article text is passed as the reference and the summary as the candidate
    scores = scorer.score(text, summary)
    return {metric: scores[metric].fmeasure for metric in ("rouge1", "rouge2", "rougeL")}

df['rouge_scores'] = df.apply(evaluate_summary, axis=1)


In [None]:
df.head()

Unnamed: 0,title,text,summary,label,rouge_scores
0,An interpretable machine learning model for pr...,T he goal of ovarian stimulation during in vit...,Although previous studies have established tha...,1,"{'rouge1': 0.12868757259001162, 'rouge2': 0.12..."
1,Point of care rapid test for diagnosis of syph...,"Syphilis is a complex, curable sexually-transm...","Syphilis is a complex, curable sexually-transm...",0,"{'rouge1': 0.13998082454458294, 'rouge2': 0.13..."
2,Routine ultrasound for fetal assessment before...,Ultrasound examination of pregnancy before wee...,We included all trials with pregnant women who...,1,"{'rouge1': 0.03539708265802269, 'rouge2': 0.03..."
3,Association of endometriosis and adenomyosis w...,A n impressive amount of data has recently acc...,With the aim of attempting to systematize the ...,0,"{'rouge1': 0.1, 'rouge2': 0.097182523478971, '..."
4,Point of care rapid test for diagnosis of syph...,"Syphilis is a complex, curable sexually-transm...","In , approximately , adverse pregnancy outcome...",1,"{'rouge1': 0.11968734733756717, 'rouge2': 0.11..."


*Since the accuracy is low, we decided to evaluate one sample generated summary against a reference summary using ROUGE and BERTScore.*

In [None]:
# The score to improve
df['rouge_scores'][1]

{'rouge1': 0.13998082454458294,
 'rouge2': 0.13559322033898305,
 'rougeL': 0.08756791307126878}

In [None]:
!pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bert_score
Successfully installed bert_score-0.3.13


In [None]:
# Example for reference summary for the second article
ref_summary= ["""
Syphilis is a complex and treatable sexually-transmitted infection caused by the Treponema pallidum bacterium. It can lead to severe health issues for some infected individuals and is responsible for significant disability and death among many, including men, women, and infants. Syphilis is most common during periods of high sexual activity, with an estimated global prevalence of about 0. 5% in women and 0. 3% in men. This translates to around 6 million new cases worldwide each year. The prevalence of syphilis in women also tends to decrease as a countrys average income rises. Diagnosis is based on clinical history, symptoms, and specific serologic tests.

Two types of tests are used for diagnosis: treponemal and non-treponemal tests. Non-treponemal tests, such as VDRL or RPR, measure the body’s response to certain antigens but can yield false-positive or false-negative results. Limitations in resource-limited settings often hinder access to these tests. Therefore, rapid point-of-care tests that detect antibodies or antigens have become more popular in such contexts. These tests provide quick results, enabling immediate treatment without the need for extensive training and equipment. Rapid tests reduce treatment delays and the risk of untreated infections.

The World Health Organization has set ASSURED criteria to improve syphilis testing, which means the tests should be affordable, sensitive, specific, user-friendly, rapid, robust, and equipment-free. Both treponemal and non-treponemal rapid tests exist, with combined tests showing varying sensitivity and specificity based on different study conditions. Implementing these rapid tests should come with systems to ensure quality and expertise. Access to timely diagnosis and treatment can significantly lower the disease burden.

Testing at points of care is crucial, especially for symptomatic patients displaying signs of infection. Rapid tests can help confirm suspicions quickly, leading to prompt treatment and reduced transmission rates. High-quality systematic reviews of available rapid test evidence are essential for improving syphilis diagnosis. This will allow better decision-making at various healthcare levels.

This systematic review aims to assess the accuracy of rapid tests in nonpregnant women and men of reproductive age, focusing on different types of tests based on infection stages and settings. Only studies with a valid diagnostic process, without case-control designs, will be included. The review will analyze a range of studies using electronic and manual search methods, ensuring a comprehensive understanding of test accuracy. The results will facilitate enhanced public health policies focused on timely diagnostics and treatment to reduce syphilis transmission. The systematic review will compare various tests and settings, identify accuracy differences, and adapt methodologies to address unique challenges in diagnostic testing.
"""]

eval_summary=[df['summary'][1]]
eval_summary

['Syphilis is a complex, curable sexually-transmitted infection caused by the Treponema pallidum bacterium that has a variable clinical course (CDC ) We will to assess the accuracy of all POC available test for detecting syphilis infection regardless their type: treponemal or non-treponemal Series that compare syphilis POC tests with the combination of treponemal and non-treponemal tests, have shown a concordance with approximately To determine the diagnostic accuracy of rapid tests at point of care (POC) for detecting syphilis infection in men and nonpregnant women of reproductive age, as verified with the combination of both reactive non-treponemal and treponemal test as the reference standard For syphilis in women, the infection with the most robust available data, the prevalence of infection decreased as average country income increased (Newman ) Diagnosis of syphilis is based on a combination of clinical history, symptom presentation, and serologic test results In this population,

In [None]:
# Source: https://cookbook.openai.com/examples/evaluation/how_to_eval_abstractive_summarization
from bert_score import BERTScorer

# Instantiate the BERTScorer object for English language.
# It gets its own name: reusing `scorer` would overwrite the ROUGE scorer that
# evaluate_summary looks up at call time and break any later re-run of that cell.
bert_scorer = BERTScorer(lang="en")

# The generated summary is passed as the candidate, the hand-written one as the reference
P, R, F1 = bert_scorer.score(eval_summary, ref_summary)
print("Precision: ", P)
print("Recall: ", R)
print("F1 score", F1)


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Precision:  tensor([0.8614])
Recall:  tensor([0.8478])
F1 score tensor([0.8545])


In [None]:
!pip install rouge

Collecting rouge
  Downloading rouge-1.0.1-py3-none-any.whl.metadata (4.1 kB)
Downloading rouge-1.0.1-py3-none-any.whl (13 kB)
Installing collected packages: rouge
Successfully installed rouge-1.0.1


In [None]:
#source: https://fabianofalcao.medium.com/metrics-for-evaluating-summarization-of-texts-performed-by-transformers-how-to-evaluate-the-b3ce68a309c3
from rouge import Rouge

# Initialize the ROUGE object
rouge = Rouge()
# Calculate ROUGE for the generated and reference summaries
scores = rouge.get_scores(eval_summary, ref_summary)
# Print the results
print(scores)

[{'rouge-1': {'r': 0.2537878787878788, 'p': 0.4962962962962963, 'f': 0.3358395945201349}, 'rouge-2': {'r': 0.08478802992518704, 'p': 0.17, 'f': 0.11314475429470036}, 'rouge-l': {'r': 0.23484848484848486, 'p': 0.45925925925925926, 'f': 0.310776937878531}}]


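We can see improvements in ROUGE-1 and ROUGE-L compared to the previous ROUGE evaluation (F-measure of about 0.34 vs. 0.14 and 0.31 vs. 0.09), while ROUGE-2 is slightly lower (about 0.11 vs. 0.14). Note that the two evaluations are not directly comparable: here the summary is compared to a reference summary, whereas the previous evaluation compared it to the full article text.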

# 5. Saving the model

In [None]:
# Save the summarized dataset (all DataFrame columns, without the index) as CSV
df.to_csv('summarized_articles.csv', index=False)
# Save only the classifier's learned parameters (its state_dict); the
# Sentence-BERT encoder held in `model` is not written here
torch.save(classifier.state_dict(), 'classifier_model.pth')