In [None]:
# Environment setup: install the summariser package, the libraries pinned alongside it, and the ROUGE scorer.
!pip install -q bert-extractive-summarizer
!pip install -q spacy==2.1.3
!pip install -q transformers==2.2.2
!pip install -q neuralcoref
!pip install torch==1.6.0+cpu torchvision==0.7.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
# bert-extractive-summarizer is requested a second time here, now pinned to 0.5.0 (the first request above is unpinned).
!pip install bert-extractive-summarizer==0.5.0
# torch is requested again here without a version pin (the CPU build 1.6.0 is pinned two lines above).
!pip install torch
# NOTE(review): this hands pip a requirements-file path as if it were a package; `-r rouge/requirements.txt`
# is probably what was intended, and nothing visible in this notebook creates a rouge/ directory - confirm.
!pip install rouge/requirements.txt
!pip install rouge-score
from rouge_score import rouge_scorer
import statistics 
import torch
from summarizer import Summarizer
import pandas as pd

In [None]:
# Read the WikiHow `leq512` sample into a dataframe.
# To process the 512 < x < 1024 or 1024 < x < 2048 datasets, swap the filename below.
df_results = pd.read_csv(
    'gdrive/MyDrive/SNLP_Coursework/WikiHow_sample_leq512_results_BERT.csv'
)

# The 'text' column holds the passages to be summarised; take all of its rows.
myText = df_results['text'][:]


In [None]:
# Create the three bookkeeping columns, each pre-filled with the placeholder
# string 'empty', then preview the first rows of the dataframe.
for new_column in ('train_length', 'summary', 'summary_length'):
    df_results[new_column] = 'empty'
df_results.head()

In [None]:
# Materialise every passage from myText into a plain Python list, in order.
allText = list(myText)

In [None]:
# this function returns the word length of the input text, treating any whitespace (including newlines) as a word separator
def preprocess(text):
    """Return the number of whitespace-separated words in ``text``.

    Line breaks count as word separators. Deleting them outright (replacing
    "\\n" with an empty string) would glue the last word of one line to the
    first word of the next and under-count the passage length.

    Args:
        text: The passage as a string.

    Returns:
        The word count as an int; 0 for an empty or whitespace-only string.
    """
    # str.split() with no arguments splits on runs of any whitespace and
    # ignores leading/trailing whitespace, so no strip()/replace() is needed.
    return len(text.split())

In [None]:
# building the model
# Summarizer comes from the `summarizer` package imported at the top of the notebook;
# it is constructed here with its default arguments and later called once per passage.
model = Summarizer()

In [None]:
# Summarise every passage in allText, record the results in df_results and
# checkpoint the dataframe to disk as the loop progresses.

# file the dataframe is checkpointed to (the same file it was loaded from)
results_csv = 'gdrive/MyDrive/SNLP_Coursework/WikiHow_sample_leq512_results_BERT.csv'

# initialising the list of summaries
allSummaries = []

for i, text in enumerate(allText):

  # word count of the input passage
  train_length = preprocess(text)

  # value passed to the model as max_length: about one third of the passage's word count
  # NOTE(review): the intent is to cap the summary length, but in bert-extractive-summarizer
  # max_length may instead bound the length of candidate sentences - TODO confirm against
  # the library documentation before relying on the resulting summary lengths.
  m = round(0.33 * train_length)

  # run the summariser; ''.join leaves a plain string unchanged and would concatenate
  # a sequence of strings with no separator (it does not insert spaces)
  summary = ''.join(model(text, max_length=m))

  # add the summary to the list of summaries
  allSummaries.append(summary)

  # add the results to the dataframe, including the summary and its length in words.
  # .at writes straight into df_results; the chained form df_results[col][i] = value
  # can end up writing to a temporary copy and leave the dataframe unchanged.
  df_results.at[i, 'train_length'] = train_length
  df_results.at[i, 'summary'] = summary
  df_results.at[i, 'summary_length'] = len(summary.split())

  if i % 100 == 0:
    # save the dataframe to the file every 100 iterations
    df_results.to_csv(results_csv)

  print('Processed', i, 'out of', len(allText))

# final save: without it, rows processed after the last multiple of 100 never reach the file
if allSummaries:
  df_results.to_csv(results_csv)
  


In [None]:
# here we calculate the rouge scores for the generated summaries, using the
# 'headline' column as the reference and the 'summary' column as the prediction

start = 36200 # we collected 36200 summaries out of the entire <512 dataset due to GPU limitations
n = len(df_results)

# keep only the first `start` rows (the ones whose summaries were collected).
# An independent copy is taken so that df_results itself is not truncated.
df_cropped = df_results.iloc[:start].copy()

# initialising lists for rouge scores
rouge_1_precision = []
rouge_1_recall = []
rouge_1_fmeasure = []

rouge_2_precision = []
rouge_2_recall = []
rouge_2_fmeasure = []

rouge_L_precision = []
rouge_L_recall = []
rouge_L_fmeasure = []

rouge_Lsum_precision = []
rouge_Lsum_recall = []
rouge_Lsum_fmeasure = []

# the scorer does not depend on the row, so it is built once rather than per iteration
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL', 'rougeLsum'], use_stemmer=True)

# calculating the rouge score for each summary
for i, (headline, summary) in enumerate(zip(df_cropped['headline'], df_cropped['summary'])):
  # RougeScorer.score expects (target, prediction): the reference headline goes first.
  # Passing them the other way round swaps the reported precision and recall.
  scores = scorer.score(headline, summary)

  rouge_1_precision.append(scores['rouge1'].precision)
  rouge_1_recall.append(scores['rouge1'].recall)
  rouge_1_fmeasure.append(scores['rouge1'].fmeasure)

  rouge_2_precision.append(scores['rouge2'].precision)
  rouge_2_recall.append(scores['rouge2'].recall)
  rouge_2_fmeasure.append(scores['rouge2'].fmeasure)

  rouge_L_precision.append(scores['rougeL'].precision)
  rouge_L_recall.append(scores['rougeL'].recall)
  rouge_L_fmeasure.append(scores['rougeL'].fmeasure)

  rouge_Lsum_precision.append(scores['rougeLsum'].precision)
  rouge_Lsum_recall.append(scores['rougeLsum'].recall)
  rouge_Lsum_fmeasure.append(scores['rougeLsum'].fmeasure)

  print('Processed', i, 'out of', len(df_cropped))

# calculate the averages for each rouge metric (one [precision, recall, fmeasure] triple per metric)
data = {'Rouge1': [statistics.mean(rouge_1_precision), statistics.mean(rouge_1_recall), statistics.mean(rouge_1_fmeasure)],
        'Rouge2': [statistics.mean(rouge_2_precision), statistics.mean(rouge_2_recall), statistics.mean(rouge_2_fmeasure)],
        'RougeL': [statistics.mean(rouge_L_precision), statistics.mean(rouge_L_recall), statistics.mean(rouge_L_fmeasure)],
        'RougeLsum': [statistics.mean(rouge_Lsum_precision), statistics.mean(rouge_Lsum_recall), statistics.mean(rouge_Lsum_fmeasure)]}

# save these averages to a dataframe: one column per rouge metric, one row per statistic
averagesBERT = pd.DataFrame(data, index=['Precision', 'Recall', 'FMeasure'])



In [None]:
# save the evaluation outputs to files:
# first the cropped dataframe (df_cropped), then the table of average rouge scores (averagesBERT)
df_cropped.to_csv('gdrive/MyDrive/SNLP_Coursework/croppedData_WikiHow_sample_leq512_results_BERT.csv')
averagesBERT.to_csv('gdrive/MyDrive/SNLP_Coursework/averages_WikiHow_sample_leq512_results_BERT.csv')