In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
import torch
torch.cuda.empty_cache()

In [6]:
from sklearn.model_selection import KFold
from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
import datasets
import pandas as pd
import os
import logging
import nltk
import numpy as np
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq
import evaluate
from transformers import BartForConditionalGeneration, BartTokenizer
from random import sample


train_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_train")
test_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_test")
validate_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_validate")

In [4]:
model_path = "facebook/bart-large"
tokenizer = AutoTokenizer.from_pretrained(model_path)

model = AutoModelForSeq2SeqLM.from_pretrained(model_path)

In [28]:
from typing import List, Dict

def tokenization_with_answer(examples):
    inputs = []
    targets = []
    
    task_prefix = "Given a query and a table, generate a summary that answers the query based on the information in the table: "

    for i, (query, table, answer, coordinates, summary) in enumerate(zip(examples['query'], examples['table'], examples['answers'], examples['coordinates'], examples['summary'])):
        flattened_table = flatten_table(table, i)
        input_text = f"{task_prefix} Table {flattened_table}. Query: {query}"

        inputs.append(input_text)
        targets.append(summary)
        
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True,padding='max_length')
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=512, truncation=True)
    
    model_inputs["labels"] = labels["input_ids"] 

    res = tokenizer(inputs, text_target=targets, truncation=True, padding=True)
    return model_inputs

def flatten_table(table: Dict, row_index: int) -> str:
    header = table.get('header', [])
    rows = table.get('rows', [])
    title = table.get('title', [])

    flattened_rows = []
    for i, row in enumerate(rows):
        row_text = f"Row {i}, " + ",".join([f"{col}:{val}" for col, val in zip(header, row)])
        flattened_rows.append("## "+row_text)

    flattened_table = f"Title: {' '.join(map(str, title))}" + " " + " ".join(flattened_rows)
    return flattened_table

tokenized_dataset_train = train_df.map(tokenization_with_answer, batched=True)
tokenized_dataset_test = test_df.map(tokenization_with_answer, batched=True)

processed_data_train = tokenized_dataset_train.remove_columns(['table','summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])
processed_data_test = tokenized_dataset_test.remove_columns(['table','summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])

Map: 100%|██████████| 2000/2000 [00:09<00:00, 213.09 examples/s]
Map: 100%|██████████| 500/500 [00:02<00:00, 240.32 examples/s]


In [29]:
def k_fold_split(dataset, num_folds=5):
    fold_size = len(dataset) // num_folds
    folds = []
    for i in range(num_folds):
        start = i * fold_size
        end = start + fold_size if i < num_folds - 1 else len(dataset)
        folds.append(dataset.select(range(start, end)))
    return folds

In [30]:
def postprocess_text(preds, labels):
        preds = [pred.strip() for pred in preds]
        labels = [label.strip() for label in labels]

        # rougeLSum expects newline after each sentence
        preds = ["\n".join(nltk.sent_tokenize(pred)) for pred in preds]
        labels = ["\n".join(nltk.sent_tokenize(label)) for label in labels]

        return preds, labels

def metric_fn(eval_predictions):
    predictions, labels = eval_predictions
    decoded_predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    for label in labels:
        label[label < 0] = tokenizer.pad_token_id  # Replace masked label tokens
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    decoded_predictions, decoded_labels = postprocess_text(decoded_predictions, decoded_labels)

    rouge = evaluate.load('rouge')

    # Compute ROUGE scores
    rouge_results = rouge.compute(predictions=decoded_predictions, references=decoded_labels)

    return rouge_results

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model= model)

train_args = Seq2SeqTrainingArguments(
    output_dir="./train_weights_bart_decomposed",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    num_train_epochs=20,
    evaluation_strategy="epoch",
    save_strategy = "epoch",
    weight_decay=0.01,
    save_total_limit=5,
    warmup_ratio=0.03,
    load_best_model_at_end=True,
    predict_with_generate=True,
    overwrite_output_dir= True,
    gradient_accumulation_steps = 2
)


trainer = Seq2SeqTrainer(
    model,
    train_args,
    train_dataset=processed_data_train,
    eval_dataset=processed_data_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=metric_fn
)

In [10]:
folds = k_fold_split(train_df, num_folds=10)

for i in range(len(folds)):
    val_fold = folds[i]
    train_folds = [folds[j] for j in range(len(folds)) if j != i]
    train_dataset = concatenate_datasets(train_folds)

    tokenized_train = train_dataset.map(tokenization_with_answer, batched=True)
    tokenized_val = val_fold.map(tokenization_with_answer, batched=True)

    # Remove unnecessary columns
    processed_train = tokenized_train.remove_columns(['table', 'summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])
    processed_val = tokenized_val.remove_columns(['table', 'summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])

    # Update your trainer's train_dataset and eval_dataset
    trainer.train_dataset = processed_train
    trainer.eval_dataset = processed_val

    # Train your model
    trainer.train()
    trainer.evaluate()

Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,1.74164,0.310922,0.190126,0.274251,0.284224
2,No log,1.746532,0.3117,0.186292,0.2722,0.282696
3,No log,1.781855,0.308137,0.18452,0.27046,0.282074
4,No log,1.792227,0.310651,0.180755,0.268887,0.28113
5,No log,1.859723,0.304483,0.180463,0.263685,0.27692
6,No log,1.847143,0.306652,0.182138,0.268237,0.279202
7,0.953000,1.914978,0.309419,0.179245,0.267391,0.280251
8,0.953000,1.905485,0.309323,0.185684,0.272819,0.283899
9,0.953000,1.961449,0.309507,0.182913,0.271276,0.283882
10,0.953000,2.014781,0.312802,0.183745,0.271571,0.283286


Downloading builder script: 100%|██████████| 6.27k/6.27k [00:00<00:00, 21.8MB/s]


Map: 100%|██████████| 1800/1800 [00:13<00:00, 134.93 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 411.28 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.364045,0.378481,0.317903,0.364101,0.371057
2,No log,0.362913,0.377242,0.305969,0.360126,0.367271
3,No log,0.388991,0.386919,0.31768,0.369138,0.377016
4,No log,0.406257,0.381557,0.308955,0.364593,0.371286
5,No log,0.404682,0.377502,0.302452,0.358389,0.366657
6,No log,0.412435,0.364652,0.289373,0.346564,0.355266
7,0.581300,0.405055,0.371139,0.291156,0.348776,0.357577
8,0.581300,0.413587,0.369951,0.292602,0.35138,0.35885
9,0.581300,0.425972,0.364491,0.288418,0.347132,0.353575
10,0.581300,0.436486,0.361531,0.281145,0.340903,0.348076


Map: 100%|██████████| 1800/1800 [00:04<00:00, 427.80 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 417.10 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.1565,0.419272,0.38606,0.416496,0.418385
2,No log,0.136608,0.413219,0.374962,0.407938,0.410821
3,No log,0.129492,0.402444,0.365389,0.394082,0.39672
4,No log,0.141372,0.407959,0.366141,0.40056,0.404135
5,No log,0.16156,0.412432,0.371556,0.406299,0.409584
6,No log,0.135184,0.404734,0.368046,0.398335,0.400729
7,0.299500,0.152729,0.39722,0.357946,0.390428,0.392657
8,0.299500,0.143979,0.400395,0.360895,0.392811,0.395079
9,0.299500,0.145063,0.40478,0.365985,0.397151,0.399483
10,0.299500,0.159074,0.409068,0.364837,0.401305,0.404562


Map: 100%|██████████| 1800/1800 [00:04<00:00, 422.56 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 412.09 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.06197,0.412412,0.392762,0.411311,0.412276
2,No log,0.076129,0.403392,0.379299,0.400439,0.401373
3,No log,0.064934,0.408537,0.38547,0.406839,0.407348
4,No log,0.066857,0.404594,0.382167,0.403075,0.403389
5,No log,0.06565,0.404224,0.379157,0.401967,0.40276
6,No log,0.066976,0.408126,0.385139,0.406402,0.407079
7,0.164700,0.06545,0.404206,0.380197,0.402028,0.402371
8,0.164700,0.063854,0.405026,0.382238,0.403485,0.404045
9,0.164700,0.081352,0.398497,0.371701,0.395192,0.396353
10,0.164700,0.06617,0.405291,0.379943,0.403946,0.404182


Map: 100%|██████████| 1800/1800 [00:08<00:00, 212.22 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 376.07 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.142327,0.416846,0.394999,0.41309,0.414817
2,No log,0.057901,0.41956,0.398587,0.41592,0.41707
3,No log,0.073529,0.414845,0.393639,0.411853,0.413005
4,No log,0.056515,0.417103,0.395399,0.413033,0.414709
5,No log,0.046258,0.418374,0.395948,0.41382,0.415693
6,No log,0.045891,0.417661,0.393274,0.412191,0.41444
7,0.107100,0.055041,0.416188,0.392838,0.411618,0.413287
8,0.107100,0.045833,0.418086,0.394226,0.412785,0.414664
9,0.107100,0.045942,0.418353,0.394515,0.413284,0.415322
10,0.107100,0.072462,0.417684,0.394105,0.412779,0.414353


Map: 100%|██████████| 1800/1800 [00:05<00:00, 324.95 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 431.59 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.038347,0.421725,0.403774,0.421493,0.421319
2,No log,0.043123,0.422697,0.404328,0.422105,0.42206
3,No log,0.042001,0.422435,0.403945,0.421828,0.421818
4,No log,0.039421,0.420669,0.400036,0.41859,0.41858
5,No log,0.059225,0.422748,0.404081,0.421586,0.421749
6,No log,0.035079,0.422347,0.404089,0.4217,0.421723
7,0.070500,0.048578,0.420912,0.402408,0.420228,0.420133
8,0.070500,0.053893,0.419583,0.401293,0.41922,0.418818
9,0.070500,0.036044,0.421895,0.403194,0.421129,0.421212
10,0.070500,0.037174,0.421917,0.403557,0.421226,0.421442


Map: 100%|██████████| 1800/1800 [00:10<00:00, 170.84 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 408.49 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.02959,0.433875,0.414325,0.432634,0.433279
2,No log,0.086528,0.433805,0.413301,0.432305,0.433125
3,No log,0.032434,0.434199,0.415285,0.433461,0.43385
4,No log,0.071422,0.433651,0.413588,0.432202,0.43306
5,No log,0.036016,0.434232,0.414824,0.433066,0.433587
6,No log,0.03371,0.433443,0.412903,0.431857,0.432724
7,0.050900,0.058399,0.434282,0.414148,0.432986,0.433688
8,0.050900,0.033302,0.433633,0.413721,0.432533,0.433193
9,0.050900,0.056583,0.431724,0.412366,0.43041,0.431015
10,0.050900,0.041597,0.43255,0.413259,0.431068,0.431909


Map: 100%|██████████| 1800/1800 [00:05<00:00, 328.24 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 416.67 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.046131,0.42979,0.411103,0.428886,0.429829
2,No log,0.038581,0.431093,0.411854,0.4304,0.431146
3,No log,0.03697,0.430814,0.41157,0.430334,0.431029
4,No log,0.037363,0.430978,0.412036,0.43034,0.43105
5,No log,0.034091,0.430564,0.411423,0.429776,0.430746
6,No log,0.04005,0.430499,0.411234,0.429707,0.430536
7,0.040300,0.033764,0.430148,0.410287,0.429183,0.430078
8,0.040300,0.033171,0.4302,0.410615,0.429509,0.430359
9,0.040300,0.037319,0.430509,0.411107,0.429838,0.43064
10,0.040300,0.04015,0.430615,0.411002,0.429721,0.430649


Map: 100%|██████████| 1800/1800 [00:04<00:00, 409.70 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 421.83 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.028518,0.422896,0.396384,0.422651,0.422708
2,No log,0.059398,0.422887,0.395844,0.42261,0.422595
3,No log,0.02757,0.423011,0.396418,0.42269,0.422768
4,No log,0.03619,0.422772,0.396218,0.422488,0.422492
5,No log,0.033121,0.423097,0.396397,0.422695,0.42274
6,No log,0.029683,0.423715,0.396686,0.423392,0.423469
7,0.032800,0.030137,0.422892,0.39634,0.422562,0.422652
8,0.032800,0.045365,0.422892,0.39634,0.422562,0.422652
9,0.032800,0.030146,0.422772,0.396047,0.422488,0.422492
10,0.032800,0.030626,0.422892,0.39634,0.422562,0.422652


Map: 100%|██████████| 1800/1800 [00:05<00:00, 305.91 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 413.77 examples/s]


Epoch,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
1,No log,0.061752,0.418781,0.400395,0.418817,0.418648
2,No log,0.031002,0.419056,0.400699,0.41894,0.418918
3,No log,0.054678,0.419509,0.400324,0.419117,0.418978
4,No log,0.030299,0.419509,0.400324,0.419117,0.418978
5,No log,0.045297,0.418246,0.398419,0.416998,0.417552
6,No log,0.055381,0.418933,0.40027,0.418866,0.418798
7,0.028000,0.031221,0.418005,0.398201,0.416582,0.417426
8,0.028000,0.066742,0.418498,0.400009,0.417808,0.418028
9,0.028000,0.030686,0.418132,0.399209,0.417553,0.417892
10,0.028000,0.033476,0.417809,0.398796,0.417323,0.417575


In [11]:
model.save_pretrained("BART-decomposed")
tokenizer.save_pretrained("BART-decomposed")

('BART-decomposed-large/tokenizer_config.json',
 'BART-decomposed-large/special_tokens_map.json',
 'BART-decomposed-large/vocab.json',
 'BART-decomposed-large/merges.txt',
 'BART-decomposed-large/added_tokens.json',
 'BART-decomposed-large/tokenizer.json')

In [None]:
#### Predictions

In [38]:
import warnings
import torch
warnings.filterwarnings('ignore')
torch.cuda.empty_cache()

In [39]:
from sklearn.model_selection import KFold
from datasets import load_dataset, DatasetDict, Dataset, concatenate_datasets
import datasets
import pandas as pd
import os
import logging
import nltk
import numpy as np
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq
import evaluate
from transformers import BartForConditionalGeneration, BartTokenizer
from random import sample


train_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_train")
test_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_test")
validate_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_validate")

In [40]:
model_path = "/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/models/saved_model/bart-decomposed"
tokenizer = BartTokenizer.from_pretrained(model_path)
model = BartForConditionalGeneration.from_pretrained(model_path)

In [41]:
from typing import List, Dict

def tokenization_with_answer(examples):
    inputs = []
    targets = []
    
    task_prefix = "Given a query and a table, generate a summary that answers the query based on the information in the table: "

    for i, (query, table, answer, coordinates, summary) in enumerate(zip(examples['query'], examples['table'], examples['answers'], examples['coordinates'], examples['summary'])):
        flattened_table = flatten_table(table, i)
        input_text = f"{task_prefix} Table {flattened_table}. Query: {query}"

        inputs.append(input_text)
        targets.append(summary)
        
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True,padding='max_length')
    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=512, truncation=True)
    
    model_inputs["labels"] = labels["input_ids"] 

    res = tokenizer(inputs, text_target=targets, truncation=True, padding=True)
    return model_inputs

def flatten_table(table: Dict, row_index: int) -> str:
    header = table.get('header', [])
    rows = table.get('rows', [])
    title = table.get('title', [])

    flattened_rows = []
    for i, row in enumerate(rows):
        row_text = f"Row {i}, " + ",".join([f"{col}:{val}" for col, val in zip(header, row)])
        flattened_rows.append("## "+row_text)

    flattened_table = f"Title: {' '.join(map(str, title))}" + " " + " ".join(flattened_rows)
    return flattened_table

tokenized_dataset_train = train_df.map(tokenization_with_answer, batched=True)
tokenized_dataset_test = test_df.map(tokenization_with_answer, batched=True)

processed_data_train = tokenized_dataset_train.remove_columns(['table','summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])
processed_data_test = tokenized_dataset_test.remove_columns(['table','summary', 'row_ids', 'example_id', 'query', 'answers', 'coordinates'])

def postprocess_text(preds, labels):
        preds = [pred.strip() for pred in preds]
        labels = [label.strip() for label in labels]

        # rougeLSum expects newline after each sentence
        preds = ["\n".join(nltk.sent_tokenize(pred)) for pred in preds]
        labels = ["\n".join(nltk.sent_tokenize(label)) for label in labels]

        return preds, labels

def metric_fn(eval_predictions):
    predictions, labels = eval_predictions
    decoded_predictions = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    for label in labels:
        label[label < 0] = tokenizer.pad_token_id  # Replace masked label tokens
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)
    
    decoded_predictions, decoded_labels = postprocess_text(decoded_predictions, decoded_labels)

    rouge = evaluate.load('rouge')

    # Compute ROUGE scores
    rouge_results = rouge.compute(predictions=decoded_predictions, references=decoded_labels)

    return rouge_results

data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model= model)

train_args = Seq2SeqTrainingArguments(
    output_dir="./train_weights_bart_decomposed",
    learning_rate=3e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=4,
    num_train_epochs=20,
    evaluation_strategy="epoch",
    save_strategy = "epoch",
    weight_decay=0.01,
    save_total_limit=5,
    warmup_ratio=0.03,
    load_best_model_at_end=True,
    predict_with_generate=True,
    overwrite_output_dir= True,
    gradient_accumulation_steps = 2
)


trainer = Seq2SeqTrainer(
    model,
    train_args,
    train_dataset=processed_data_train,
    eval_dataset=processed_data_test,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=metric_fn
)

Map: 100%|██████████| 2000/2000 [00:09<00:00, 207.31 examples/s]
Map: 100%|██████████| 500/500 [00:02<00:00, 236.72 examples/s]


In [42]:
validate_df_size = len(validate_df)
step_size = 3
num_batches = validate_df_size // step_size

valid = []

for i in range(num_batches):
    start_index = i * step_size
    end_index = (i + 1) * step_size
    valid.append(validate_df.select(range(start_index, end_index)))

# If there are remaining data points that don't fit into full batches of size 3
if validate_df_size % step_size != 0:
    remaining_data = validate_df_size % step_size
    valid.append(validate_df.select(range(validate_df_size - remaining_data, validate_df_size)))

In [43]:
import numpy as np
rougeL = []
bert = []
bertscore = evaluate.load("bertscore")

for i in range(len(valid)):
    validate_df = valid[i].map(tokenization_with_answer, batched=True)
    predict_results = trainer.predict(validate_df, max_length = 1024)
    metrics = predict_results.metrics
    predictions = tokenizer.batch_decode(predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True)
    predictions = [pred.strip() for pred in predictions]
    
    bert_score = bertscore.compute(predictions=predictions, references=valid[i]['summary'], lang = "en")
    rougeL.append(metrics['test_rougeLsum'])
    bert.append(np.mean(bert_score['f1']))

sum(rougeL)/len(rougeL), sum(bert)/len(bert)

Map: 100%|██████████| 3/3 [00:00<00:00, 130.16 examples/s]


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 3/3 [00:00<00:00, 98.00 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 166.15 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 125.45 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 169.81 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 68.18 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 121.25 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 77.10 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 125.26 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 141.32 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 92.52 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 76.76 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 91.14 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 129.11 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 157.54 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 155.58 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 158.07 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 91.26 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 149.69 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 66.54 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 69.58 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 73.68 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 82.21 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 188.81 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 114.94 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 110.71 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 115.42 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 28.26 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 72.04 examples/s]


Map: 100%|██████████| 3/3 [00:01<00:00,  2.24 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 67.22 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 79.92 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 66.80 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 74.02 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 70.25 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 93.93 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 80.04 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 65.06 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 100.21 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 87.32 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 99.74 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 93.92 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 76.09 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 73.58 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 71.68 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 66.68 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 80.91 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 51.58 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 98.53 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 80.86 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 87.23 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 79.48 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 76.14 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 90.85 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 67.45 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 75.29 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 102.54 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 71.98 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 81.56 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 85.72 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 91.44 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 57.16 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 70.19 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 80.62 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 89.36 examples/s]


Map: 100%|██████████| 3/3 [00:00<00:00, 73.31 examples/s]


Map: 100%|██████████| 2/2 [00:00<00:00, 54.46 examples/s]


(0.4153478758111462, 0.8964134411728795)

In [None]:
#### Example

In [64]:
rougeL

[0.45078147002769864,
 0.4351106427818756,
 0.37665967854647103,
 0.3851103788647161,
 0.31119719109937727,
 0.46369415990894874,
 0.4362706293679158,
 0.4378325123152709,
 0.3879378000320138,
 0.3811747283799507,
 0.39663844029482226,
 0.3800516559026628,
 0.5059316472310567,
 0.45227584010192706,
 0.5223989084146478,
 0.3913086419753086,
 0.4876512105780399,
 0.46204170897445906,
 0.33460269896265776,
 0.2694933561588,
 0.3730994152046783,
 0.4457557317597886,
 0.39991120197420726,
 0.4786450238277827,
 0.4863509321981083,
 0.357163565359385,
 0.5571059431524549,
 0.5014298457790816,
 0.4504016139571471,
 0.330868544600939,
 0.38143618833274,
 0.3568990853201379,
 0.436373174206528,
 0.35056295807178817,
 0.30633230888007956,
 0.41498240321769736,
 0.4956691449101429,
 0.41173069411681484,
 0.40507708070803544,
 0.39611480809302946,
 0.6183053723629911,
 0.48314606741573035,
 0.48514030980868156,
 0.395593220338983,
 0.2720991168124571,
 0.374791418543702,
 0.4030409356725146,
 0.443

In [95]:
np.argmax(rougeL)

40

In [129]:
validate_df = valid[16].map(tokenization_with_answer, batched=True)
predict_results = trainer.predict(validate_df, max_length = 1024)

predictions = tokenizer.batch_decode(predict_results.predictions, skip_special_tokens=True, clean_up_tokenization_spaces=True)
predictions = [pred.strip() for pred in predictions]
predictions

['The yearly trend in the total number of acres affected by the wildfires in California based on the start dates and the acreage of each listed fire shows a general upward trend. The most big fire, Mendocino Complex fire, which started in July 2018, has the most acres affected with 459,123 acres, while the most small fire, Camp fire, started in August 1987, only has 140,948 acres. Other fires, like Butte fire, Ventura fire, and Colusa fire, have smaller acreage ranging from 177,866 acres to 281,893 acres.',
 'In Columbo\'s fourth season, the episodes in order are "An Exercise in Fatality," "Negative Reaction," "By Dawn\'s Early Light," "Troubled Waters," "Playback," and "A Deadly State of Mind." The episodes have corresponding titles, murderers, and victims. The episodes show Robert Conrad, Dick Van Dyke, Patrick McGoohan, Tom Simcox, and Poupée Bocar as the murderers. The victims are Antonette Bower, Don Gordon, Stephen Elliott, and Lesley Ann Warren.',
 'The Utah Jazz have had two pl

In [130]:
validate_df['summary']

['There are not enough datas to make a sure conclusion about yearly trend of total acres affected by wildfires in California. As this table only has biggest wildfires and not all fires in state, it cannot show the full trend of every year. However, we can see that some more new fires, like Mendocino Complex in 2018 and Thomas Fire in 2017, have very big acreage. This may show that there is a going up trend in how bad wildfires are in recent years.',
 'In fourth season of Columbo, first episode is "An Exercise in Fatality" on September 15, 1974. Have murderer Robert Conrad and victim Philip Bruns. Second episode is "Negative Reaction" on October 6, 1974. Have Dick Van Dyke as murderer and Antoinette Bower and Don Gordon as victims. Other episodes in order are: "By Dawn\'s Early Light" with Patrick McGoohan as murderer and Tom Simcox as victim, "Troubled Waters" have Robert Vaughn as murderer and Poupée Bocar as victim, "Playback" with Oskar Werner as murderer and Martha Scott as victim,

In [1]:
import pandas as pd
def to_pandas(item):
  return pd.DataFrame(item['table']["rows"],columns=item['table']["header"])

to_pandas(valid[21][2])

NameError: name 'valid' is not defined

In [132]:
valid[16][2]['example_id']

'1ae05e92-4ca6-483d-9307-488d28b0b31b'

In [69]:
### Evaluation

In [7]:
model_path = "/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/models/saved_model/bart-decomposed"
model = BartForConditionalGeneration.from_pretrained(model_path)
tokenizer = BartTokenizer.from_pretrained(model_path)

In [24]:
from typing import List, Dict

def flatten_table(table: Dict) -> str:
    header = table.get('header', [])
    rows = table.get('rows', [])
    title = table.get('title', [])

    flattened_rows = []
    for i, row in enumerate(rows):
        row_text = f"Row {i}, " + ",".join([f"{col}:{val}" for col, val in zip(header, row)])
        flattened_rows.append("## "+row_text)

    flattened_table = f"Title: {' '.join(map(str, title))}" + " " + " ".join(flattened_rows)
    return flattened_table

def generate_predictions(dataset):
    generated_texts = []
    task_prefix = "Given a query and a table, generate a summary that answers the query based on the information in the table: "
    for example in dataset:
        table = example['table']
        query = example['query']
        flattened_table = flatten_table(table)
        input_text = f"{task_prefix} Table {flattened_table}. Query: {query}"
        model_input = tokenizer(input_text, max_length=1024, truncation=True, padding='max_length', return_tensors='pt')
        output_sequences = model.generate(**model_input, min_length = 512)
        generated_text = tokenizer.batch_decode(output_sequences, skip_special_tokens=True)
        generated_texts.append(generated_text)

    return generated_texts

In [25]:
validate_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_validate")
validate_df = validate_df.shuffle().select(range(10))
predictions = generate_predictions(validate_df)

In [26]:
predictions

[['The trend in UK viewership for Series 2 of Torchwood episodes based on the UK viewers'],
 ['George Harrison\'s "My Sweet Lord" was released in 1970, and it'],
 ['China has the strongest overall power in the 2008 Summer Olympics. This can be seen from'],
 ['The top three deadliest storms according to the provided data are Haiyan (Yolanda'],
 ['Emilia Attías act in two films in 2016. These movies name'],
 ['Robin Frijns had one of his best performances in the Formula Renault 3.5'],
 ['The best performances in terms of batting average (minimum 80 at bats - ab)'],
 ['The Lynx constellation is home to three objects: a spiral galaxy, an'],
 ['Between 1994 and 2018, an athlete from the sport of Alpine skiing has been chosen as'],
 ['Inkster had shortest membership time in Western Wayne Athletic Conference, join in 2013']]

In [27]:
validate_df['summary']

['The trend in UK watch of Torchwood Series 2of episodes change many times in the series, but can see it have a little down in all watch. The series start very good with 4.22 million people watch first episode, named "Kiss Kiss, Bang Bang," have director Ashley Way. The watch people get most at 4.32 million for number seven episode, called "Dead Man Walking," have director Andy Goddard. But after this, the watch number go down slowly and  get low at 3.13 million for final episode, with name "Exit Wounds," and director Ashley Way again. So at last, even there are some up and down of watch people in series, the all trend show a down from start to the end of Series 2.',
 'George Harrison\'s "My Sweet Lord," released in 1970, sold 10 million copies. Other singles from the 1970s, such as Gloria Gaynor\'s "I Will Survive" and Village People\'s "Y.M.C.A.", both released in 1978, managed to sell more copies at 14 million and 12 million respectively. However, Harrison\'s sales performance is st

In [75]:
validate_df = datasets.load_from_disk("/home/y.khan/cai6307-y.khan/Query-Focused-Tabular-Summarization/data/decomposed/decomposed_validate")
validate_df['summary']

["The 2008 - 09 Connecticut Huskies Women's Basketball Team had three players who scored more than 600 points: Maya Moore, Renee Montgomery and Tina Charles. Maya Moore scored 754 points, playing a total of 1209 minutes. She had 284 field goals, 90 three pointers, and 96 free throws. Montgomery Renee Montgomery scored 644 points in 1237 minutes, compiling 226 field goals, 99 three point shots and 93 free throws. Tina Charles scored 642 points in 982 minutes, with 259 field goals and 124 free throws.",
 'Swiss Locomotive and Machine Works built a Mountain Railway Rack Steam Locomotive with an slm number of 988 in 1896. It has a wheel arrangement of 0 - 4 - 2 T, and is located on the Snowdon Mountain Railway. Its name is Snowdon.',
 'The trend of number turbines installed per project in Maine from 2006 to 2017 seem to fluctuate, no consistent increase or decrease over time. Some years have installation of big projects with more number of turbines, like Kibby Mountain in 2010 and Bingham 