In [None]:
# hide output
%%capture output

! pip install datasets
! pip install peft==0.4.0
! pip install bitsandbytes==0.40.2
! pip install accelerate==0.21.0
! pip install trl==0.4.7
! pip install langchain
! pip install faiss-gpu
! pip install transformers
! pip install pypdfium2
! pip install sentence-transformers
! pip install rouge

In [None]:
import os
from google.colab import drive
# Mount Google Drive; all project files live below `path`
drive.mount('/content/drive')
path = '/content/drive/MyDrive/Capstone/'


# One sub-folder per company; print each folder with its list index
companies = os.listdir(os.path.join(path, 'Company Reports'))
for i, comp in enumerate(companies):
    print(f"{i} :  {comp}")


# get reports
def get_reports(comp, year:int, rep_type:int = 1, *, first_year:int = 2013, last_year:int = 2022):
    """
    Return the paths of a company's report PDFs that exist on disk.

    Reports are looked up in ``<path>/Company Reports/<company>/`` and must be
    named ``<company>_<year>.pdf`` (annual report) or
    ``<company>_<year>_sus.pdf`` (sustainability report). ``path`` and
    ``companies`` are the notebook-level globals.

    comp:       company folder name (string) or its index in ``companies``
    year:       0 for every year of the window; 1..N (N = number of years in the
                window, 10 by default) for the N most recent years; any other
                value is treated as one specific year
    rep_type:   report type, 1 for annual report, 2 for sustainability report, 0 for both
    first_year: first year of the window (inclusive)
    last_year:  last year of the window (inclusive)
    ret:        list of report paths, oldest year first and annual before
                sustainability within a year; None (after printing an error)
                when ``comp`` or ``rep_type`` is invalid
    """
    # Resolve the company. bool is rejected on purpose even though it subclasses int.
    if isinstance(comp, str):
        if comp not in companies:
            print("Error: ", comp, " does not exist")
            return
    elif isinstance(comp, int) and not isinstance(comp, bool):
        if comp not in range(len(companies)):
            print("Error: invalid index")
            return
        comp = companies[comp]
    else:
        print("Error: invalid company")
        return

    file_path = os.path.join(path, 'Company Reports', comp)
    # Only membership is needed, so a set replaces the former sorted list
    existing = set(os.listdir(file_path))

    window = range(first_year, last_year + 1)
    if year in range(len(window) + 1):
        # 0 keeps the whole window, 1..N keeps the N most recent years
        selected_years = window[-year:] if year else window
    else:
        selected_years = [year]

    # File-name suffixes that distinguish the report types
    if rep_type == 0:
        reps = ["", "_sus"]
    elif rep_type == 1:
        reps = [""]
    elif rep_type == 2:
        reps = ["_sus"]
    else:
        print("Error: invalid report type")
        return

    ret = []
    for report_year in selected_years:
        for rep in reps:
            file_name = comp + '_' + str(report_year) + rep + '.pdf'
            if file_name in existing:
                ret.append(os.path.join(file_path, file_name))
    return ret

Mounted at /content/drive
0 :  ExxonMobil
1 :  Shell plc
2 :  BP PLC
3 :  Saudi Aramco
4 :  Chevron
5 :  TotalEnergies
6 :  Valero Energy
7 :  Marathon Petroleum Corporation
8 :  Sinopec
9 :  PetroChina


In [None]:
# Select the company by name: a numeric index depends on the order in which
# os.listdir() returned the folders, and that order is not guaranteed.
reports = get_reports('Chevron', 2020, 0)
# get_reports returns None on invalid input and [] when nothing matches
assert reports, "no Chevron 2020 report found"

# First hit is the annual report (annual reports are listed before sustainability ones)
file = reports[0]
file

'/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf'

In [None]:
from langchain.document_loaders import PyPDFium2Loader

# Load the selected report PDF into LangChain Document objects.
# NOTE(review): later cells index this list with metadata['page'], so it is
# presumably one Document per PDF page rather than real text "splits" — confirm.
loader = PyPDFium2Loader(file)
all_splits = loader.load()

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

In [None]:
# Sentence-embedding model with the library's default settings
embeddings = HuggingFaceEmbeddings()

# Only the first documents of the report are indexed.
# NOTE(review): presumably because the shareholder letter sits in the front
# matter — raise this if the letter is not found.
N_FRONT_PAGES = 20
vs_faiss = FAISS.from_documents(all_splits[:N_FRONT_PAGES], embeddings)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

In [None]:
# Query the index with the letter's salutation and keep only the closest match
question = "Dear Shareholders"

letter = vs_faiss.similarity_search(question, k=1)

In [None]:
# 'page' metadata of the best match; used below as an index into all_splits.
# NOTE(review): assumes 'page' equals the document's position in all_splits — confirm.
start = letter[0].metadata['page']

In [None]:
# Preview the matched document and the two that follow it; the same
# three-document window is used as model context in the cells below.
all_splits[start:start+3]

[Document(page_content='1149230_Annual_Report_v8.3.indd 3 3/5/21 5:56 AM\r\n \r\nbuilding on strengths \r\nEven before COVID-19, we were preparing to lead in a future marked by change. Our actions were proactive and disciplined – \r\nsimplifying and modernizing work; integrating teams, processes and value chains across business units and geographies; elevating \r\nleadership capabilities; advancing digital solutions; and empowering our workforce to make decisions quickly, safely and with \r\ngreater accountability. \r\nWe leveraged our strengths to design a better company for the long term – one that can act deliberately, seize opportunity and \r\ngenerate stronger returns. And we remained true to our values, prepared to succeed in any environment, and adaptive in a dynamic \r\nworld where disruption is routine. \r\nOur fnancial priorities have not changed: \r\ngrowing \r\nthe dividend \r\nWhile others are lowering dividends, we have maintained ours \r\nas we know it is a vital source 

### 5. Model

#### Pre-tuned

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "THUDM/BPO"

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

# Base (not fine-tuned) BPO checkpoint and its tokenizer
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter, emphasizing the company's strength, progress, and commitment to delivering value for stockholders and creating a lower-carbon future.


#### BPO_prompt_sum_tuned

In [None]:
import torch
from peft import PeftModel
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization settings (QLoRA-style) with float16 compute
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

# Base checkpoint on the Hub and the fine-tuned adapter stored on Drive
model_id ='THUDM/BPO'
adapter_path = os.path.join(path, 'BPO_prompt_sum_tuned', 'BPO-sum-prompt')

# Quantized base model, mapped as a whole to device 0
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Tokenizer of the base checkpoint, then the adapter wrapped around the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_new = PeftModel.from_pretrained(model, adapter_path)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
# NOTE(review): if the adapter was fine-tuned on prompts containing that repr,
# build the training prompts the same way as here.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model_new.device)
output = model_new.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter, highlighting the challenges faced in 2020, the company's progress in reducing debt, and its plans to continue investing in low-carbon technologies.


#### BPO_prompt_sum_tuned_v1

In [None]:
import torch
from peft import PeftModel
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization settings (QLoRA-style) with float16 compute
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

# Base checkpoint on the Hub and the v1 adapter stored on Drive
model_id ='THUDM/BPO'
adapter_path = os.path.join(path, 'BPO_prompt_sum_tuned_v1', 'BPO-sum-prompt-v1')

# Quantized base model, mapped as a whole to device 0.
# NOTE(review): this loads a fresh copy of the base model; copies wrapped by
# adapters from earlier sections stay referenced — check GPU memory.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Tokenizer of the base checkpoint, then the adapter wrapped around the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_new_v1 = PeftModel.from_pretrained(model, adapter_path)

config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/849 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
# NOTE(review): if the adapter was fine-tuned on prompts containing that repr,
# build the training prompts the same way as here.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model_new_v1.device)
output = model_new_v1.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter, highlighting the achievements, challenges, and plans for the coming years. Provide a concise and comprehensive overview.


#### BPO_prompt_sum_tuned_v2

In [None]:
import torch
from peft import PeftModel
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization settings (QLoRA-style) with float16 compute
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

# Base checkpoint on the Hub and the v2 adapter stored on Drive
model_id ='THUDM/BPO'
adapter_path = os.path.join(path, 'BPO_prompt_sum_tuned_v2', 'BPO-sum-prompt-v2')

# Quantized base model, mapped as a whole to device 0.
# NOTE(review): this loads a fresh copy of the base model; copies wrapped by
# adapters from earlier sections stay referenced — check GPU memory.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Tokenizer of the base checkpoint, then the adapter wrapped around the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_new_v2 = PeftModel.from_pretrained(model, adapter_path)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
# NOTE(review): if the adapter was fine-tuned on prompts containing that repr,
# build the training prompts the same way as here.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model_new_v2.device)
output = model_new_v2.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter. 
[Document(page_content='1149230_Annual_Report_v8.3_r1.indd 6 3/12/21 10:39 AM\r\n \r\nbuilding on strengths \r\nEven before COVID-19, we were preparing to lead in a future \r\nmarked by change. Our actions were proactive and disciplined – \r\nSimplifying and modernizing work\ufffe\r\noring teams, processes and value chains across business units \r\nand geographies to improve productivity and efficiency.\r\n\r\nlevying \r\nstrength – the Noble Energy acquisition and portfolio additions \r\nin recent years have given us a deep feedstock and low \r\noperating costs. We are anchored in businesses that are \r\nlow cost, large scale and long-lived – from our royalty\ufffeeadvantaged position in the Permian and growing \r\nnatural gas business in the Eastern Mediterranean to \r\nour feedstock\ufffeeadvantaged chemicals business and leading fuels \r\nbrands on the U.S. West Coast. We expect to deliver \r\nres

#### BPO_prompt_sum_tuned_v3

In [None]:
import torch
from peft import PeftModel
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization settings (QLoRA-style) with float16 compute
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

# Base checkpoint on the Hub and the v3 adapter stored on Drive
model_id ='THUDM/BPO'
adapter_path = os.path.join(path, 'BPO_prompt_sum_tuned_v3', 'BPO-sum-prompt-v3')

# Quantized base model, mapped as a whole to device 0.
# NOTE(review): this loads a fresh copy of the base model; copies wrapped by
# adapters from earlier sections stay referenced — check GPU memory.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Tokenizer of the base checkpoint, then the adapter wrapped around the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_new_v3 = PeftModel.from_pretrained(model, adapter_path)

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
# NOTE(review): if the adapter was fine-tuned on prompts containing that repr,
# build the training prompts the same way as here.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model_new_v3.device)
output = model_new_v3.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter, highlighting the impact of COVID-19 on the company's operations and highlighting the company's financial priorities, commitment to capital and cost discipline, and efforts to streamline operations and partnership. Include information on the company's recent acquisition, Puma Energy, and its potential benefits. Mention the company's low-carbon ambition and recent low-carbon milestones. End the summary by expressing gratitude for shareholders' support and entrustment. \r\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 7}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 7 3/12/21 10:39 AM \r\nPhoto: An operator aboard the Agbami floating production, storage \r\nand offloading vessel at the deepwater field 70 miles off the coast of \r\ncentral\xa0Nigeria. \r\n“During a year of unprecedented \r\nchallenges, we also delivered one of \r\no

#### BPO_prompt_sum_tuned_v4

In [None]:
import torch
from peft import PeftModel
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization settings (QLoRA-style) with float16 compute
compute_dtype = torch.float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=False
)

# Base checkpoint on the Hub and the v4 adapter stored on Drive
model_id ='THUDM/BPO'
adapter_path = os.path.join(path, 'BPO_prompt_sum_tuned_v4', 'BPO-sum-prompt-v4')

# Quantized base model, mapped as a whole to device 0.
# NOTE(review): this loads a fresh copy of the base model; copies wrapped by
# adapters from earlier sections stay referenced — check GPU memory.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# Tokenizer of the base checkpoint, then the adapter wrapped around the base model
tokenizer = AutoTokenizer.from_pretrained(model_id)
model_new_v4 = PeftModel.from_pretrained(model, adapter_path)

config.json:   0%|          | 0.00/811 [00:00<?, ?B/s]

pytorch_model.bin.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/849 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/437 [00:00<?, ?B/s]

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

prompt_template = "[INST] You are an expert prompt engineer. Please help me use the context {text} to improve this prompt to get a more helpful and harmless response:\n{instruction} [/INST]"

instruction = "Summarize the company's performances and future plans according to the CEO's letter"
# Use the plain page text as context. Formatting the list of Document objects
# directly would put its Python repr (Document(page_content=..., metadata=...)
# with escaped line breaks) into the prompt.
# NOTE(review): if the adapter was fine-tuned on prompts containing that repr,
# build the training prompts the same way as here.
pages = all_splits[start:start+3]
text = "\n".join(page.page_content for page in pages)

prompt = prompt_template.format(instruction = instruction, text = text)
# Keep the input tensors on the same device as the model
model_inputs = tokenizer(prompt, return_tensors="pt").to(model_new_v4.device)
output = model_new_v4.generate(**model_inputs, max_new_tokens=1024, do_sample=True, top_p=0.9, temperature=0.6, num_beams=1)
# generate() returns prompt + continuation; decode only the continuation
# instead of splitting the decoded text on '[/INST]'
prompt_len = model_inputs["input_ids"].shape[-1]
resp = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
print(resp)

Summarize the company's performances and future plans according to the CEO's letter in a clear and concise manner. \r\nChevron Corporation 2020 Annual Report \r\nV\n\xa0\r\nThis year’s report marks 130 years since we started. Over that time, \r\nour company has grown from a small enterprise to an industry \r\nleader. We have done this by focusing on value – and \r\nreliably delivering it. \r\nWe are fortunate to work in an industry that is essential. \r\nOur work enables the world to function. And we are lucky \r\nto work in a world that is changing – but not too fast. \r\nThe world is changing in a way that suits our companies. \r\nAs markets and politics evolve, we too must evolve. \r\nWe must be forward-looking – and we must be better. \r\nWe must do more with less. We must do more with more \r\npartners. And we must always do the right thing – \r\nfor our stockholders, our customers, our employees \r\nand the communities where we work. \r\nWe will do all these things because we kno

# Evaluation

In [None]:
# Reference prompt for the Chevron 2020 shareholder letter.
# NOTE(review): presumably the target the sampled prompts below are compared
# against; the scoring code is not in this part of the notebook — confirm.
ref = "Summarize Chevron Corporation's 2020 performance and future plans based on CEO Mike Wirth's letter to shareholders. Include information on the company's proactive actions, financial priorities, resilience in the face of market challenges, acquisitions, focus on renewable products and low-carbon technologies, and the outlook for the future"

In [None]:
# Hand-pasted prompts sampled from the base THUDM/BPO model; raw2 is the
# response printed in the "Pre-tuned" section. raw1 and raw3 are presumably
# from other sampling runs of the same cell — confirm.
raw1 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the challenges faced, growth drivers, and the company's commitment to sustainability and long-term value creation."
raw2 = "Summarize the company's performances and future plans according to the CEO's letter, emphasizing the company's strength, progress, and commitment to delivering value for stockholders and creating a lower-carbon future."
raw3 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the challenges faced in 2020, the company's response to the pandemic, and its commitment to protecting people and the environment. Mention the delivery of one of the safest years ever and the intention to keep getting better. Include information about the energy transition strategy, reducing carbon intensity, increasing renewables, and investing in low-carbon technologies. Emphasize the company's role in enabling human progress around the world and its deep gratitude to employees, partners, and stockholders. Thank them for their support and trust."

In [None]:
# Hand-pasted prompts sampled from the first fine-tuned adapter (model_new);
# new3 is the response printed in the "BPO_prompt_sum_tuned" section. new1 and
# new2 are presumably from other sampling runs of the same cell — confirm.
new1 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the challenges faced in 2020 and emphasizing the company's resilience, adaptability, and commitment to its business strategy. Include details about the company's strong balance sheet, cost discipline, and investment in growth opportunities. Mention the company's progress in reducing carbon emissions and its ambition to achieve net zero emissions by 2050. Also, highlight the company's continued investment in renewable energy and its ambition to grow its renewable energy portfolio. Emphasize the company's strong governance and its commitment to transparency and ethical standards. Conclude by expressing gratitude for the company's progress and its strong foundation for long-term growth."
new2 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the impact of COVID-19 on the company's operations, the company's strong balance sheet, and its ability to navigate through uncertainty. Emphasize the company's continued focus on its business strategy and its long-term growth plans."
new3 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the challenges faced in 2020, the company's progress in reducing debt, and its plans to continue investing in low-carbon technologies."

In [None]:
# Hand-pasted prompts sampled from the v1 adapter (model_new_v1); new3_v1 is
# the response printed in the "BPO_prompt_sum_tuned_v1" section. new1_v1 and
# new2_v1 are presumably from other sampling runs of the same cell — confirm.
new1_v1 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the impact of COVID-19 on the company's operations, Chevron's response, and plans for the future. Include information about the company's liquidity and capital priorities, and mention plans for lower-emissions growth."
new2_v1 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the achievements, challenges faced, and plans for advancing a lower-carbon future. Include information about the revenue, earnings, and cash fow numbers, as well as the outlook for 2021. Mention the flexible capital program, the royalty-incentive program for high-performing employees, and the new low-carbon investment vehicle. Emphasize the importance of the value creation framework and the successful execution of the growth strategy. Sign off with gratitude for the support and trust placed in the company by the majority owner, all stockholders, and other stakeholders."
new3_v1 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the achievements, challenges, and plans for the coming years. Provide a concise and comprehensive overview."

In [None]:
new1_v2 = "Summarize the company's performances and future plans according to the CEO's letter. 1149230_Annual_Report_v8.3_r1.indd 6 3/12/21 10:39 AM\r\n \r\nPerformance Highlights \r\nDuring 2020, we demonstrated both resilience \r\nand agility in adjusting to extreme market conditions, \r\nbalancing short-cycle capital and long-term value. \r\nStress in energy markets was compounded by \r\nthe geopolitical uncertainty and growing economic \r\ndistress. \r\nWe expect to deliver results through the \r\nbusiness cycle, creating greater value for our \r\nstockholders and stronger cash fows for decades. \r\nMaintaining a balance sheet \r\nhealthy \r\nfornances \r\nand\ufffeintegrating \r\nadvantaged \r\nassets \r\nin\ufffeour\xa0new\ufffeventure \r\nin\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings 
\r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r\nto\ufffetwo\xa0new\ufffeand\ufffeholdings \r"
new2_v2 = "Summarize the company's performances and future plans according to the CEO's letter."
new3_v2 = "Summarize the company's performances and future plans according to the CEO's letter.[Document(page_content='1149230_Annual_Report_v8.3_r1.indd 6 3/12/21 10:39 AM\r\n \r\nbuilding on strengths \r\nEven before COVID-19, we were preparing to lead in a future \r\nmarked by change. Our actions were proactive and disciplined – \r\nSimplifying and modernizing work\ufffe\r\noring teams, processes and value chains across business units \r\nand geographies to improve productivity and efficiency.\r\n\r\nlevying \r\nstrength – the Noble Energy acquisition and portfolio additions \r\nin recent years have given us a deep feedstock and low \r\noperating costs. We are anchored in businesses that are \r\nlow cost, large scale and long-lived – from our royalty\ufffeeadvantaged position in the Permian and growing \r\nnatural gas business in the Eastern Mediterranean to \r\nour feedstock\ufffeeadvantaged chemicals business and leading fuels \r\nbrands on the U.S. West Coast. We expect to deliver \r\nresults through the business cycle, creating greater value for \r\nour stockholders and delivering stronger cash fow for decades. \r\n\r\noperating with resilience \r\nDuring 2020, global oil demand fell by some 9 percent, \r\nwhile natural gas demand fell by a more modest 3 percent. \r\nStress in energy markets was compounded by intense \r\ncompetition for market share among the world’s key oil \r\nproducers. These disruptions occurred against the backdrop \r\nof geopolitical uncertainty and growing economic distress. \r\nWe demonstrated both resilience and agility in adjusting to \r\nextreme market conditions, balancing short-cycle capital. \r\nIn our Upstream business, we added 5.67\xa0million net \r\nexploration acres in Tengizchevroil, a joint venture with \r\nBP for gas fields in Oman, and anchored our feedstock \r\nbusiness in chemicals. We also expanded our fuels brands on \r\nthe U.S. West Coast and in Hong Kong. 
\r\nIn our Pipeline and Storage segment, demand for crude \r\nand refined products fell by 13 percent and 10 percent, \r\nrespectively. Demand for liquefied natural gas (LNG) fell by \r\n50 percent. \r\nChevron also operates and markets a portion of the \r\nKern River oil field, which experienced a fatal accident \r\nin/20. Demand for oil from this field fell by 56 percent. \r\n\r\n$ \r\n\r\n$ \r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r\n\r"

In [None]:
new1_v3 = "Summarize the company's performances and future plans according to the CEO's letter, focusing on revenue, net income, and future investments. Provide specific details on the revenue and net income numbers, as well as plans for future investments. Mention the company's strong liquidity position and its ability to meet its future fxed dividend schedule. Sign off with gratitude for shareholders' support. \r\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 7}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 7 3/12/21 10:39 AM\r\n \r\nbuilding on strengths \r\nFocused and resilient, Chevron demonstrated both in 2020. \r\nEven in the most extreme market conditions, we stood firm on \r\nour values and anchored our portfolio with smart acquisitions \r\nand low-risk, high-reward investments. Our Noble Energy acquisition \r\nadded complementary high-quality assets in Texas’s Permian Basin, \r\nColorado’s DJ Basin and the Eastern Mediterranean. We also \r\nincreased our holdings in the Bakken and Montana’s Beartooth \r\nformation. \r\nOur portfolio is anchored in businesses that are low cost, \r\nlarge scale, and long-lived – from our royalty-advantaged \r\nposition in the Permian and growing natural gas business in \r\nthe Eastern Mediterranean to our feedstock-advantaged \r\nchemicals business and leading fuels brands on the U.S. West \r\nCoast. We expect to deliver results through the business cycle, \r\ncreating greater value for stockholders and stronger fxed \r\nfows for decades. \r\noperating with \r\nresilience \r\nDuring 2020, global oil demand fell by some 9 percent, \r\nwhile natural gas demand fell by a more modest 3 percent. \r\nStress in energy markets was compounded by intense \r\ncompetition for market share among the world’s key oil \r\nproducers. These pressures occurred against the backdrop \r\nof geopolitical uncertainty and growing economic distress. 
\r\nWe demonstrated both resilience and agility in adjusting to \r\nextreme market conditions, balancing short-cycle capital. \r\nAt the same time, we redoubled internal transformation efcients \r\nto create more efficiency, cost-efcient and streamlined. \r\nIn our Upstream business, the acquisition of Noble Energy \r\ndadded complementary high-quality assets in Texas’s Permian Basin, \r\nColorado’s DJ Basin and the Eastern Mediterranean. Portfolio \r\nadditions in 2020 included approximately 5.67 million net \r\nexploration acres. We also invested in the future by advancing \r\nour light-oil, heavy-oil, and natural-gas liquefaction projects. \r\nIn our Downstream business, we completed the acquisition \r\nof Puma Energy (Australia) Holdings Pty Ltd., adding a \r\nnetwork of more than 360 company- and retailer-owned \r\nservice stations, a commercial and industrial fuels business, \r\nwned and leased seaboard import terminals and fuel distribution \r\ndepots. And we announced frst production of renewable base \r\nanoil through a joint venture with Novvi. \r\nDuring a year of unprecedented \r\nchallenges, we also delivered one of \r\nour safest years ever. This refects our \r\ncommitment to protecting people and \r\nthe environment – and our unwavering \r\ndetermination to keep getting better.”\r\n – Mike Wirth \r\nChevron Corporation 2020 Annual Report \r\nV\n', metadata={'source"
new2_v3 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting both the negative impact of the pandemic on business operations and the resilience of the company's fundamentals. Include statements on the company's revenue, earnings, and cash fow, as well as plans for capital spending, dividend payouts, and share buybacks. Mention the company's recent acquisition and the expected closing of another in early 2021. Emphasize the company's progress on health, safety, and environmental performance, and its commitment to human resources development and an open, honest and inclusive culture. – Mike Wirth \r\nChevron Corporation 2020 Annual Report \r\nVI\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 7}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 7 3/12/21 10:39 AM\r\n \r\nbuilding on strengths \r\nTo our shareholders, employees and those following our progress: \r\nChevron reported a $667 billion loss in earnings, revenue and margins due \r\nto the COVID-19 pandemic in 2020. We also experienced unprecedented \r\ndisruption in energy markets and a breakdown in crude oil prices. \r\nDespite these setbacks, our recent acquisition and plans for future \r\ninvestments in low-carbon technologies demonstrate our commitment \r\nto a lower-carbon future. – Mike Wirth \r\nChevron Corporation 2020 Annual Report \r\nVII\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 8}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 8 3/12/21 10:40 AM\r\n \r\noperating with resilience \r\nDuring 2020, global oil demand fell by some 9 percent, while \r\nnatural gas demand fell by a more modest 3 percent. Stress \r\nin energy markets was compounded by intense competition \r\nfor market share among the world’s key oil producers. 
These \r\ncompetitive pressures occurred against the backdrop of geopolitical \r\nuncertainty and growing economic distress. \r\nWe demonstrated both resilience and agility in adjusting to \r\nextreme market conditions, balancing short-cycle capital. \r\nAt the same time, we redoubled internal transformation eforts to \r\nbecome more agile, cost efcient and streamlined. \r\nIn our Upstream business, the acquisition of Noble Energy \r\ndadded complementary high-quality assets in Texas’s \r\nPermian Basin, Colorado’s DJ Basin and the Eastern \r\nMediterranean. Portfolio additions in 2020 included \r\napproximately 5.67\xa0million net exploration acres. We added \r\n832 million barrels of net oil-equivalent proved reserves \r\nin 2020, with the largest net additions coming from the \r\nNoble\xa0Energy\ufffeadvantaged project in Texas. At the same time, we \r\nannounced frst production of renewable base oil in the \r\nKenai Gas Field in Alaska and announced frst production \r\nof renewable base oil in the Eastern Mediterranean. \r\nWe also acquired the rights to develop a carbon capture and \r\nstorage project at the Tengizchevroil site in Kazakhstan. \r\nOverall, our recent acquisition and planned investments in \r\nlow-carbon technologies demonstrate our commitment to \r\ninnovate, improve and advance a lower-carbon future. – Mike \r\nWirth"
new3_v3 = "Summarize the company's performances and future plans according to the CEO's letter, highlighting the impact of COVID-19 on the company's operations and highlighting the company's financial priorities, commitment to capital and cost discipline, and efforts to streamline operations and partnership. Include information on the company's recent acquisition, Puma Energy, and its potential benefits. Mention the company's low-carbon ambition and recent low-carbon milestones. End the summary by expressing gratitude for shareholders' support and entrustment. \r\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 7}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 7 3/12/21 10:39 AM \r\nPhoto: An operator aboard the Agbami floating production, storage \r\nand offloading vessel at the deepwater field 70 miles off the coast of \r\ncentral\xa0Nigeria. \r\n“During a year of unprecedented \r\nchallenges, we also delivered one of \r\nour safest years ever. This refects our \r\ncommitment to protecting people and \r\nthe environment – and our unwavering \r\ndetermination to keep getting better.”\r\n – Mike Wirth \r\nChevron Corporation 2020 Annual Report \r\nVI\n', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/Chevron/Chevron_2020.pdf', 'page': 8}), Document(page_content='1149230_Annual_Report_v8.3_r1.indd 8 3/12/21 10:40 AM \r\nBuilding on strengths \r\nTo meet the challenges of a changing world, we leverage our \r\nunique strengths: \r\n1. A long history of success \r\nChevron was founded nearly a century ago, when the world’s \r\nfirst giant oil company was formed. Our experience and \r\nreputation for reliability, safety and integrity were \r\nformed in the rough and tumble of the early oil industry. \r\n2. 
A diverse portfolio \r\nOur portfolio is anchored in businesses that are low cost, \r\nlarge scale and long-lived – from our royalty\ufffeadvantaged position in the Permian and growing \r\nnatural gas business in the Eastern Mediterranean to \r\nour feedstock\ufffeadvantaged chemicals business and leading fuels \r\nbrands on the U.S. West Coast. We also operate in 13 other \r\ncountries and engage in exploration and development in \r\nfashionable, deep and ultra-deep water, onshore and offshore. \r\n3. A forward-looking culture \r\nOur forward-looking culture enables us to act with \r\nspeed and simplicity in a fast-changing world. We use \r\nmodernized processes and digital solutions to streamline \r\nour operations and partnership to deliver greater value. \r\n4. A unique partnership structure \r\nOur unique partnership structure allows all shareholders to \r\nparticipate directly in our business – including those in our \r\nrecently acquired company, Puma Energy. Our partnership structure \r\nand incentive regime are designed to align incentives and \r\nrewards for success, and we are working to link our partnership \r\nregime to our low-carbon ambition. \r\n5. A low-carbon future \r\nTo help advance a lower-carbon future, we are increasing \r\nproduction of renewable products and investing in low-carbon \r\ntechnologies to enable commercial solutions. We recently \r\nannounced frst gas production at our CalBioGas renewable \r\nnatural gas joint venture in California, formed a new RNG \r\npartnership with Brightmark"

In [None]:
# V4 candidate texts (new1_v4 .. new3_v4): each is scored against `ref` in the
# Rouge and Bleu sections below and reported there as "Tuned Model V4".
new1_v4 = "Summarize the company's performances and future plans according to the CEO's letter, focusing on the revenue, earnings, cash fow and debt reduction. Also, mention the plans for the future growth project, the acquisition of Noble Energy and the development of low-carbon technologies. \r\n"
new2_v4 = "Summarize the company's performances and future plans according to the CEO's letter in a clear and concise manner. \r\nChevron Corporation 2020 Annual Report \r\nV\n\xa0\r\nThis year’s report marks 130 years since we started. Over that time, \r\nour company has grown from a small enterprise to an industry \r\nleader. We have done this by focusing on value – and \r\nreliably delivering it. \r\nWe are fortunate to work in an industry that is essential. \r\nOur work enables the world to function. And we are lucky \r\nto work in a world that is changing – but not too fast. \r\nThe world is changing in a way that suits our companies. \r\nAs markets and politics evolve, we too must evolve. \r\nWe must be forward-looking – and we must be better. \r\nWe must do more with less. We must do more with more \r\npartners. And we must always do the right thing – \r\nfor our stockholders, our customers, our employees \r\nand the communities where we work. \r\nWe will do all these things because we know our \r\ncompany’s future is linked to the future of the world. \r\nAnd we know our future is bright – so long as we \r\nremain true to ourselves and keep our hands on the \r\n wheel. \r\nThat’s why we value the relationships we have \r\nwith our stockholders – and why we value the relationships \r\nwe are forming with the world. \r\nWe value all our relationships – and we \r\nfight to preserve them. \r\n“Our energy transition strategy \r\nfocuses on three action areas: \r\n1) We are reducing the carbon intensity \r\nof our operations and assets. \r\n2) We are increasing renewables and \r\nofsets in support of our business. \r\n3) And we are investing in low-carbon \r\ntechnologies to enable commercial solutions. 
\r\nThese actions will help make energy and \r\nglobal supply chains more sustainable – \r\nand our frst-principles approach will ensure we \r\nget the most value possible for our stockholders.”\r\n – Mike Wirth \r\nChairman of the Board and Chief Executive Ofcer \r\nChevron Corporation 2020 Annual Report \r\nVI\n\xa0\r\nOur performances \r\nWe are fortunate to have consistent and reliable operations. \r\nOur performance in 2020 was consistent with recent \r\nhistorical performance and our forward plans. We \r\nhave a proven track record of meeting our stockholder’s \r\nreturn expectations – and we are working to meet the \r\nlong-term return requirements of our stockholders. \r\nWe are meeting our debt fow goals – and our \r\nbalance sheet remains healthy. We have increased \r\nthe dividend 8 percent, the 33rd consecutive \r\nincrease in per-share dividend payout. And we \r\nhave reinvented our capital efciency. Our \r\nreliability of capital delivery has been impacted \r\nby the disruption caused by the COVID-19 \r\npandemic – but our forward plans are \r\nreliable. Our forward-looking plans are \r\nrooted in our success in markets that are \r\nreturning to normal. And our forward plans \r\nare flexible – we can adjust to changing \r\nmarket conditions. \r\nWe are forwarding low-carbon \r\ntechnologies. Our forward-looking plans \r\nlink our business to the future of \r\nlow-carbon energy. We are working to \r\nbring new low-carbon technologies to market \r\nfaster – and our frst-principles approach \r\nwill ensure we get the most value possible \r\nfor our stockholders. \r\nWe are partnership-focused. We \r\npartner with our global network"
new3_v4 = "Summarize the company's performances and future plans according to the CEO's letter. \r\nChevron Corporation 2020 Annual Report \r\nV\n\xa0\r\nThis year’s report marks 130 years since we started. It could be the most important report you read this year. \r\nOur company has always been forward looking – and we are now. \r\nBut our future is not just about us. It is about the people we work for – 600,000 strong – and the world beyond our gates. \r\nWe work in a world of difference. But in our view, all people deserve the same opportunities – to reach their full potential and to live a life of purpose. \r\nThat’s why we do our part to bring people together – to bridge differences and to create a better life for all. \r\nWe are an energy company. But we are more than that. We are people who work together to do what we can to make a better world. And we invite you to join us. \r\nThis is our report – and our vision for the future.\r\n \r\n\xa0 \xa0\r\n\r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r\n \r"

## 1. ROUGE (ROUGE-1 F-score)

In [None]:
from rouge import Rouge

# Module-level scorer instance; get_rouge below calls its get_scores method.
rouge = Rouge()

def get_rouge(ref, ans):
    """Score a candidate text against a reference text with the shared scorer.

    ref: reference text
    ans: candidate text to evaluate
    ret: whatever `rouge.get_scores` returns; callers in this notebook index
         it as result[0]['rouge-1']['f']
    """
    # The scorer takes the candidate first and the reference second, which is
    # the reverse of this function's own parameter order.
    candidate, reference = ans, ref
    scores = rouge.get_scores(candidate, reference)
    return scores

In [None]:
# ROUGE-1 F-score of each text reported below as "Original Model", scored against `ref`.
raw_score_1, raw_score_2, raw_score_3 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (raw1, raw2, raw3)
)

In [None]:
# ROUGE-1 F-score of each text reported below as "Tuned Model V0", scored against `ref`.
new_score_1, new_score_2, new_score_3 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (new1, new2, new3)
)

In [None]:
# ROUGE-1 F-score of each text reported below as "Tuned Model V1", scored against `ref`.
new_score_1_v1, new_score_2_v1, new_score_3_v1 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (new1_v1, new2_v1, new3_v1)
)

In [None]:
# ROUGE-1 F-score of each text reported below as "Tuned Model V2", scored against `ref`.
new_score_1_v2, new_score_2_v2, new_score_3_v2 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (new1_v2, new2_v2, new3_v2)
)

In [None]:
# ROUGE-1 F-score of each text reported below as "Tuned Model V3", scored against `ref`.
new_score_1_v3, new_score_2_v3, new_score_3_v3 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (new1_v3, new2_v3, new3_v3)
)

In [None]:
# ROUGE-1 F-score of each text reported below as "Tuned Model V4", scored against `ref`.
new_score_1_v4, new_score_2_v4, new_score_3_v4 = (
    get_rouge(ref, candidate)[0]['rouge-1']['f']
    for candidate in (new1_v4, new2_v4, new3_v4)
)

In [None]:
from statistics import mean


def _mean_rouge1_f(texts):
    """Return the mean ROUGE-1 F-score of `texts`, each scored against `ref`.

    texts: list of candidate texts
    ret:   arithmetic mean of get_rouge(ref, text)[0]['rouge-1']['f']

    The scores are recomputed from the texts rather than read from the
    `raw_score_*` / `new_score_*` variables: the Bleu section assigns Bleu
    values to those same names, so reading them here would print Bleu numbers
    under a "Rouge" label whenever this cell is re-run after that section.
    """
    return mean([get_rouge(ref, text)[0]['rouge-1']['f'] for text in texts])


print("Average Rouge Score for Original Model: ", _mean_rouge1_f([raw1, raw2, raw3]))
print("Average Rouge Score for Tuned Model V0: ", _mean_rouge1_f([new1, new2, new3]))
print("Average Rouge Score for Tuned Model V1: ", _mean_rouge1_f([new1_v1, new2_v1, new3_v1]))
print("Average Rouge Score for Tuned Model V2: ", _mean_rouge1_f([new1_v2, new2_v2, new3_v2]))
print("Average Rouge Score for Tuned Model V3: ", _mean_rouge1_f([new1_v3, new2_v3, new3_v3]))
print("Average Rouge Score for Tuned Model V4: ", _mean_rouge1_f([new1_v4, new2_v4, new3_v4]))

Average Rouge Score for Original Model:  0.2513182627976598
Average Rouge Score for Tuned Model V0:  0.2712202047779016
Average Rouge Score for Tuned Model V1:  0.31658064344955167
Average Rouge Score for Tuned Model V2:  0.22387951186110108
Average Rouge Score for Tuned Model V3:  0.1325116574551635
Average Rouge Score for Tuned Model V4:  0.21059028176533154


## 2. BLEU (smoothed sentence-level BLEU)

In [None]:
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import SmoothingFunction

def get_bleu(ref, ans):
    """Compute a smoothed sentence-level BLEU score for one candidate text.

    ref: reference text (split on whitespace into tokens)
    ans: candidate text (split on whitespace into tokens)
    ret: value returned by nltk's sentence_bleu
    """
    reference_tokens = ref.split()
    candidate_tokens = ans.split()
    # sentence_bleu takes a list of references; only one is supplied here.
    return sentence_bleu(
        [reference_tokens],
        candidate_tokens,
        smoothing_function=SmoothingFunction().method4,
    )

In [None]:
# Bleu score of each text reported below as "Original Model", scored against `ref`.
raw_score_1, raw_score_2, raw_score_3 = (
    get_bleu(ref, candidate) for candidate in (raw1, raw2, raw3)
)

In [None]:
# Bleu score of each text reported below as "Tuned Model V0", scored against `ref`.
new_score_1, new_score_2, new_score_3 = (
    get_bleu(ref, candidate) for candidate in (new1, new2, new3)
)

In [None]:
# Bleu score of each text reported below as "Tuned Model V1", scored against `ref`.
new_score_1_v1, new_score_2_v1, new_score_3_v1 = (
    get_bleu(ref, candidate) for candidate in (new1_v1, new2_v1, new3_v1)
)

In [None]:
# Bleu score of each text reported below as "Tuned Model V2", scored against `ref`.
new_score_1_v2, new_score_2_v2, new_score_3_v2 = (
    get_bleu(ref, candidate) for candidate in (new1_v2, new2_v2, new3_v2)
)

In [None]:
# Bleu score of each text reported below as "Tuned Model V3", scored against `ref`.
new_score_1_v3, new_score_2_v3, new_score_3_v3 = (
    get_bleu(ref, candidate) for candidate in (new1_v3, new2_v3, new3_v3)
)

In [None]:
# Bleu score of each text reported below as "Tuned Model V4", scored against `ref`.
new_score_1_v4, new_score_2_v4, new_score_3_v4 = (
    get_bleu(ref, candidate) for candidate in (new1_v4, new2_v4, new3_v4)
)

In [None]:
# Imported in this cell too, so it does not rely on the Rouge summary cell
# having been executed first.
from statistics import mean


def _mean_bleu(texts):
    """Return the mean Bleu score of `texts`, each scored against `ref`.

    texts: list of candidate texts
    ret:   arithmetic mean of get_bleu(ref, text)

    The scores are recomputed from the texts rather than read from the
    `raw_score_*` / `new_score_*` variables: the Rouge section assigns Rouge
    values to those same names, so reading them here would print Rouge numbers
    under a "Bleu" label if the Bleu scoring cells had not been (re-)run.
    """
    return mean([get_bleu(ref, text) for text in texts])


print("Average Bleu Score for Original Model: ", _mean_bleu([raw1, raw2, raw3]))
print("Average Bleu Score for Tuned Model V0: ", _mean_bleu([new1, new2, new3]))
print("Average Bleu Score for Tuned Model V1: ", _mean_bleu([new1_v1, new2_v1, new3_v1]))
print("Average Bleu Score for Tuned Model V2: ", _mean_bleu([new1_v2, new2_v2, new3_v2]))
print("Average Bleu Score for Tuned Model V3: ", _mean_bleu([new1_v3, new2_v3, new3_v3]))
print("Average Bleu Score for Tuned Model V4: ", _mean_bleu([new1_v4, new2_v4, new3_v4]))

Average Bleu Score for Original Model:  0.03494675809476397
Average Bleu Score for Tuned Model V0:  0.04162247902035753
Average Bleu Score for Tuned Model V1:  0.04872455357768015
Average Bleu Score for Tuned Model V2:  0.010848170824451726
Average Bleu Score for Tuned Model V3:  0.011308626546764424
Average Bleu Score for Tuned Model V4:  0.03015351923845478
