## 0. Installation and Setup

In [None]:
# hide output
%%capture output

! pip install pdfplumber
! pip install sentence-transformers
! pip install langchain
! pip install faiss-gpu
! pip install pypdf

## 1. Load Data
In LangChain, we use `document_loaders` to load our data. We simply import the loader class that matches our data source from `langchain.document_loaders`:
1. folder: DirectoryLoader
2. Azure: AzureBlobStorageContainerLoader
3. CSV file: CSVLoader
4. Google Drive: GoogleDriveLoader
5. Website: UnstructuredHTMLLoader
6. PDF: PyPDFLoader
7. Youtube: YoutubeLoader

For more data loaders, refer to the following link:
https://python.langchain.com/docs/modules/data_connection/document_loaders.html

In [None]:
import os
from google.colab import drive
# Mount Google Drive in the Colab runtime; every project file is addressed
# relative to `path`.
drive.mount('/content/drive')
path = '/content/drive/MyDrive/Capstone/'


# Each entry of the 'Company Reports' folder is treated as one company.
# os.listdir returns entries in arbitrary order, so the indices printed below
# (and accepted by get_reports) are not guaranteed to be stable between runs.
companies = os.listdir(os.path.join(path, 'Company Reports'))
for i, comp in enumerate(companies):
    print(i, ": ", comp)


# get reports
def get_reports(comp, year:int, rep_type:int = 1):
    """
    Collect the paths of the report PDFs that exist on disk for one company.

    comp:       company folder name (str) or its index in `companies` (int)
    year:       0 selects every year 2013-2022, 1-10 selects that many most
                recent years, any other value is used as one specific year
    rep_type:   0 = annual and sustainability reports, 1 = annual only,
                2 = sustainability only
    ret:        list of report paths ordered by ascending year (annual before
                sustainability within a year); None, after printing an error,
                when `comp` or `rep_type` is invalid

    Reads the notebook globals `companies` and `path`.
    """
    # Resolve `comp` to a folder name. The exact type checks mean that bools
    # and str/int subclasses are reported as invalid companies.
    kind = type(comp)
    if kind is not str and kind is not int:
        print("Error: invalid company")
        return
    if kind is str and comp not in companies:
        print("Error: ", comp, " does not exist")
        return
    if kind is int:
        if comp not in range(len(companies)):
            print("Error: invalid index")
            return
        comp = companies[comp]

    report_dir = os.path.join(path, 'Company Reports', comp)
    available = os.listdir(report_dir)

    # Translate `year` into the list of years to look for.
    all_years = range(2013,2023)
    if year not in range(11):
        wanted_years = [year]
    elif year:
        wanted_years = all_years[-year:]
    else:
        wanted_years = all_years

    # File-name suffixes per report type: "" = annual, "_sus" = sustainability.
    suffix_options = ((0, ["", "_sus"]), (1, [""]), (2, ["_sus"]))
    suffixes = next((opts for code, opts in suffix_options if rep_type == code), None)
    if suffixes is None:
        print("Error: invalid report type")
        return

    # Keep only the expected file names that are actually present in the folder.
    candidates = (
        comp + '_' + str(yr) + suffix + '.pdf'
        for yr in wanted_years
        for suffix in suffixes
    )
    return [os.path.join(report_dir, name) for name in candidates if name in available]

Mounted at /content/drive
0 :  ExxonMobil
1 :  Shell plc
2 :  BP PLC
3 :  Saudi Aramco
4 :  Chevron
5 :  TotalEnergies
6 :  Valero Energy
7 :  Marathon Petroleum Corporation
8 :  Sinopec
9 :  PetroChina


In [None]:
# Select the company by folder name rather than by position: the positions come
# from os.listdir, whose ordering is not guaranteed to be the same on every run.
files = get_reports('ExxonMobil', 2022)
file = files[0]
file

'/content/drive/MyDrive/Capstone/Company Reports/ExxonMobil/ExxonMobil_2022.pdf'

## 2. Split the data
Once we have loaded the documents, we need to transform them to better suit our application. The simplest example is splitting a long document into smaller chunks that fit into our model's context window. The most common splitters in LangChain include:

1. RecursiveCharacterTextSplitter()
2. CharacterTextSplitter()

The parameters of the above functions:
 - length_function: how the length of chunks is calculated. Defaults to just counting number of characters, but it's pretty common to pass a token counter here.
 - chunk_size: the maximum size of your chunks (as measured by the length function).
 - chunk_overlap: the maximum overlap between chunks. It can be nice to have some overlap to maintain some continuity between chunks (e.g. do a sliding window).
 - add_start_index: whether to include the starting position of each chunk within the original document in the metadata.

In [None]:
# Take a PDF as an example. This is helpful if we directly download the documents from the company website.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter


# Load the report selected above and split it into chunks of at most 400
# characters, with up to 100 characters shared between neighbouring chunks.
loader = PyPDFLoader(file)
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 400, chunk_overlap = 100)
data = loader.load_and_split(text_splitter)


# We can also use github (Website type) to store our original data.

# from langchain.document_loaders import WebBaseLoader

# loader = WebBaseLoader("https://drive.google.com/file/d/1EA8Iifu4kSIfziXAYz33P7Zon_u_beWb/view?usp=drive_link")
# data = loader.load()

## 3. Vectorstores
We are using FAISS

In [None]:
def load(file_path):
    """
    Return the FAISS vector store for a report.

    file_path:  path of the report file; a saved index is looked up in
                "<file_path without its extension>/faiss"
    ret:        the index loaded from that folder when it exists, otherwise a
                new index built from the notebook-level `data` chunks

    Relies on the notebook globals `FAISS`, `embeddings` and `data` being
    defined before the call.
    NOTE(review): the fallback embeds the global `data`, not the file at
    `file_path`, and the new index is not saved here - confirm that is intended.
    """
    # splitext drops whatever extension the file has instead of assuming that
    # it is exactly four characters long (".pdf").
    base_path, _ = os.path.splitext(file_path)
    vs_path_faiss = os.path.join(base_path, 'faiss')
    if os.path.exists(vs_path_faiss):
        return FAISS.load_local(vs_path_faiss, embeddings)
    return FAISS.from_documents(data, embeddings)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# No model name is passed, so HuggingFaceEmbeddings falls back to its library default.
embeddings = HuggingFaceEmbeddings()

# Load the saved FAISS index for the selected report, or build one from `data`.
vs_faiss = load(file)

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

## 4. Model
We are using Mistral-7b

In [None]:
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM, AutoModelForCausalLM

model_id_mistral = "mistralai/Mistral-7B-Instruct-v0.1"
tokenizer_mistral = AutoTokenizer.from_pretrained(model_id_mistral)
# NOTE(review): no dtype or device is requested here, so the model presumably
# loads with the library defaults - confirm this fits the runtime's memory.
model_mistral = AutoModelForCausalLM.from_pretrained(model_id_mistral)

# Text-generation pipeline; it is wrapped for LangChain at the end of the cell.
pipe_mistral = pipeline(
    "text-generation",
    model = model_mistral,
    tokenizer = tokenizer_mistral,
    # NOTE(review): in transformers `max_length` counts prompt tokens plus
    # generated tokens, while `max_new_tokens` limits only the answer -
    # confirm which limit is wanted here.
    max_length = 1500,
    # Pad with the model's end-of-sequence token id.
    pad_token_id = model_mistral.config.eos_token_id
)

llm_mistral = HuggingFacePipeline(pipeline = pipe_mistral)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

## 6. Generate Answer
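The key class for this part is `RetrievalQA`: we feed our model, retriever, and prompt into it to create a Q&A object. The wrapper below performs the same two steps by hand — it retrieves the documents and then runs a "stuff" QA chain created with `load_qa_chain` — so that each step can be timed separately.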

For details on RetrievalQA, refer to
https://api.python.langchain.com/en/latest/chains/langchain.chains.retrieval_qa.base.RetrievalQA.html

In [None]:


# wrapper function
from langchain.chains.question_answering import load_qa_chain
import time
def get_answer(q, vs, r, llm):
    """
    Retrieve context for a question and answer it with a "stuff" QA chain.

    q:      question text
    vs:     vector store; when truthy, its similarity_search(q) supplies the context
    r:      retriever used instead when `vs` is falsy (get_relevant_documents(q))
    llm:    LangChain LLM handed to load_qa_chain
    ret:    (answer text, retrieval seconds, generation seconds); both times
            are rounded to 2 decimals
    """
    retrieval_start = time.time()
    if vs:
        doc = vs.similarity_search(q)
    else:
        doc = r.get_relevant_documents(q)
    t1 = time.time() - retrieval_start

    # The generation time includes building the chain.
    generation_start = time.time()
    chain = load_qa_chain(llm, chain_type="stuff")
    # Ask about `q` itself. Reading the notebook-global `question` here would
    # answer a different question than the one the documents were retrieved
    # for, or raise NameError on a fresh kernel.
    res = chain({"input_documents": doc, "question": q}, return_only_outputs=True)
    t2 = time.time() - generation_start
    return res['output_text'], round(t1, 2), round(t2, 2)


def show_results(q):
    """
    Print a framed report for one question answered with FAISS + Mistral.

    q:  question text

    The banner and the question are printed before get_answer is called, so
    they are visible while the answer is being produced. Uses the notebook
    globals `vs_faiss` and `llm_mistral`.
    """
    rule = "-" * 100
    for banner_line in (rule, "| FAISS + Mistral |", rule):
        print(banner_line)
    print("    - Q:", q)
    answer, retrieval_secs, model_secs = get_answer(q, vs_faiss, None, llm_mistral)
    print("    - A:", answer)
    print("         retriver time: ", retrieval_secs, 's')
    print("         model time:    ", model_secs, 's')
    print(rule)

## 7. Testing

In [None]:
def print_doc(doc):
    """
    Print retrieved chunks one after another.

    doc:    iterable of objects exposing `metadata['page']` and `page_content`

    Each chunk gets a numbered header (starting at 1) showing its page value,
    followed by its text; a closing rule is printed even when `doc` is empty.
    """
    wide_rule, short_rule = '-'*100, '-'*14
    for position, chunk in enumerate(doc, start=1):
        print(wide_rule)
        print('|', f"{position}. Page", chunk.metadata['page'], '|')
        print(short_rule)
        print(chunk.page_content)
    print(wide_rule)

In [None]:
# Preview the chunks FAISS retrieves for this query.
query = 'What is ExxonMobil’s expected worldwide environmental expenditures in 2023?'
print_doc(vs_faiss.similarity_search(query))

----------------------------------------------------------------------------------------------------
| 1. Page 13 |
--------------
include a significant investment in refining infrastructure and technology to manufacture clean fuels, as well as projects to monitor and 
reduce air, water, and waste emissions, and expenditures for asset retirement obligations. Using definitions and guidelines established 
by the American Petroleum Institute, ExxonMobil' s 2022 worldwide environmental expenditures for all such preventative and
----------------------------------------------------------------------------------------------------
| 2. Page 11 |
--------------
* Not included with the 2022 Annual Report to Shareholders but available on the Investor section of our website at www.exxonmobil.com*
**
----------------------------------------------------------------------------------------------------
| 3. Page 9 |
--------------
performance in 2022.Corporate plan through 2027
VIII EXXON MOBIL CORPOR

In [None]:
from langchain.chains import ConversationalRetrievalChain, LLMChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate

# System instructions placed in front of every prompt unless the caller
# supplies its own.
DEFAULT_SYSTEM_PROMPT = """
You are a helpful, respectful, professional financial assistant.
Always answer as helpfully as possible, while being safe.
Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content.
 Please ensure that your responses are socially unbiased and positive in nature.
""".strip()

def generate_prompt(prompt: str, system_prompt: str = DEFAULT_SYSTEM_PROMPT) -> str:
    """
    Wrap a user prompt and a system prompt in an [INST] ... [/INST] envelope.

    prompt:         text placed just before the closing [/INST] tag
    system_prompt:  text placed between the two `<>` marker lines
    ret:            the assembled prompt, without surrounding blank lines

    NOTE(review): the bare `<>` markers look like `<<SYS>>` / `<</SYS>>` tags
    that lost their contents - confirm the format the model expects.
    """
    system_section = f"[INST] <>\n{system_prompt}\n<>"
    return f"{system_section}\n\n{prompt} [/INST]"

# Build the prompt text for the chain: `{doc}` and `{query}` are placeholders
# that are filled in when the chain is called.
template = generate_prompt (
   """You are answering the question based on the information in {doc} and your financial sense. If the
question cannot be answered, answer
with "I don't know". Do not make up your answer if you cannot answer the question.
 If you can use financial knowledge in the analysis, it helps a lot.
    Question: {query}
 Answer: """,
    system_prompt=DEFAULT_SYSTEM_PROMPT
)
# Declare the two placeholders used by `template`.
prompt =  PromptTemplate(
    input_variables=["doc", "query"],
    template=template
)

# Plain LLM chain: retrieval is done by hand below and passed in as `doc`.
chain = LLMChain(
    llm=llm_mistral, prompt=prompt)



#qa = load_qa_chain(llm, chain_type="stuff")(llm = llm_mistral,  retriever = vs_faiss.as_retriever(), return_source_documents=True)
# Sanity check with a question the report is not about.
query ='What is apple?'
doc = vs_faiss.similarity_search(query)
print(doc)
# NOTE(review): `doc` is a list of Document objects, so it is presumably
# rendered into the prompt via its repr (metadata included) - confirm.
result = chain({'query': query, 'doc':doc}, return_only_outputs=True)
print(result)

[Document(page_content='Senior Strategist at X\n(formerly Google X)\n(technology)\nDirector since 2021\nJeffrey W. Ubben\nFounder, Portfolio Manager,\nand Managing Partner,\nInclusive Capital Partners, L.P.\n(ﬁnancial services)\nDirector since 2021\nDarren W. Woods\nChairman of the Board and\nChief Executive Of ﬁcer\nDirector since 2016\nAudit Committee\nU.M. Burns (Chair), M.J. Angelakis, G.J. Goff, J.D. Harris II, K.H. Hietala', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/ExxonMobil/ExxonMobil_2022.pdf', 'page': 149}), Document(page_content="The aggregate market value of the voting stock held by non-affiliates of the registrant on June 30, 2022, the last business day of the registrant's most recently completed \nsecond fiscal quarter, based on the closing price on that date of$85.64 on the New York Stock Exchange composite tape, was in excess of$356 billion. \nClass Outstanding as of January 31, 2023", metadata={'source': '/content/drive/MyDrive/Capstone/Comp

In [None]:
# Same chain, now with a question the report should be able to answer.
query ='What is ExxonMobil’s expected worldwide environmental expenditures in 2023?'
doc = vs_faiss.similarity_search(query)
print(doc)
result = chain({'query': query, 'doc':doc}, return_only_outputs=True)
print(result)

[Document(page_content="include a significant investment in refining infrastructure and technology to manufacture clean fuels, as well as projects to monitor and \nreduce air, water, and waste emissions, and expenditures for asset retirement obligations. Using definitions and guidelines established \nby the American Petroleum Institute, ExxonMobil' s 2022 worldwide environmental expenditures for all such preventative and", metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/ExxonMobil/ExxonMobil_2022.pdf', 'page': 13}), Document(page_content='* Not included with the 2022 Annual Report to Shareholders but available on the Investor section of our website at www.exxonmobil.com*\n**', metadata={'source': '/content/drive/MyDrive/Capstone/Company Reports/ExxonMobil/ExxonMobil_2022.pdf', 'page': 11}), Document(page_content='performance in 2022.Corporate plan through 2027\nVIII EXXON MOBIL CORPORATION  |  2022 ANNUAL REPORT\nOur winning proposition\nUpstream Low Carbon Solutio

In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'How did the company perform in reducing greenhouse gas emissions?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 148 |
--------------
50-percent reduction in upstream greenhouse gas intensity, 70- to 80-percent reduction in corporate-wide methane intensity,and 60- to 70-percent reduction in corporate-wide hydrocarbon ﬂaring intensity. Plans cover Scope 1 and Scope 2 emissions
for assets operated by the company, versus 2016 levels. ExxonMobil’s 2030 GHG emission reduction plans, https://corporate.
----------------------------------------------------------------------------------------------------
| 2. Page 147 |
--------------
Applies to Scope 1 and 2 greenhouse gas emissions from operated assets. Emission metrics are based on assets operated byExxonMobil using performance and plan data for full-year 2022 available as of March 1, 2023. The greenhouse gas intensitymetric includes Scope 2 market-based emissions. ExxonMobil reported emissions, reductions, and avoidance performancedata are ba

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: How did the company perform in reducing greenhouse gas emissions?
    - A:  ExxonMobil reported a 50-percent reduction in upstream greenhouse gas intensity, 70- to 80-percent reduction in corporate-wide methane intensity, and 60- to 70-percent reduction in corporate-wide hydrocarbon flaring intensity. These reductions apply to Scope 1 and 2 greenhouse gas emissions from operated assets. The company also reported emissions, reductions, and avoidance performance data based on a combination of continued to focus on ways to meaningfully reduce our own and others’ greenhouse gas emissions.
         retriver time:  0.02 s
         model time:     129.14 s
----------------------------------------------------------------------------------------------------


In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What is the company’s ROCE?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 145 |
--------------
ADDITIONAL INFORMATION
Stock Performance Graphs 135
Deﬁnitions 136
Footnotes 136
Board of Directors 138
134
----------------------------------------------------------------------------------------------------
| 2. Page 12 |
--------------
of competition which are lawful and appropriate for such purposes. 
Operating data and industry segment information for the Corporation are contained in the Financial Section of this report under the 
following: "Management's Discussion and Analysis of Financial Condition and Results of Operations: Business Results" and "Note
----------------------------------------------------------------------------------------------------
| 3. Page 1 |
--------------
largest inte grated and most technolo gically advanced refinin g and petrochemical complexes .Financial and operating performance significantly led peers1
Continuing to be

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What is the company’s ROCE?
    - A:  The company's ROCE is $23B.
         retriver time:  0.02 s
         model time:     37.08 s
----------------------------------------------------------------------------------------------------


In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'How does the project execution outperform industry average?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 147 |
--------------
number of factors, including availability of supportive policy, technology for cost-effective abatement, and alignment withour partners and other stakeholders. The company may refer to these opportunities as projects in external disclosures at vari-ous stages throughout their progression.
----------------------------------------------------------------------------------------------------
| 2. Page 147 |
--------------
Performance product (performance chemicals) • Refers to Chemical products that provide differentiated performance for
multiple applications through enhanced properties versus commodity alternatives and bring signi ﬁcant additional value to
customers and end users.
Project • The term “project” as used in this presentation can refer to a variety of different activities and does not necessarily
---------------------------------------------------

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: How does the project execution outperform industry average?
    - A:  The project execution outperforms industry average due to the company's unique competitive advantages, which have been built over decades, bringing their shareholders exceptional results through the right strategic priorities and extraordinary execution by their employees around the world. The company's five-year plan is expected to drive leading business outcomes and is a continuation of the path that delivered industry-leading performance in 2022. Additionally, ExxonMobil works with industry, including API and Ipieca, to improve emission factors and methodologies, including measurements and estimates.
         retriver time:  0.02 s
         model time:     105.54 s
----------------------

In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What percentage is ExxonMobil’s operating cost lower than industry average?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 147 |
--------------
3. For de ﬁnitions and more information on return on average capital employed, see Page 35 of ExxonMobil’s 2022 Form 10-K
which forms part of this Report.
4. See Stock Performance Graphs on Page 135. Source: S&P Global Platts.5. Based on full-year 2022 ExxonMobil workforce (includes employees and contractors) Lost Time Incident Rate data as of
----------------------------------------------------------------------------------------------------
| 2. Page 11 |
--------------
* Not included with the 2022 Annual Report to Shareholders but available on the Investor section of our website at www.exxonmobil.com*
**
----------------------------------------------------------------------------------------------------
| 3. Page 148 |
--------------
number of factors, including supportive policy, technology, and market conditions.
16. Source: ExxonMobil analysis of EPA

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What percentage is ExxonMobil’s operating cost lower than industry average?
    - A:  ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s operating cost is lower than industry average.

ExxonMobil’s

In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What is ExxonMobil’s five-year cumulative total shareholder returns in 2021?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 146 |
--------------
Fiscal years ended December 31
TEN-YEAR CUMULATIVE TOTAL SHAREHOLDER RETURNS4
$400
300200
1000(value of $100 invested at year-end 2012)
ExxonMobil
Industry GroupS&P 500
2012
ExxonMobil 100 113 118 97 66 195
S&P 500 100 151 171 199 310 327
Industry Group 100 108 116 129 98 192
Fiscal years ended December 312014
120
132
1182013
99
153
892015
114
208
1372017
104
262
1412019
104
399
----------------------------------------------------------------------------------------------------
| 2. Page 11 |
--------------
* Not included with the 2022 Annual Report to Shareholders but available on the Investor section of our website at www.exxonmobil.com*
**
----------------------------------------------------------------------------------------------------
| 3. Page 146 |
--------------
STOCK PERFORMANCE GRAPHS (unaudited)
The annual total shareholder return (TSR) to Exx

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What is ExxonMobil’s five-year cumulative total shareholder returns in 2021?
    - A:  The information provided does not include the five-year cumulative total shareholder returns for 2021.
         retriver time:  0.02 s
         model time:     56.32 s
----------------------------------------------------------------------------------------------------


In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What is ExxonMobil’s expected worldwide environmental expenditures in 2023?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 13 |
--------------
include a significant investment in refining infrastructure and technology to manufacture clean fuels, as well as projects to monitor and 
reduce air, water, and waste emissions, and expenditures for asset retirement obligations. Using definitions and guidelines established 
by the American Petroleum Institute, ExxonMobil' s 2022 worldwide environmental expenditures for all such preventative and
----------------------------------------------------------------------------------------------------
| 2. Page 9 |
--------------
VIII EXXON MOBIL CORPORATION  |  2022 ANNUAL REPORT
Our winning proposition
Upstream Low Carbon Solutions Product Solutions
~500K
40-50 %oil-equivalent barrels o f expected 
growth by 2027 versus 202 3
reduction in U pstream
greenhouse gas intensity
by 203 0182X
1Bvolume o f high-value products
with di fferentiated per formance
by 2027 ve

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What is ExxonMobil’s expected worldwide environmental expenditures in 2023?
    - A:  ExxonMobil’s expected worldwide environmental expenditures in 2023 are approximately $7.3 billion.
         retriver time:  0.02 s
         model time:     48.9 s
----------------------------------------------------------------------------------------------------


In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What is ExxonMobil’s worldwide environmental expenditures in 2022?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 13 |
--------------
include a significant investment in refining infrastructure and technology to manufacture clean fuels, as well as projects to monitor and 
reduce air, water, and waste emissions, and expenditures for asset retirement obligations. Using definitions and guidelines established 
by the American Petroleum Institute, ExxonMobil' s 2022 worldwide environmental expenditures for all such preventative and
----------------------------------------------------------------------------------------------------
| 2. Page 11 |
--------------
* Not included with the 2022 Annual Report to Shareholders but available on the Investor section of our website at www.exxonmobil.com*
**
----------------------------------------------------------------------------------------------------
| 3. Page 9 |
--------------
VIII EXXON MOBIL CORPORATION  |  2022 ANNUAL REPORT
Our winning proposi

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What is ExxonMobil’s worldwide environmental expenditures in 2022?
    - A:  ExxonMobil’s worldwide environmental expenditures in 2022 were not included with the 2022 Annual Report to Shareholders but are available on the Investor section of their website at www.exxonmobil.com.
         retriver time:  0.02 s
         model time:     67.79 s
----------------------------------------------------------------------------------------------------


Adjust `max_length`

In [None]:
# Rebuild the pipeline and its LangChain wrapper on the already loaded model
# and tokenizer.
# NOTE(review): this cell is meant for adjusting `max_length`, but the value
# is still 1500, the same as when the pipeline was first created - confirm
# the intended setting.
pipe_mistral = pipeline(
    "text-generation",
    model = model_mistral,
    tokenizer = tokenizer_mistral,
    max_length = 1500,
    pad_token_id = model_mistral.config.eos_token_id
)

llm_mistral = HuggingFacePipeline(pipeline = pipe_mistral)

In [None]:
# Preview the chunks FAISS retrieves for this question; `question` is reused
# by the show_results call in the next cell.
question = 'What is ExxonMobil’s expected worldwide environmental expenditures in 2023?'
print_doc(vs_faiss.similarity_search(question))

----------------------------------------------------------------------------------------------------
| 1. Page 13 |
--------------
include a significant investment in refining infrastructure and technology to manufacture clean fuels, as well as projects to monitor and 
reduce air, water, and waste emissions, and expenditures for asset retirement obligations. Using definitions and guidelines established 
by the American Petroleum Institute, ExxonMobil' s 2022 worldwide environmental expenditures for all such preventative and
----------------------------------------------------------------------------------------------------
| 2. Page 9 |
--------------
VIII EXXON MOBIL CORPORATION  |  2022 ANNUAL REPORT
Our winning proposition
Upstream Low Carbon Solutions Product Solutions
~500K
40-50 %oil-equivalent barrels o f expected 
growth by 2027 versus 202 3
reduction in U pstream
greenhouse gas intensity
by 203 0182X
1Bvolume o f high-value products
with di fferentiated per formance
by 2027 ve

In [None]:
show_results(question)

----------------------------------------------------------------------------------------------------
| FAISS + Mistral |
----------------------------------------------------------------------------------------------------
    - Q: What is ExxonMobil’s expected worldwide environmental expenditures in 2023?


ValueError: ignored

## 8. Conversation


In [None]:
from langchain.chains import ConversationalRetrievalChain

# svm_retriever = SVMRetriever.from_documents(all_splits, embeddings)
# Build the conversational chain from the Mistral LLM and the FAISS store
# created earlier in this notebook. The class name was missing before
# `.from_llm`, which is a syntax error, and `llm` / `vectorstore` are not
# defined at this point of a top-to-bottom run.
qa = ConversationalRetrievalChain.from_llm(llm_mistral, vs_faiss.as_retriever())

chat_history = []
while True:
  question = input('Send a question:')
  # An empty line ends the chat instead of requiring a kernel interrupt.
  if not question.strip():
    break
  # Use chat_history to store history data
  result = qa({'question': question, 'chat_history': chat_history})
  chat_history.append((question, result['answer']))
  print(result['answer'])

Send a question:What's the upstream earnings after income tax in 2017?
14,079
Send a question:What's the upstream earnings after income tax next year?
14,079
Send a question:What's the upstream earnings before income tax in 2017?
14,079
Send a question:What's the liquid production in 2017?
4.7 million barrels per day
Send a question:Does it perform well?
Technology has allowed us to effectively respond to a dynami c and challenging underpins our strong market position in high-performance products
Send a question:Who is the CEO?
Douglas R. Oberhelman
Send a question:Wrong answer, find another one
(iii).


KeyboardInterrupt: ignored

## 9. Old code

In [None]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Earlier experiment: a FLAN-T5 sequence-to-sequence model instead of Mistral.
model_id = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)

llm = HuggingFacePipeline(pipeline=pipe)


# Create Q&A object
# Note: this rebinds the notebook-level name `template` used by the earlier
# LLMChain prompt cell.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)



# NOTE(review): `svm_retriever` is not defined by any active cell of this
# notebook (it appears only in a commented-out line) - define it before
# running this cell.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever = svm_retriever,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# Feed our question and get the answer.
# `question` is whatever an earlier cell last assigned to that name.
result = qa_chain({"query": question})
result["result"]

'14,079 13,355 196 7,101 27,548'

In [None]:
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import HuggingFacePipeline
from transformers import AutoTokenizer, pipeline, AutoModelForSeq2SeqLM
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Earlier experiment: a FLAN-T5 sequence-to-sequence model with a vector-store
# retriever.
model_id = 'google/flan-t5-large'
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

pipe = pipeline(
    "text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=100
)

llm = HuggingFacePipeline(pipeline=pipe)


# Create Q&A object
# Note: this rebinds the notebook-level name `template` used by the earlier
# LLMChain prompt cell.
template = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)



# NOTE(review): `vectorstore` is not defined in the visible notebook; the FAISS
# store built earlier is named `vs_faiss` - confirm which store is meant.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=vectorstore.as_retriever(),
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

# Feed our question and get the answer.
# `question` is whatever an earlier cell last assigned to that name.
result = qa_chain({"query": question})
result["result"]

'14,079 13,355 196 7,101 27,548'

Reference:
https://python.langchain.com/docs/use_cases/question_answering/#step-4-retrieve