# 1. Import libraries

In [29]:

from llama_parse import LlamaParse
from llama_index.core import Settings
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import MarkdownElementNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core import SimpleDirectoryReader
from llama_index.vector_stores.pinecone import PineconeVectorStore
from IPython.display import Markdown, display
from llama_index.postprocessor.cohere_rerank import CohereRerank
from pinecone import Pinecone, ServerlessSpec


# llama-parse is async-first; nest_asyncio.apply() is needed in order to run
# its async code inside a notebook.
import nest_asyncio
nest_asyncio.apply()

import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI / Pinecone / LlamaCloud
# API keys come from, since no key is passed explicitly in this notebook —
# confirm.
load_dotenv()

# Reload edited modules automatically before each cell runs.
# NOTE(review): the recorded output shows the extension was already loaded;
# `%reload_ext autoreload` avoids that notice when the cell is re-run.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# 2. Pinecone Setup (TODO)

In [30]:
# Create the Pinecone client. No API key is passed explicitly.
# NOTE(review): presumably the client reads it from the PINECONE_API_KEY
# environment variable populated by load_dotenv() — confirm.
pc = Pinecone()


In [None]:
# Create the serverless index used by the rest of the notebook, unless it
# already exists.
#
# The name matches the index opened in the next cell ("nuclear-rag"), and the
# dimension matches the embedding model configured in section 3:
# text-embedding-3-large returns 3072-dimensional vectors by default
# (1536 is the size for text-embedding-ada-002 and would reject the upsert).
INDEX_NAME = "nuclear-rag"
EMBEDDING_DIMENSION = 3072

# Check for the index explicitly instead of catching every exception, so that
# real failures (bad credentials, quota, unsupported region) are not hidden
# behind an "index probably exists" assumption.
if INDEX_NAME in pc.list_indexes().names():
    print(f"Index '{INDEX_NAME}' already exists; skipping creation.")
else:
    pc.create_index(
        INDEX_NAME,
        dimension=EMBEDDING_DIMENSION,
        metric="euclidean",
        spec=ServerlessSpec(cloud="aws", region="us-west-2"),
    )

In [34]:
# Open the "nuclear-rag" index (alternative: "quickstart-index").
pinecone_index = pc.Index("nuclear-rag")
# Expose the Pinecone index to LlamaIndex as a vector store, and make it the
# vector store of the storage context that the index in section 6 is built on.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)



# 3. Set up LLM and embedding models

In [16]:
# Model identifiers, kept together so they are easy to swap.
EMBEDDING_MODEL = "text-embedding-3-large"
GENERATION_MODEL = "gpt-4-turbo-2024-04-09"

# Generation model: kept in `llm` for direct use and registered as the
# LlamaIndex-wide default.
llm = OpenAI(model=GENERATION_MODEL)
Settings.llm = llm

# Embedding model: same pattern as above.
embed_model = OpenAIEmbedding(model=EMBEDDING_MODEL)
Settings.embed_model = embed_model

# 4. Load and Parse Data via LlamaParse

LlamaParse repository and documentation: https://github.com/run-llama/llama_parse

In [4]:
# Collect the PDFs to parse from the data folder.
#
# Only regular files with a .pdf extension are kept, so stray entries in the
# folder (hidden files, sub-directories, other formats) are not handed to the
# parser. The result is sorted because os.listdir() returns entries in
# arbitrary order, which would make the document order vary between runs.
# A missing folder still raises FileNotFoundError, as before.
pdf_files_location = './data'
pdf_paths = sorted(
    os.path.join(pdf_files_location, file_name)
    for file_name in os.listdir(pdf_files_location)
    if file_name.lower().endswith('.pdf')
    and os.path.isfile(os.path.join(pdf_files_location, file_name))
)


In [5]:
# Inspect the collected input paths before sending them to the parser.
pdf_paths

['./data/180340.pdf']

In [6]:
# Natural-language description of the corpus that is handed to LlamaParse.
# Written as adjacent string literals (joined by the parser at compile time)
# so the prompt can be read and edited sentence by sentence.
parsing_instructions = (
    "The documents you are parsing today are about the Mechanisms Engineering "
    "Test Loop (METL) facilities. "
    "The documents include details of METL facilities and critical components "
    "that serve as a means to explore and engineer thermal hydraulic sodium "
    "cooled loop tests for advanced nuclear reactors. "
    "The authors explain in detail how METL components work, and demonstrate "
    "rules and handlings that must be followed during the experiments. "
    "Technical reports illustrate information about designing, performing, "
    "testing, and inspecting experiments. "
    "In addition, instrumentation and control capabilities are provided in detail."
)


In [7]:
# Echo the prompt to check its final text.
parsing_instructions

'The documents you are parsing today are about the Mechanisms Engineering Test Loop (METL) facilities. The documents include details of METL facilities and critical components that serve as a means to explore and engineer thermal hydraulic sodium cooled loop tests for advanced nuclear reactors. The authors explain in detail how METL components work, and demonstrate rules and handlings that must be followed during the experiments. Technical reports illustrate information about designing, performing, testing, and inspecting experiments. In addition, instrumentation and control capabilities are provided in detail.'

In [8]:
# Parse every PDF in pdf_paths into markdown with LlamaParse.
#
# Keyword spelling matters: the LlamaParse options are `parsing_instruction`
# (singular) and `num_workers` (plural). The misspelled forms
# `parsing_instructions=` / `num_worker=` are accepted without an error, so
# the prompt and the worker count would silently not be applied.
# NOTE(review): recent llama-parse releases deprecate `parsing_instruction`
# in favour of dedicated prompt options — check the installed version.
documents = LlamaParse(
    result_type="markdown",
    parsing_instruction=parsing_instructions,
    # Number of concurrent parsing requests; must be a small positive
    # integer (-1 is not a valid value for this option).
    num_workers=4,
    verbose=True,
    language="en",
).load_data(pdf_paths)


Parsing files: 100%|██████████| 1/1 [00:04<00:00,  4.52s/it]


In [9]:
# Look at the first parsed document object.
documents[0]



In [10]:
# Preview the first 512 characters of the parsed markdown.
documents[0].text[:512]

'# Argonne National Laboratory\n\nANL-ART-263\n\nNATIONAL LABORATORY ANL-METL-44\n\n# Mechanisms Engineering Test Loop (METL) Experimenter’s Guide\n\nNuclear Science and Engineering Division\n---\nAbout Argonne National Laboratory\nArgonne is a U.S. Department of Energy laboratory managed by UChicago Argonne, LLC\nunder contract DE-AC02-06CH11357. The Laboratory’s main facility is outside Chicago, at\n9700 South Cass Avenue, Lemont, Illinois 60439. For information about Argonne\nand its pioneering science and technology p'

In [11]:
# List the fields carried by a parsed document (text, metadata, ...).
dict(documents[0]).keys()

dict_keys(['id_', 'embedding', 'metadata', 'excluded_embed_metadata_keys', 'excluded_llm_metadata_keys', 'relationships', 'text', 'start_char_idx', 'end_char_idx', 'text_template', 'metadata_template', 'metadata_seperator'])

# 5. Extract text and table nodes

In [17]:
# Parser that splits the parsed markdown into text nodes and table objects.
#
# The constructor is called directly: chaining `.from_defaults()` onto an
# already constructed instance builds a second parser from default arguments
# and throws away the `llm` and `num_workers` passed here.
# `num_workers` is a concurrency limit, so it has to be positive (-1 is not
# usable); 8 follows the value used in the LlamaParse example notebooks.
node_parser = MarkdownElementNodeParser(llm=llm, num_workers=8)


In [18]:
# Split the parsed documents into nodes, then separate those nodes into
# base nodes (text) and objects (tables).
nodes = node_parser.get_nodes_from_documents(documents)
base_nodes, objects = node_parser.get_nodes_and_objects(nodes)


15it [00:00, 136178.70it/s]
100%|██████████| 15/15 [00:17<00:00,  1.17s/it]


In [21]:
# Sanity check: how many text nodes and how many table objects were produced.
len(base_nodes), len(objects)

(19, 15)

In [22]:
# Preview the first 512 characters of the first text node.
base_nodes[0].text[:512]


'Argonne National Laboratory\n\nANL-ART-263\n\nNATIONAL LABORATORY ANL-METL-44\n\n Mechanisms Engineering Test Loop (METL) Experimenter’s Guide\n\nNuclear Science and Engineering Division\n---\nAbout Argonne National Laboratory\nArgonne is a U.S. Department of Energy laboratory managed by UChicago Argonne, LLC\nunder contract DE-AC02-06CH11357. The Laboratory’s main facility is outside Chicago, at\n9700 South Cass Avenue, Lemont, Illinois 60439. For information about Argonne\nand its pioneering science and technology prog'

In [24]:
# Preview the first table object: a short description of the table followed
# by its columns and the table itself in markdown.
objects[0].obj.text[:512]


'The table lists various sections of a document, each detailing different aspects of laboratory policies and design considerations.,\nwith the following columns:\n- 3.1: None\n- Required Laboratory Policies: None\n- 9: None\n\n|3.1|Required Laboratory Policies|9|\n|---|---|---|\n|3.2|Test Vessel Design Constraints|9|\n|3.3|General Design Considerations|13|\n'

In [27]:
# Full text of a table object that was extracted from a figure.
# NOTE(review): the recorded output is mostly garbled text lifted from the
# P&ID drawing, so figure-derived "tables" like this one may add noise to
# retrieval — consider filtering them out before indexing.
objects[7].obj.text

'The table lists various components and items related to a piping and instrumentation diagram (P&ID) for a project issued for construction in 2015. It includes components like pipes, pumps, and other related items.,\nwith the following table title:\nFigure 1 - The METL piping & instrumentation diagram (P&ID).,\nwith the following columns:\n- U: None\n- 634: None\n- Ueter: None\n- 8 5: None\n\n|U|634|Ueter|8 5|\n|---|---|---|---|\n|Rnkaon| | | |\n|Veagel Vent 3/4" Plpe| | | |\n|Raturr 11/7 3/4" Ripa| | | |\n|slaply| | | |\n|Pump| | | |\n|Vozuut 1-1/2 73187| | | |\n|ISSUED FOR2015|Supol| | |\n|CONSTRUCTION| | | |\n|FAIDD| | | |\n|Figure 1 - The METL piping & instrumentation diagram (P&ID).| | | |\n'

# 6. Upload data to Pinecone

In [37]:
# Build the index over the text nodes and table objects together. Because the
# storage context is backed by Pinecone, the resulting vectors are upserted
# into the Pinecone index (see the progress output below).
recursive_index = VectorStoreIndex(
    base_nodes + objects, storage_context=storage_context
)


Upserted vectors:   0%|          | 0/34 [00:00<?, ?it/s]

# 7. Test query

In [41]:
# Build a query engine with default settings on top of the index.
query_engine = recursive_index.as_query_engine()
# Off-topic question (unrelated to the METL guide): the recorded answer shows
# the engine saying the information is not in the text rather than inventing it.
response = query_engine.query("What did the author do growing up?")

In [42]:
# Render the answer as bold markdown.
display(Markdown(f"<b>{response}</b>"))


<b>The provided text does not contain any information about the author's childhood or activities they engaged in while growing up.</b>

In [45]:
# Domain question that the indexed METL guide should be able to answer.
response = query_engine.query("What are the basic standard procedures for the METL facilities?")

In [46]:
# Render the answer as bold markdown.
display(Markdown(f"<b>{response}</b>"))


<b>The basic standard procedures for the METL facilities include:

1. **Project Kickoff and Test Article Design**: This involves introductions, a tour of the METL facility, discussions on experimental goals, feasibility, budget, schedule, and impacts on other experiments. It also includes discussions on safety and lab-wide policies.

2. **Data Package Preparation / Documentation Requirements**: Before fabrication begins, necessary calculations and documentation such as literature reviews, hand calculations, simulations, and professional engineer sign-offs are required.

3. **Fabrication, Inspection, and Acceptance**: This stage involves determining the quality assurance deliverables needed prior to acceptance by ANL and the METL team, which may include material certifications, weld certifications, and various tests and verifications.

4. **Design Reviews**: Conducted at various stages (conceptual, preliminary, and final) to ensure the test article meets the METL requirements and desired functions.

5. **Assembly and Testing**: Components are assembled and tested in a non-sodium environment to verify correct function before qualifying for insertion into METL.

6. **Qualification for Insertion into METL**: Includes reviewing the design package, verifying work planning and control paperwork, confirming interface with the crane and flexi-cask system, and finding a location for the control system on the METL platform.

7. **Insertion into METL Test Vessel**: Involves draining and cooling the vessel, removing the vessel flange, installing the flexi-cask, performing leak checks, and installing the test article along with other components. The vessel is then preheated and filled with sodium.

These procedures ensure that all tests and experiments conducted at METL adhere to the required safety and quality standards set by the Department of Energy and Argonne National Laboratory.</b>