# Step 01: Install all necessary packages

In [29]:
# Install necessary Python packages using pip

# Install langchain package for working with language chains
!pip install langchain

# Install openai package for accessing OpenAI's API
!pip install openai

# Install pyPDF2 package for working with PDF files
!pip install pyPDF2

# Install faiss-cpu package for using Faiss library for similarity search
!pip install faiss-cpu

# Install tiktoken package for working with TikTok API
!pip install tiktoken




# Step 02: Import all required libraries

In [31]:
# Importing PdfReader class from PyPDF2 module to read PDF files.
from PyPDF2 import PdfReader

# Importing OpenAIEmbeddings class from langchain.embeddings.openai module
# to access embeddings provided by OpenAI for natural language processing tasks.
from langchain.embeddings.openai import OpenAIEmbeddings

# Importing CharacterTextSplitter class from langchain.text_splitter module
# to split text into chunks on a separator, with the chunk size measured in characters.
from langchain.text_splitter import CharacterTextSplitter

# Importing FAISS class from langchain.vectorstores.faiss module
# to utilize FAISS, a library for efficient similarity search and clustering of dense vectors.
from langchain.vectorstores.faiss import FAISS

# Importing multiple vector store options including ElasticVectorSearch, Pinecone, Weaviate, and FAISS
# from langchain.vectorstores module for storing and querying dense vectors efficiently.
from langchain.vectorstores import ElasticVectorSearch, Pinecone, Weaviate, FAISS

# Importing load_qa_chain function from langchain.chains.question_answering module
# to build a chain that answers a question over a set of documents using an LLM.
from langchain.chains.question_answering import load_qa_chain

# Importing OpenAI module from langchain.llms to access OpenAI's large language models (LLMs).
from langchain.llms import OpenAI



# Step 03: Set up our environment

In [33]:
# Make the OpenAI API key available through the OPENAI_API_KEY environment variable.
import os
from getpass import getpass

# Never paste the key into the notebook itself: it would be saved with the file.
# A key that is already present in the environment is reused as-is; otherwise
# it is requested interactively without echoing what is typed.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# Step 04: Extracting text from the PDF document using PDF Reader

In [5]:
# Location of the PDF document to be read.
pdf_path = "/content/drive/MyDrive/PDF_Documents/nasa_systems_engineering_handbook_0.pdf"

# Open the document; 'reader' is the PdfReader object used by the cells that follow.
reader = PdfReader(pdf_path)

# Step 05: Read data from the PDF file and put it into a variable raw_text

In [34]:
# Combine the extracted text of every page into one string.
# A page whose extract_text() yields nothing contributes an empty string, which
# matches the original `if text:` guard. Pages are joined without a separator,
# so the result is identical to what the page-by-page `+=` loop produced, but
# it is built in a single pass and without the unused page index.
raw_text = "".join(page.extract_text() or "" for page in reader.pages)

In [36]:
# Sanity-check the extraction by its size rather than echoing the whole string:
# raw_text holds the text of every page, so displaying it in full would flood
# the notebook output. The number of extracted characters is shown instead.
len(raw_text)



In [10]:
# Preview the first 100 characters of 'raw_text' to confirm that the
# extraction produced readable text.
raw_text[:100]

'National Aeronautics and  \n Space Administration\nNASA  \nSYSTEMS ENGINEERING  \nHANDBOOK\ndesign\ntest\ni'

# Step 06: Split text into smaller Chunks

In [38]:
# Settings for the text splitter:
#   separator       - the text is broken on newline characters
#   chunk_size      - 1000, measured with length_function
#   chunk_overlap   - 200, measured with length_function
#   length_function - the built-in len, so sizes are counted in characters
splitter_options = {
    "separator": "\n",
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
}
textsplitter = CharacterTextSplitter(**splitter_options)


In [39]:
# Split the raw text into smaller text chunks.
texts = textsplitter.split_text(raw_text)

# Stop here with a clear message if the split produced nothing (for example
# when no text could be extracted from the PDF), instead of failing later
# while the search index is being built.
assert texts, "No text chunks were produced; check that the PDF contains extractable text."



In [14]:
# Count how many text chunks were generated.
len_texts = len(texts)

# An assignment on its own displays nothing, so the value is placed on the
# cell's last line to actually show the count.
len_texts

1041

In [15]:
# Inspect the first chunk produced by the splitter.
texts[0]

'National Aeronautics and  \n Space Administration\nNASA  \nSYSTEMS ENGINEERING  \nHANDBOOK\ndesign\ntest\nintegrate\nfly\nwww.nasa.govNASA SP-2016-6105 Rev2 supersedes SP-2007-6105 Rev 1 dated December, 2007.\nCover photos:  Top left:  In this photo, engineers led by researcher Greg Gatlin have sprayed fluorescent oil on a 5.8 percent scale \nmodel of a futuristic hybrid wing body during tests in the 14- by 22-Foot Subsonic Wind Tunnel at NASA’s Langley Research Center \nin Hampton, VA. The oil helps researchers “see” the flow patterns when air passes over and around the model. (NASA Langley/\nPreston Martin) Top right:  Water impact test of a test version of the Orion spacecraft took place on August 24, 2016, at NASA \nLangley Research Center (NASA) Bottom left:  two test mirror segments are placed onto the support structure that will hold them. \n(NASA/Chris Gunn) Bottom right: This self-portrait of NASA’s Curiosity Mars rover shows the vehicle at the “Mojave” site, where its'

In [16]:
# Inspect the second chunk. In the recorded output it opens with the line that
# closes the first chunk, which shows the overlap between neighbouring chunks.
texts[1]

'(NASA/Chris Gunn) Bottom right: This self-portrait of NASA’s Curiosity Mars rover shows the vehicle at the “Mojave” site, where its \ndrill collected the mission’s second taste of Mount Sharp. (NASA/JPL-Caltech/MSSS)\nComments, questions, and suggestions  regarding this document can be sent to:\nSteven R. Hirshorn\nChief Engineer, Aeronautics Research Mission Directorate (ARMD)\nOffice of the Chief Engineer\nNASA Headquarters, Room 6D37\n300 E St SW\nWashington, DC 20546-0001\n202-358-0775\nsteven.r.hirshorn@nasa.goviii\nNASA SYSTEMS ENGINEERING HANDBOOKTable of Contents\nPreface                                        viii\nAcknowledgments                              ix\n1.0 Introduction  1\n1.1 Purpose    .  .  .  .  .  .  .  .  .  .  .  .  .  .  .  . 1\n1.2 Scope and Depth    .  .  .  .  .  .  .  .  .  .  . 1\n2.0 Fundamentals of Systems Engineering  3\n2.1 The Common Technical Processes  \nand the SE Engine   .  .  .  .  .  .  .  .  .  .  . 5\n2.2 An Overview of the SE Engine by'

In [17]:
# Inspect the third chunk. In the recorded output it opens with lines that
# also appear at the end of the second chunk (the overlap again).
texts[2]

'2.0 Fundamentals of Systems Engineering  3\n2.1 The Common Technical Processes  \nand the SE Engine   .  .  .  .  .  .  .  .  .  .  . 5\n2.2 An Overview of the SE Engine by  \nProject Phase    .  .  .  .  .  .  .  .  .  .  .  .  . 8\n2.3 Example of Using the SE Engine   .  .  .  .  10\n2.4 Distinctions between Product  \nVerification and Product Validation   .  .  .  11\n2.5 Cost Effectiveness Considerations   .  .  .  11\n2.6 Human Systems Integration (HSI)  \nin the SE Process    .  .  .  .  .  .  .  .  .  .  . 12\n2.7 Competency Model for  \nSystems Engineers   .  .  .  .  .  .  .  .  .  .  .  13\n3.0 NASA Program/Project Life Cycle  17\n3.1 Program Formulation   .  .  .  .  .  .  .  .  .  . 20\n3.2 Program Implementation   .  .  .  .  .  .  .  . 20\n3.3 Project Pre-Phase A: Concept Studies   . 21\n3.4 Project Phase A: Concept and  \nTechnology Development   .  .  .  .  .  .  .  . 23\n3.5 Project Phase B: Preliminary Design  \nand Technology Completion   .  .  .  .  .  .  25'

# Step 07: Create embeddings with OpenAI and build the search index

In [40]:
# Create the OpenAIEmbeddings instance that is handed to the vector store to
# embed the text chunks.
# NOTE(review): presumably it authenticates with the OPENAI_API_KEY environment
# variable set earlier in this notebook — confirm.
embeddings = OpenAIEmbeddings()

In [41]:
# Build the FAISS index used for text search: 'from_texts' takes the list of
# text chunks together with the embeddings object and returns the index.
docsearch = FAISS.from_texts(texts=texts, embedding=embeddings)

In [42]:
# Build the question-answering (QA) chain on top of an OpenAI LLM.
# temperature=0 is set explicitly, instead of relying on the wrapper's default
# sampling temperature, so that answers are as repeatable as the API allows
# when the notebook is re-run.
llm = OpenAI(temperature=0)

# chain_type="stuff" places all the supplied documents into a single prompt.
chain = load_qa_chain(llm, chain_type="stuff")

# Step 08: Building queries and getting responses from OpenAI

In [43]:
# The question to ask about the handbook.
query = 'Who is the authors of this handbook'

# Look up the chunks in the FAISS index that are most similar to the question.
docs = docsearch.similarity_search(query=query)

# Hand the question and the retrieved chunks to the QA chain; the chain's
# answer is the cell's output.
chain.run(question=query, input_documents=docs)

' The handbook has multiple authors, but it was edited by David D. Walden and others in 2015.'

In [26]:
# Ask what the handbook is about: retrieve the chunks most similar to the
# question, then let the QA chain answer using them as context.
query = 'What this handbook talks about?'
docs = docsearch.similarity_search(query=query)
chain.run(question=query, input_documents=docs)

' This handbook talks about systems engineering best practices that should be incorporated in the development and implementation of large and small NASA programs/projects. It provides guidance and information on systems engineering that will be useful to the NASA community and is meant to increase awareness and consistency across the Agency and advance the practice of SE.'

In [32]:
# Ask about best practices: retrieve the chunks most similar to the question,
# then let the QA chain answer using them as context.
query = 'What are some of the best practices cited in the handbook?'
docs = docsearch.similarity_search(query=query)
chain.run(question=query, input_documents=docs)

' Some of the best practices cited in the handbook include using boxes and figures to define and illustrate concepts, incorporating guidance from NASA practitioners in the field, and following a systematic and disciplined set of processes throughout the life cycle of programs and projects.'