## The first model is a GPT model (OpenAI)

In [None]:
import os
from dotenv import load_dotenv


# Load variables from a local .env file into the process environment so that
# os.getenv() can read them afterwards (e.g. the OpenAI API key).
load_dotenv()

In [2]:
# Baseline configuration: OpenAI is used first so that its answers can be
# compared with the results obtained from llama.
MODEL = "gpt-3.5-turbo"

# None when the variable is not set in the environment / .env file.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [7]:
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.embeddings import OpenAIEmbeddings

# Chat model used for generation; the API key and model name come from the configuration cell.
model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
# Embedding model used to embed documents for retrieval.
# NOTE(review): unlike ChatOpenAI, no API key is passed here - presumably it is read
# from the OPENAI_API_KEY environment variable; confirm.
embeddings = OpenAIEmbeddings()


# Invoking the model directly returns an AIMessage object; a parser is needed
# to extract the text content of the message.
model.invoke("I have an interview this Monday!")



AIMessage(content="That's great news! Congratulations on securing an interview. Make sure to prepare well by researching the company, practicing common interview questions, and dressing professionally. Good luck!", response_metadata={'token_usage': {'completion_tokens': 33, 'prompt_tokens': 14, 'total_tokens': 47}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-9bd0fa5f-1dfe-489f-ac4b-b7231ffd2127-0')

### Notice that the raw output is an AIMessage; an output parser extracts its text content

In [8]:
from langchain_core.output_parsers import StrOutputParser

parser = StrOutputParser()

# Piping the model into the parser makes the chain return a plain string instead of an AIMessage.
# For llama the pipe does not change the output, which is already a string;
# it is still required for OpenAI.
chain = model | parser

chain.invoke("I have an interview this Monday!")

"That's great news! Congratulations on securing an interview. Make sure to prepare well by researching the company, practicing your answers to common interview questions, and dressing professionally. Good luck on Monday!"

### PDF extraction

In [11]:
from langchain_community.document_loaders import PyPDFLoader

# Load the PDF (path is relative to the notebook's working directory) and
# split it into a list of Document objects.
loader = PyPDFLoader("40-419D.pdf")
pages = loader.load_and_split()
# NOTE(review): displaying `pages` dumps the full extracted text into the cell output.
pages

[Document(page_content='pickering test.com\nPXI 64-Channel Semi-Dynamic Digital I/O 40-419\nISSUE 2.3  JAN 2024 y64-Channel I/O (8 Ports of 8 Channels)\n yI/O Direction Selection for Each Port/Channel\n yOpen-Drain Outputs (Low side Driver) for \nEvery Channel\n yUp to 60 V External Voltage Clamping \nCapability for Each Port\n yOver Output Current Limitation Detection for \nEvery Channel\n ySemi-Dynamic 8-bit Pattern Acquistion/\nGeneration with Internal/External Clock\n yVISA, IVI & Kernel Drivers Supplied for \nWindows\n ySupported by PXI or LXI Chassis\n y3 Year Warranty\nDigital I/O modules in general are \nsuitable for operating external devices, \nsuch as power, RF and high voltage \nrelays, solenoids and lamps. They \ncan also be used for interfacing with \nexternal logic such as a programmable \ninstrument with a BCD interface \ndepending upon the module’s driving \ncapabilities.\nThe 40-419 is available with 16, 32, 48 \nor 64 channels arranged in ports of 8.  \nEach port can

In [12]:
from langchain.prompts import PromptTemplate

# Prompt with two placeholders, {context} and {question}. The model is told to
# answer from the supplied context and to reply "N/A" when it cannot.
template = """
Answer the question based on the context below. If you can't answer the question, reply "N/A".

Context: {context}

Question: {question}
"""

prompt = PromptTemplate.from_template(template)

# Render the template with dummy values to check the final prompt text.
print(
    prompt.format(
        context="Here is some context",
        question="Here is a question",
    )
)


Answer the question based on the context below. If you can't answer the question, reply "N/A".

Context: Here is some context

Question: Here is a question



In [13]:
# Minimal chain without retrieval: the filled prompt is passed straight to the model.
# There is no output parser here, so invoking it returns an AIMessage.
# This rebinds the name `chain`.
chain = prompt | model

In [14]:
# Show the input schema of the chain: the expected keys are the prompt's placeholders.
chain.input_schema.schema()

{'title': 'PromptInput',
 'type': 'object',
 'properties': {'context': {'title': 'Context', 'type': 'string'},
  'question': {'title': 'Question', 'type': 'string'}}}

In [15]:
# Sanity-check the prompt | model chain with a hand-written context instead of retrieved documents.
chain.invoke(
    {
        "context": "Super Mario Sunshine was released in 2002. Super Mario 64 was released in 1996",
        "question": "What year was Super Mario Sunshine released?"

    }
)

AIMessage(content='Super Mario Sunshine was released in 2002.', response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 63, 'total_tokens': 73}, 'model_name': 'gpt-3.5-turbo', 'system_fingerprint': 'fp_3b956da36b', 'finish_reason': 'stop', 'logprobs': None}, id='run-ccb0a24c-3ee4-48e9-a9c6-563e07b47e49-0')

In [None]:
from langchain_community.vectorstores import DocArrayInMemorySearch

# Embed the loaded PDF pages with `embeddings` and index them in a
# DocArrayInMemorySearch vector store.
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

In [9]:
# Expose the vector store as a retriever so it can be used as a step inside a chain.
retriever = vectorstore.as_retriever()

In [10]:
from operator import itemgetter

# Retrieval chain. The input is a dict with a "question" key:
#   - "context":  the question is piped into the retriever; its result fills {context}
#   - "question": the question is forwarded unchanged to fill {question}
# The filled prompt is sent to the model and the parser turns the reply into a string.
# This rebinds the name `chain`.
chain = (
    {
        "context": itemgetter("question") | retriever,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

### Initialize dataframe to hold responses

In [12]:
import re
import pandas as pd

# Result table: one row per product, columns filled from the model's answers.
# NOTE(review): the last column label is spelled 'Bandwith' (sic). Code that writes to this
# column must use exactly this label, otherwise a `.loc` assignment creates a new column.
df = pd.DataFrame(columns=['Name', 'Max Voltage', 'Max Current', 'Bandwith'])

In [13]:
def extract_number(response):
    """Return the first run of digits found in ``response`` as an int.

    Only the leading integer run is considered (e.g. ``"0.3 A"`` yields ``0``).
    When ``response`` contains no digit at all, ``0`` is returned.
    """
    digits = re.search(r'\d+', response)
    return int(digits.group()) if digits else 0

In [14]:
def extract_name(response):
    """Return the text inside the first pair of double quotes in ``response``.

    If ``response`` contains no double-quoted section, it is returned unchanged.
    """
    quoted = re.search(r'"([^"]*)"', response)
    if quoted is None:
        return response
    return quoted.group(1)

In [15]:
# Questions are asked in a fixed order: the product-name question comes first and
# appends a new row to `df`; every later question fills one column of that row.
questions = [
    "Retrieve the product name and nothing more",
    "Retrieve the maximum voltage from the I/O specifications. Please only return a number and nothing else",
    "Retrieve the maximum current from the I/O specifications. Please only return a number and nothing else",
    "Retrieve the bandwidth from the I/O specifications. The bandwidth associated with kHz. Please only return a number and nothing else",
]

for question in questions:
    print(f"Question: {question}")
    response = chain.invoke({'question': question})
    print(f"Answer: {response}")
    print()

    # Route the answer to its column based on a keyword in the question.
    topic = question.lower()
    if "product name" in topic:
        # Start a new row; the remaining columns are filled by the following questions.
        df.loc[len(df)] = [extract_name(response), None, None, None]
    elif "voltage" in topic:
        # The label must match the DataFrame column exactly (no leading space),
        # otherwise pandas adds a new column instead of filling 'Max Voltage'.
        df.loc[len(df) - 1, 'Max Voltage'] = response
    elif "current" in topic:
        # presumably the model answers in mA and the column is meant to hold A - TODO confirm
        df.loc[len(df) - 1, 'Max Current'] = extract_number(response) / 1000
    elif "bandwidth" in topic:
        # 'Bandwith' (sic) is the column label the DataFrame was created with.
        df.loc[len(df) - 1, 'Bandwith'] = response

Question: Retrieve the product name and nothing more
Answer: The product name is: 64-Channel Semi-Dynamic Digital I/O, 40-419.

Question: Retrieve the maximum voltage from the I/O specifications. Please only return a number and nothing else
Answer: 60 V

Question: Retrieve the maximum current from the I/O specifications. Please only return a number and nothing else
Answer: 300



TypeError: unsupported operand type(s) for /: 'function' and 'int'

In [None]:
# Plain-text dump of the collected results.
print(df)

                                                name max voltage max current  \
0  The product name is "64-Channel Semi-Dynamic D...        None        None   

  bandwith voltage current  
0     None      60     300  


In [None]:
# Rich (table) display of the collected results.
df

Unnamed: 0,name,max voltage,max current,bandwith


In [None]:
# Persist the results to data.csv in the working directory, without the index column.
df.to_csv('data.csv', index=False)