In [None]:
!pip install openai

Collecting openai
  Downloading openai-1.37.0-py3-none-any.whl (337 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m337.0/337.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
Successfully installed h11-0.14.0 httpcore-1.0.5 ht

In [None]:
from openai import OpenAI
from google.colab import userdata

# Read the API key from Colab user data (secret name 'open_ai_key') instead of
# hardcoding it. `open_ai_key` is reused by the ChatOpenAI cells further down.
open_ai_key = userdata.get('open_ai_key')
# Shared OpenAI client used by the `chat` helpers defined in later cells.
client = OpenAI(api_key=open_ai_key)


# Prompt engineering: few-shot

In [None]:
def chat(message, model="gpt-3.5-turbo", temperature=0.3):
    """
    Classify a text message as Urgent or Non-Urgent using few-shot prompting.

    The system message carries four labelled examples (two Non-Urgent, two
    Urgent). The caller's text is appended as the message to classify, and the
    model's reply is returned unchanged (e.g. 'Classification: Non-Urgent').

    Relies on the module-level `client` (an `OpenAI` instance) created in an
    earlier cell.

    Note: later cells in this notebook define other functions that are also
    named `chat`; whichever definition was executed most recently is the one
    in effect.

    Parameters:
    message (str): The text message to classify.
    model (str): Chat-completions model name. Defaults to "gpt-3.5-turbo".
    temperature (float): Sampling temperature forwarded to the API. Defaults to 0.3.

    Returns:
    str: The content of the first choice in the model's response.
    """
    # Few-shot examples; this text is sent verbatim as the system message.
    system_prompt = """Below are examples of text messages and their classifications. After studying these examples, please classify the new text message at the end.

              Example 1:

              Text: "Can you send me the files by tomorrow? It's not urgent, but I'd like to review them."
              Classification: Non-Urgent
              Example 2:

              Text: "Please review the final report ASAP! We need it ready by our meeting in the morning!"
              Classification: Urgent
              Example 3:

              Text: "Let's schedule a meeting for next week to discuss the project. No rush."
              Classification: Non-Urgent
              Example 4:

              Text: "URGENT: The server is down and needs immediate attention!"
              Classification: Urgent
              """

    response = client.chat.completions.create(
        model=model,
        temperature=temperature,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Classify the following message: {message}"},
        ],
    )

    # Only the first choice is read; the request does not ask for more than one.
    return response.choices[0].message.content


In [None]:
# Try the few-shot classifier on a message that is not one of the prompt's examples.
chat("Reminder: Tomorrow's team meeting has been postponed. Please update your calendars")

'Classification: Non-Urgent'

### JSON mode and Log Probs

In [None]:
def chat(message, model="gpt-4-turbo-preview", return_logprobs=False):
    """
    Classify a text message as Urgent or Non-Urgent and return the model's JSON reply.

    The request uses the four few-shot examples embedded in the system prompt,
    enables JSON mode, and asks the API for token log-probabilities. Relies on
    the module-level `client` (an `OpenAI` instance) created in an earlier cell.

    Note: any probability value inside the JSON reply is text written by the
    model itself; this function does not compute it from the log-probabilities.
    Pass `return_logprobs=True` to get the API's token log-probabilities as well.

    Note: this notebook defines several functions named `chat`; whichever
    definition was executed most recently is the one in effect.

    Parameters:
    message (str): The text message to classify.
    model (str): Chat-completions model name. Defaults to "gpt-4-turbo-preview".
    return_logprobs (bool): When True, also return the `logprobs` object of the
        first choice. Defaults to False, which returns only the reply text.

    Returns:
    str: The JSON-formatted reply text (key names are chosen by the model), or
    tuple: `(reply_text, logprobs)` when `return_logprobs` is True.
    """
    # NOTE(review): this prompt is sent verbatim, including the "classifations"
    # typo in its first sentence; the wording is left untouched here so the
    # model input does not change.
    system_prompt = """
            You are a helpful assistant classifations.

            Below are examples of text messages and their classifications. After studying these examples, please classify the new text message at the end.

              Example 1:

              Text: "Can you send me the files by tomorrow? It's not urgent, but I'd like to review them."
              Classification: Non-Urgent
              Example 2:

              Text: "Please review the final report ASAP! We need it ready by our meeting in the morning!"
              Classification: Urgent
              Example 3:

              Text: "Let's schedule a meeting for next week to discuss the project. No rush."
              Classification: Non-Urgent
              Example 4:

              Text: "URGENT: The server is down and needs immediate attention!"
              Classification: Urgent

              """

    response = client.chat.completions.create(
        model=model,
        # JSON mode: the API requires the word "JSON" to appear in the messages,
        # which the user message below provides.
        response_format={"type": "json_object"},
        logprobs=True,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Classify the following message as Non-Urgent or Urgent and return with probability as JSON: {message}"},
        ],
    )

    first_choice = response.choices[0]
    text_only = first_choice.message.content
    if return_logprobs:
        # Hand back the log-probabilities that were requested above instead of
        # silently discarding them.
        return text_only, first_choice.logprobs
    return text_only


In [None]:
# Same test message as in the few-shot section, now sent through the `chat`
# defined in the preceding cell (JSON mode).
response = chat("Reminder: Tomorrow's team meeting has been postponed. Please update your calendars")

In [None]:
print(response)

{
  "Classification": "Non-Urgent",
  "Probability": 0.95
}


# Chain-of-thought reasoning

In [None]:
def chat(message, model="gpt-4-turbo-preview"):
    """
    Answer a question using a chain-of-thought few-shot prompt.

    The system message contains two worked examples (an arithmetic problem and
    a logic puzzle), each followed by a step-by-step "Chain of Thought Answer".
    The caller's question is then sent as the user message and the model's
    reply is returned unchanged. Relies on the module-level `client` (an
    `OpenAI` instance) created in an earlier cell.

    Note: this notebook defines several functions named `chat`; whichever
    definition was executed most recently is the one in effect.

    Parameters:
    message (str): The question to answer.
    model (str): Chat-completions model name. Defaults to "gpt-4-turbo-preview".

    Returns:
    str: The content of the first choice in the model's response.
    """
    # NOTE(review): this prompt is sent verbatim; its first sentence ("assistant
    # classifations") looks like a leftover from the classification prompt and
    # is left untouched here so the model input does not change.
    system_prompt = """
            You are a helpful assistant classifations.

            Below are examples of questions and how to calculate the answer

              Example 1: Arithmetic Problem
              Prompt: "If a shirt costs $20 and there is a 10% discount, how much does the shirt cost after the discount?"
              Chain of Thought Answer:
                Calculate the amount of discount: 10% of $20 is $2.
                Subtract the discount from the original price: $20 - $2 = $18.
                The shirt costs $18 after the discount.

              Example 2: Logic Puzzle
              Prompt: "There are four apples and you take away three. How many apples do you have?"
              Chain of Thought Answer:
                You start with four apples.
                You take away three apples.
                After taking three, you now have those three apples.
                You have 3 apples

              """

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Answer the following question: {message}"},
        ],
    )

    # Only the first choice is read; the request does not ask for more than one.
    return response.choices[0].message.content


In [None]:
# Ask a new word problem (not one of the prompt's worked examples) and print the reply.
print(chat("If a tree absorbs 48 pounds of CO2 a year, how much CO2 will 10 trees absorb in a year?"))

Chain of Thought Answer:
- Start with the given amount of CO2 absorption per tree: 48 pounds per year per tree.
- Determine the absorption by 10 trees: 10 trees * 48 pounds of CO2 per tree per year = 480 pounds.
- Therefore, 10 trees will absorb 480 pounds of CO2 in a year.


# LangChain & summarizing PDFs

In [None]:
# Get a PDF
!wget https://arxiv.org/pdf/2401.16212.pdf

--2024-07-24 12:34:15--  https://arxiv.org/pdf/2401.16212.pdf
Resolving arxiv.org (arxiv.org)... 151.101.131.42, 151.101.67.42, 151.101.195.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.131.42|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: http://arxiv.org/pdf/2401.16212 [following]
--2024-07-24 12:34:15--  http://arxiv.org/pdf/2401.16212
Connecting to arxiv.org (arxiv.org)|151.101.131.42|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 512852 (501K) [application/pdf]
Saving to: ‘2401.16212.pdf’


2024-07-24 12:34:15 (8.83 MB/s) - ‘2401.16212.pdf’ saved [512852/512852]



In [None]:
!ls

2401.16212.pdf	sample_data


In [None]:
!wget https://www.morganstanley.com/im/publication/insights/articles/article_increasingreturns.pdf

--2024-07-24 12:34:16--  https://www.morganstanley.com/im/publication/insights/articles/article_increasingreturns.pdf
Resolving www.morganstanley.com (www.morganstanley.com)... 23.204.193.233
Connecting to www.morganstanley.com (www.morganstanley.com)|23.204.193.233|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [application/pdf]
Saving to: ‘article_increasingreturns.pdf’

article_increasingr     [ <=>                ] 570.73K  --.-KB/s    in 0.09s   

2024-07-24 12:34:17 (6.08 MB/s) - ‘article_increasingreturns.pdf’ saved [584426]



In [None]:
!pip install -U langchain-community pypdf langchain-openai

Collecting langchain-community
  Downloading langchain_community-0.2.10-py3-none-any.whl (2.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.3/2.3 MB[0m [31m21.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pypdf
  Downloading pypdf-4.3.1-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.8/295.8 kB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-openai
  Downloading langchain_openai-0.1.17-py3-none-any.whl (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.7/46.7 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)
Collecting langchain<0.3.0,>=0.2.9 (from langchain-community)
  Downloading langchain-0.2.11-py3-none-any.whl (990 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m990.3/990.3 kB[0m [31m56.3 MB/s[0m eta [36m0:00:00[

In [None]:
# PyPDFLoader is provided by langchain-community (installed above); importing it
# through `langchain.document_loaders` is a deprecated alias.
from langchain_community.document_loaders import PyPDFLoader

# Load the arXiv paper downloaded earlier. `pages` is a list of `Document`
# objects whose metadata records the source file and page number.
loader = PyPDFLoader("2401.16212.pdf")
pages = loader.load_and_split()

In [None]:
pages[0]

Document(metadata={'source': '2401.16212.pdf', 'page': 0}, page_content='Better Call GPT, Comparing Large Language Models Against Lawyers\nLAUREN MARTIN, NICK WHITEHOUSE, STEPHANIE YIU, LIZZIE CATTERSON, RIVINDU\nPERERA, AI Center of Excellence, Onit Inc., New Zealand\nThis paper presents a groundbreaking comparison between Large Language Models (LLMs) and traditional legal contract review-\ners—Junior Lawyers and Legal Process Outsourcers (LPOs). We dissect whether LLMs can outperform humans in accuracy, speed,\nand cost-efficiency during contract review. Our empirical analysis benchmarks LLMs against a ground truth set by Senior Lawyers,\nuncovering that advanced models match or exceed human accuracy in determining legal issues. In speed, LLMs complete reviews in\nmere seconds, eclipsing the hours required by their human counterparts. Cost-wise, LLMs operate at a fraction of the price, offering a\nstaggering 99.97 percent reduction in cost over traditional methods. These results are 

In [None]:
from langchain.chains.summarize import load_summarize_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_openai import ChatOpenAI

# Low temperature for more repeatable summaries; reuses the key loaded at the top.
llm = ChatOpenAI(temperature=0.1, model_name="gpt-4-turbo-preview", api_key=open_ai_key)
# "stuff" places all supplied documents into a single prompt for one LLM call.
chain = load_summarize_chain(llm, chain_type="stuff")

# Summarize only the first three document chunks of the paper.
res = chain.invoke(pages[0:3])
# `res` echoes the full input documents next to the summary, so show only its
# keys here instead of dumping the whole dict; the summary is in "output_text".
list(res)



{'input_documents': [Document(metadata={'source': '2401.16212.pdf', 'page': 0}, page_content='Better Call GPT, Comparing Large Language Models Against Lawyers\nLAUREN MARTIN, NICK WHITEHOUSE, STEPHANIE YIU, LIZZIE CATTERSON, RIVINDU\nPERERA, AI Center of Excellence, Onit Inc., New Zealand\nThis paper presents a groundbreaking comparison between Large Language Models (LLMs) and traditional legal contract review-\ners—Junior Lawyers and Legal Process Outsourcers (LPOs). We dissect whether LLMs can outperform humans in accuracy, speed,\nand cost-efficiency during contract review. Our empirical analysis benchmarks LLMs against a ground truth set by Senior Lawyers,\nuncovering that advanced models match or exceed human accuracy in determining legal issues. In speed, LLMs complete reviews in\nmere seconds, eclipsing the hours required by their human counterparts. Cost-wise, LLMs operate at a fraction of the price, offering a\nstaggering 99.97 percent reduction in cost over traditional method

In [None]:
print(res["output_text"])

This paper, titled "Better Call GPT, Comparing Large Language Models Against Lawyers" by Lauren Martin and colleagues from Onit Inc., New Zealand, presents a pioneering study comparing the performance of Large Language Models (LLMs) with that of human legal professionals (Junior Lawyers and Legal Process Outsourcers) in the context of legal contract review. The study aims to evaluate whether LLMs can surpass humans in terms of accuracy, speed, and cost-efficiency in identifying and analyzing legal issues within contracts. The findings reveal that LLMs not only match but in some cases exceed human accuracy in spotting legal issues, complete tasks significantly faster (in seconds compared to hours), and are vastly more cost-effective, offering a 99.97% reduction in costs compared to traditional methods. This research underscores a potential paradigm shift in legal practice, highlighting the disruptive impact of LLMs on the industry and suggesting a future where legal services are more ac

In [None]:
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.chains.llm import LLMChain
from langchain.prompts import PromptTemplate

# Model for the bullet-point summary (temperature 0).
llm = ChatOpenAI(temperature=0, model_name="gpt-4-turbo-preview", api_key=open_ai_key)

# Prompt with a single {text} placeholder that receives the document contents.
prompt_template = """Write a concise summary in a maximum of 3 bullets of the following text enclosed within three backticks:
```{text}```
CONCISE SUMMARY:"""
prompt = PromptTemplate.from_template(prompt_template)

# Pair the prompt with the model, then wrap the pair so that the supplied
# documents are inserted into the prompt's "text" variable.
llm_chain = LLMChain(prompt=prompt, llm=llm)
stuff_chain = StuffDocumentsChain(document_variable_name="text", llm_chain=llm_chain)

# Run on the first three document chunks; the summary is read from
# res["output_text"] in the next cell.
res = stuff_chain.invoke(pages[:3])

In [None]:
print(res["output_text"])

- The study compares Large Language Models (LLMs) with Junior Lawyers and Legal Process Outsourcers (LPOs) in legal contract review, finding LLMs match or exceed human accuracy, complete reviews significantly faster, and operate at a fraction of the cost.
- Empirical analysis shows LLMs offer a 99.97 percent reduction in cost over traditional methods, highlighting a potential for significant disruption in the legal industry by enhancing accessibility and efficiency of legal services.
- The research underscores the advent of LLM dominance in legal contract review, suggesting a need for a reimagined future of legal workflows and challenging the traditional reliance on human legal expertise.
