In [None]:
from decouple import AutoConfig
# `search_path` is the directory where python-decouple starts looking for a
# settings file (.env / settings.ini), so point it at the parent directory
# rather than at the .env file itself.
config = AutoConfig(search_path='./..')

In [None]:
import os
import openai

# Read the key from the decouple config, export it as an environment variable
# (presumably for libraries that look it up there), and mirror the same value
# onto the openai module.
os.environ['OPENAI_API_KEY'] = config('OPENAI_API_KEY')
openai.api_key = os.environ['OPENAI_API_KEY']

In [None]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Article to analyse, plus the HTML tag names and CSS class the parser is
# restricted to.
urls = (
    'http://www.moneycontrol.com/news/business/markets/stock-radar-adani-enterprises-paytm-zomato-indigo-and-others-in-focus-12750205.html',
)
tag = ('div', 'p')
tag_classes = ('content_wrapper arti-flow',)

# Fetch the page; the SoupStrainer limits parsing to elements that match the
# tag names and class given above.
loader = WebBaseLoader(
    web_paths=urls,
    bs_kwargs={"parse_only": bs4.SoupStrainer(tag, class_=tag_classes)},
)

docs = loader.load()
doc = docs[0]  # a single URL is requested, so work with the first document

In [None]:
# Show the text of the loaded document so the result of the tag/class filter
# can be inspected by eye.
print(doc.page_content)

In [None]:
from pydantic import BaseModel, Field
class Overview(BaseModel):
    """Overview of a section of text."""
    # NOTE(review): this model is converted into an OpenAI function schema
    # later in the notebook; the docstring and the Field descriptions are
    # presumably forwarded to the model as instructions - confirm before
    # rewording any of them.
    # All three fields are plain strings, including `keywords`.
    summary: str = Field(description="Provide a concise summary of the content.")
    language: str = Field(description="Provide the language that the content is written in.")
    keywords: str = Field(description="Provide keywords related to the content.")

In [None]:
from langchain.prompts import ChatPromptTemplate
# Two-message chat prompt: a fixed system instruction followed by the text to
# analyse, which is supplied through the `input` variable.
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "Extract the relevant information, if not explicitly provided do not guess. Extract partial info",
        ),
        ("human", "{input}"),
    ]
)

In [None]:
from langchain_openai import ChatOpenAI
# Chat model used by the chains built from it: gpt-3.5-turbo with temperature
# 0.0 and max_tokens set to 1024.
model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0, max_tokens=1024)

In [None]:
from langchain.output_parsers.openai_functions import JsonOutputFunctionsParser
from langchain.utils.openai_functions import convert_pydantic_to_openai_function

# Function schema derived from the Overview model.
overview_tagging_function = [convert_pydantic_to_openai_function(Overview)]

# Attach the schema to the model and request a call to the "Overview" function.
tagging_model = model.bind(
    function_call={"name": "Overview"},
    functions=overview_tagging_function,
)

# prompt -> model -> parse the function-call arguments as JSON.
tagging_chain = prompt | tagging_model | JsonOutputFunctionsParser()

In [None]:
# Feed the article text itself into the prompt. Passing the Document object
# would interpolate its string form (page content wrapped together with its
# metadata) into the `{input}` slot instead of the plain text.
tagging_chain.invoke({"input": doc.page_content})

In [None]:
from typing import List

class Tip(BaseModel):
    """Information about a company mentioned in the article."""
    # The docstring above previously referred to "papers", which contradicted
    # the fields below; every field here describes a company.
    # NOTE(review): the docstring and Field descriptions are presumably sent to
    # the model as part of the function schema - keep them accurate.
    company_name: str = Field(description="The name of the company.")
    sentiment: str = Field(description="The sentiment associated with the company in the text.")
    signal: str = Field(description="Infer BUY or SELL signal associated with the company in the text.")
    signal_phrase: str = Field(description="The signal associated with the company in the text.")


class Info(BaseModel):
    """Information to extract"""
    # Container model: one Tip entry per extracted item. The extraction chain
    # later in the notebook reads this list back out via the "tips" key.
    tips: List[Tip]

In [None]:
# System instruction for the extraction step. The wording consistently refers
# to companies (the entities the Tip/Info schema describes); the earlier text
# mixed in "papers", left over from a different extraction task.
template = """A news article will be passed to you. Extract from it all the companies that are mentioned in this article.
If no companies are mentioned that's fine - you don't need to extract any! Just return an empty list.
Do not make up or guess ANY extra information. Only extract what exactly is in the text."""

# Chat prompt: the instruction above as the system message, followed by the
# article text supplied through the `input` variable.
prompt = ChatPromptTemplate.from_messages([
    ("system", template),
    ("human", "{input}")
])

In [None]:
from langchain.output_parsers.openai_functions import JsonKeyOutputFunctionsParser

# Function schema derived from the Info model (a list of Tip entries).
tip_extraction_function = [convert_pydantic_to_openai_function(Info)]

# Attach the schema to the model and request a call to the "Info" function.
extraction_model = model.bind(
    function_call={"name": "Info"},
    functions=tip_extraction_function,
)

# prompt -> model -> parse the function-call arguments and keep only the
# value stored under "tips".
extraction_chain = (
    prompt
    | extraction_model
    | JsonKeyOutputFunctionsParser(key_name="tips")
)

In [None]:
# Feed the article text itself into the prompt. Passing the Document object
# would interpolate its string form (page content wrapped together with its
# metadata) into the `{input}` slot instead of the plain text.
extraction_chain.invoke({"input": doc.page_content})