# Install Packages

In [None]:
# Install the LangChain integration packages into the kernel's environment
# (`%pip` targets the running kernel, unlike `!pip`).
# NOTE(review): versions are not pinned — consider pinning them for reproducibility.
%pip install langchain_community
%pip install langchain_openai

# Set up LLM & Embedding Model

In [None]:
import json
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

# Read the Azure OpenAI endpoint and API key from a local JSON file so that no
# credential is hard-coded in the notebook.
# The encoding is explicit because open() otherwise falls back to the platform's
# locale encoding; this also matches the UTF-8 writer used at the end of the notebook.
with open("secrets/openai_secrets.json", "r", encoding="utf-8") as file:
    openai_secrets = json.load(file)


# Chat model used for both entity extraction and credit-risk assessment.
# temperature=0 keeps the sampling as deterministic as the service allows.
llm = AzureChatOpenAI(
    azure_endpoint=openai_secrets["openai_api_base"],
    openai_api_version="2023-03-15-preview",
    deployment_name="gpt-4-32k",
    openai_api_key=openai_secrets["openai_api_key"],
    openai_api_type="azure",
    temperature=0,
)

# Embedding model configured against the same Azure endpoint and key.
embedding_model = AzureOpenAIEmbeddings(
    chunk_size=1,
    deployment="text-embedding-ada-002",
    openai_api_version="2023-03-15-preview",
    azure_endpoint=openai_secrets["openai_api_base"],
    openai_api_key=openai_secrets["openai_api_key"],
    openai_api_type="azure",
)

# Entity Extraction

In [None]:
from langchain.prompts.few_shot import FewShotPromptTemplate
from typing import List, Dict, Any
import json

class EntityExtractionWithLLM:
    """
    Performs entity extraction with a Large Language Model (LLM) guided by few-shot examples.
    """

    # Text appended after the rendered examples; ``{input}`` is filled with the
    # passage to analyse. Used when the caller does not pass its own ``suffix``.
    DEFAULT_SUFFIX = """Question: {input}\nCompany Name:"""

    def __init__(self, llm: Any, examples: List[Dict]) -> None:
        """
        Initializes the EntityExtractionWithLLM instance.

        Args:
        - llm (Any): The large language model used for entity extraction.
        - examples (List[Dict]): List of example dictionaries used for entity extraction.
        """
        self.llm = llm
        self.examples = examples

    def extract_entities(
        self,
        input: str,
        example_prompt: Any,
        prefix_intructions: str,
        suffix: str = DEFAULT_SUFFIX,
    ) -> Any:
        """
        Extracts entities from the input text using the LLM and an example-based prompt.

        Args:
        - input (str): The input text from which entities are to be extracted.
        - example_prompt (Any): Prompt template object used to render each few-shot example
          (it is forwarded unchanged to FewShotPromptTemplate).
        - prefix_intructions (str): Instructions placed before the examples. The keyword keeps
          its historical spelling so existing callers continue to work.
        - suffix (str): Template placed after the examples; it must contain the ``{input}``
          placeholder. Defaults to ``DEFAULT_SUFFIX``.

        Returns:
        - Any: Whatever the ``prompt | llm`` chain returns from ``invoke``. This is the model's
          response object rather than a plain string; callers read the text via ``.content``.
        """
        prompt = FewShotPromptTemplate(
            examples=self.examples,
            example_prompt=example_prompt,
            prefix=prefix_intructions,
            suffix=suffix,
            input_variables=["input"],
            example_separator="\n",
        )

        # Pipe the rendered few-shot prompt into the model.
        chain = prompt | self.llm

        return chain.invoke({"input": input})
    

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Instructions placed before the few-shot examples.
# (Plain string: there are no placeholders, so no f-string prefix is needed.)
EXTRACTION_TEMPLATE = """
Extract company name from the following passage. If there are no company names mentioned, return an empty list.

Examples:
"""

# Few-shot examples: each pairs a passage ("query") with the expected list of company names.
ENTITY_EXTRACTION_EXAMPLES = [
    {
        "query": "A man looks at a Tesla Cybertruck parked near the SpaceX Starship as it stands on the launch pad",
        "company_name": ["Tesla"],
    },
    {
        "query": "A man looks at a car parked near the SpaceX Starship as it stands on the launch pad",
        "company_name": [],
    },
    {
        "query": "The Bank of England said it forecast a\n  shortage of around 250 mln stg in the money market today.",
        "company_name": ["Bank of England"],
    },
    {
        "query": "Australian markets are booming as foreign\n  fund managers redirect capital away from the United States",
        "company_name": [],
    },
    {
        "query": "In 2021, Gill raised Bachan’s first round of capital, after an investor at Prelude Growth Partners realized the company had no institutional backers, yet it had maintained its position as Amazon’s No. 1 selling barbecue sauce.",
        "company_name": ["Prelude Growth Partners", "Bachan"],
    },
]

# Renders one example. The query and its answer are separated by a newline;
# the previous "\C" was an invalid escape that put a literal backslash into the prompt.
example_prompt = PromptTemplate(
    input_variables=["query", "company_name"], template="Query: {query}\nCompany Name: {company_name}"
)

## Entity Extraction - Example

In [None]:
# Sample passage used by the example cells below.
# NOTE: binding it to `input` shadows Python's built-in input() in this kernel session.
input = "President Joe Biden said in a somewhat defensive interview with NBC’s Lester Holt on Monday he is back “on the horse” following a shaky debate performance against former President Donald Trump"

In [None]:
# Build the extractor from the chat model and the few-shot examples.
entity_extraction = EntityExtractionWithLLM(llm=llm, examples=ENTITY_EXTRACTION_EXAMPLES)

# Run it on the sample passage.
response = entity_extraction.extract_entities(
    input=input,
    example_prompt=example_prompt,
    prefix_intructions=EXTRACTION_TEMPLATE,
)

# The chain returns a response object; the generated text is in `.content`.
print(response.content)

['NBC']


# Assess Credit Risk

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser

class CompanyCreditRisk(BaseModel):
    # Credit-risk verdict for a single company.
    # (Documented with comments rather than a docstring so the model's
    # generated schema description stays exactly as before.)

    # Integer score, validated to lie within 1..10 inclusive.
    credit_risk_rating: int = Field(
        ...,
        description="Credit risk rating on a scale of 1 to 10",
        ge=1,
        le=10,
    )

    # Free-text reasoning behind the score.
    justification: str = Field(
        ...,
        description="Justification for the credit risk rating.",
    )

class CreditRiskAssessment(BaseModel):
    # Custom-root model: the whole parsed object is a mapping from a string key
    # (the prompt below asks for the company name) to that company's CompanyCreditRisk.
    # The mapping is accessed through the `__root__` attribute.
    __root__: Dict[str, CompanyCreditRisk]

# Prompt text for the credit-risk step; `{input}` receives the article and
# `{company_name}` the companies found by the extraction step.
template = """
You are an expert financial analyst specializing in credit risk assessment. Given the following article and all mentioned companies in the article, assess their credit risk on a scale from 1 to 10, where 1 indicates the lowest risk and 10 indicates the highest risk. Provide a brief justification for each rating based on the content of the article.

Article:
{input}

Mentioned companies:
{company_name}

Format the answer as a valid JSON object where each key is the company name and each value is a dictionary with two keys:
- "credit_risk_rating": the rating score
- "justification": text to justify the score
"""

prompt = PromptTemplate(
    input_variables=["input", "company_name"],
    template=template,
)

# Turns the model's JSON answer into a validated CreditRiskAssessment.
parser = PydanticOutputParser(pydantic_object=CreditRiskAssessment)

# Pipeline: render prompt -> call the chat model -> parse and validate the reply.
chain = prompt | llm | parser


## Assess Credit Risk - Example

In [None]:
# Assess the sample passage for the company found in the extraction example.
credit_risk_request = {"input": input, "company_name": ['NBC']}
result = chain.invoke(credit_risk_request)
print(result)

__root__={'NBC': CompanyCreditRisk(credit_risk_rating=2, justification='NBC is mentioned in the context of conducting an interview with President Joe Biden. There is no mention of financial instability, legal issues, or other factors that might indicate a high credit risk. As a well-established media company, NBC likely has a low credit risk.')}


In [None]:
# Drill into the parsed result: `__root__` is the company -> CompanyCreditRisk mapping.
print(result.__root__['NBC'].credit_risk_rating)

2


# Apply to all 400 articles

In [None]:
# Load both article collections.
# The encoding is explicit: the article text contains non-ASCII punctuation (curly quotes),
# and open() would otherwise use the platform's locale encoding. This also matches the
# UTF-8 writer used when the results are saved.
with open("data/task1_json/forbes.json", "r", encoding="utf-8") as file:
    forbes_articles = json.load(file)

with open("data/task1_json/reuters.json", "r", encoding="utf-8") as file:
    reuters_articles = json.load(file)

In [None]:
# Merge the two sources into a single list and report how many articles there are.
articles = forbes_articles + reuters_articles
article_count = len(articles)
print(article_count)

400


In [None]:
# Run extraction + credit-risk assessment over every article.
# Each article dict is annotated in place and also collected in `articles_with_rating`.
entity_extraction = EntityExtractionWithLLM(llm, ENTITY_EXTRACTION_EXAMPLES)
articles_with_rating = []

for article in articles:
    # A dedicated local name, so the loop no longer rebinds `input`
    # (which shadows Python's built-in input()).
    article_text = article['content']

    # extract entities
    response = entity_extraction.extract_entities(
        input=article_text,
        example_prompt=example_prompt,
        prefix_intructions=EXTRACTION_TEMPLATE,
    )
    company_name = response.content
    print(company_name)

    # Start from empty lists so every record ends up with the same three keys,
    # whether or not any company was found.
    companies = []
    ratings = []
    justifications = []

    # assess credit risk if company name exists in article
    # The reply is compared as normalized text: this covers an actual empty list,
    # the literal "[]", surrounding whitespace, and an empty reply.
    if str(company_name).strip() not in ("", "[]"):
        result = chain.invoke({"input": article_text, "company_name": company_name})
        print(result)

        for company, risk in result.__root__.items():
            companies.append(company)
            ratings.append(risk.credit_risk_rating)
            justifications.append(risk.justification)

    article['companies'] = companies
    article['credit_risk_rating'] = ratings
    article['justification'] = justifications

    if companies:
        print(f"article title: {article['title']}, rating: {article['credit_risk_rating']}")

    articles_with_rating.append(article)


['NBC', 'Forbes', 'RealClearPolitics']
__root__={'NBC': CompanyCreditRisk(credit_risk_rating=2, justification='NBC is a well-established media company with a strong reputation. The article mentions that NBC conducted an interview with President Biden, indicating that it continues to have access to high-profile individuals and events. This suggests that NBC is likely to have a stable financial position and therefore a low credit risk.'), 'Forbes': CompanyCreditRisk(credit_risk_rating=2, justification='Forbes is a reputable and well-known media and publishing company. The article mentions Forbes in the context of breaking news alerts and election coverage, suggesting that it continues to be a reliable source of information. This indicates that Forbes likely has a stable financial position and therefore a low credit risk.'), 'RealClearPolitics': CompanyCreditRisk(credit_risk_rating=3, justification='RealClearPolitics is a political news and polling data aggregator. The article mentions th

In [None]:
import os

output_path = 'data/task2/articles_with_rating.json'

# Create the target folder first: the results come from a long run of LLM calls,
# and open(..., 'w') raises FileNotFoundError when the directory does not exist.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, 'w', encoding='utf-8') as f:
    json.dump(articles_with_rating, f, indent=4)