# Install packages

In [None]:
%pip install langchain_openai

# Set up LLM

In [None]:
# `json` must be imported here: this cell calls json.load, and on a fresh
# kernel no earlier cell has imported it yet.
import json

from langchain_openai import AzureChatOpenAI

# Read the Azure OpenAI endpoint and key from a local secrets file so that no
# credentials are hard-coded in the notebook. JSON is UTF-8, so decode it
# explicitly instead of relying on the platform's default encoding.
with open("../secrets/openai_secrets.json", "r", encoding="utf-8") as file:
    openai_secrets = json.load(file)

# Chat model shared by the rest of the notebook (it is piped after the
# comparison prompt to form `chain`).
llm = AzureChatOpenAI(
    azure_endpoint=openai_secrets["openai_api_base"],
    openai_api_version="2023-03-15-preview",
    deployment_name="gpt-4-32k",
    openai_api_key=openai_secrets["openai_api_key"],
    openai_api_type="azure",
    temperature=0,
)

In [None]:
import json

# [Task 2] Get aggregated credit risk ratings and justifications by company

In [None]:
# Load the Task 2 company -> rating mapping (per the file name, the mean rating
# for each company). The encoding is given explicitly because JSON is UTF-8 and
# open() would otherwise fall back to the platform's locale encoding.
with open(
    "../data/task2_json/company_with_mean_rating_summary_finding.json",
    "r",
    encoding="utf-8",
) as file:
    company_with_mean_rating = json.load(file)

In [None]:
# Load the Task 2 company -> justification mapping (per the file name, a
# summarised justification for each company). The encoding is given explicitly
# because JSON is UTF-8 and the justification text may contain non-ASCII
# characters that a locale-dependent default encoding would mis-decode.
with open(
    "../data/task2_json/company_with_summarised_justification_summary_finding.json",
    "r",
    encoding="utf-8",
) as file:
    company_with_summarised_justification = json.load(file)

# [Task 3] Aggregating credit risk ratings and justifications by company

In [None]:
import json

# Load the Task 3 results. Each value is indexed below as
# [0] = rating and [1] = justification. The encoding is given explicitly
# because JSON is UTF-8 and open() would otherwise use the locale encoding.
with open(
    "../data/task3_json/company_with_rating.json",
    "r",
    encoding="utf-8",
) as file:
    data = json.load(file)

In [None]:
# Split the Task 3 payload into two parallel lookups keyed by company name:
# element 0 of each entry is the rating, element 1 is the justification.
company_with_rating_rag = {name: entry[0] for name, entry in data.items()}
company_with_justification_rag = {name: entry[1] for name, entry in data.items()}


In [None]:
# Display the company -> rating mapping extracted from the Task 3 file.
company_with_rating_rag

{'NBC': 4,
 'Forbes': 2,
 'RealClearPolitics': 1,
 'Tesla': 3,
 'Ford': 7,
 'GM': 6,
 'Stellantis': 6,
 'Truth Social': 3,
 'Turning Point USA': 3,
 'Wood Mackenzie': 3,
 'Bachan': 9,
 'Prelude Growth Partners': 9,
 'Whole Foods': 8,
 'Amazon': 2,
 'Walmart': 8,
 'Sonoma Brands Capital': 9,
 'Facebook': 8,
 'McCormick': 9,
 'Cholula': 3,
 'Heinz': 9,
 'Tabasco': 9,
 'General Motors': 4,
 'Frigidaire': 5,
 'National Cash Register': 5,
 'Delco Electronics': 5,
 'WWE': 2,
 'GoFundMe': 2,
 'Citadel': 1,
 'Heritage Foundation': 2,
 'Daily Wire': 3,
 'Griffin Catalyst': 2,
 'Citadel Securities': 1,
 'Bethel Park Skilled Nursing and Rehabilitation Center': 2,
 'HBO Max': 2,
 'Republican National Committee': 4,
 'Democratic Party': 4,
 'UPS': 3,
 'Teamsters National Black Caucus': 4,
 'AFL-CIO': 4,
 'United Auto Workers': 4,
 'AFSCME': 3,
 'National Education Association': 3,
 'Service Employees International Union': 3,
 'Eli Lilly': 3,
 'Novo Nordisk': 4,
 'Zealand Pharma': 6,
 'Boehringer In

In [None]:
# Display the company -> justification mapping extracted from the Task 3 file.
company_with_justification_rag

{'NBC': "While the passage does not provide specific financial information about NBC, it does discuss the broader landscape of the television and streaming industry, which NBC is a part of. The industry is facing challenges such as rising production costs, flat subscription revenues, and a shift in viewer demographics. However, companies like NBC have been adapting to these changes, for example, by introducing new compensation models and seeking to attract advertisers. Therefore, while there are risks associated with the changing industry landscape, NBC's ability to adapt to these changes suggests a moderate level of credit risk.",
 'Forbes': "Forbes is a well-established and reputable media company with a long history of operation. The passage does not provide any specific financial information about Forbes, but given its reputation and the fact that it is a leading source of reliable business news and financial information, it is reasonable to assume that it has a low credit risk. Th

# Compare ratings and justifications between Task 2 and Task 3

In [None]:
from langchain.prompts.prompt import PromptTemplate

# Prompt asking the model to contrast the two justifications for one company.
# "Justification 1" receives the Task 3 text and "Justification 2" the Task 2
# text when the chain is invoked in the comparison loop.
template = (
    "Compare the following two justifications and highlight the differences:\n"
    "Justification 1: {justification_rag}\n"
    "Justification 2: {justification_prompt}"
)

prompt = PromptTemplate(
    input_variables=["justification_rag", "justification_prompt"],
    template=template,
)

# Pipe the filled-in prompt straight into the chat model.
chain = prompt | llm

In [None]:
import pandas as pd

# Every company that appears in at least one of the two tasks.
companies = set(company_with_rating_rag) | set(company_with_mean_rating)
print(len(companies))

325


In [None]:

# One record per company; converted to a DataFrame once the loop finishes.
comparison_data = []

# Iterate in sorted order: `companies` is a set of strings, whose iteration
# order changes between kernel restarts (string hashing is randomised), so
# sorting keeps the progress log and the resulting row order reproducible.
for i, company in enumerate(sorted(companies)):
    print(i, company)  # progress indicator; each iteration makes one LLM call

    # A company may appear in only one of the two tasks, hence the defaults.
    rating_rag = company_with_rating_rag.get(company)
    justification_rag = company_with_justification_rag.get(
        company, "No justification available."
    )
    rating_prompt = company_with_mean_rating.get(company)
    justification_prompt = company_with_summarised_justification.get(
        company, "No justification available."
    )

    # Rating difference (Task 3 minus Task 2) is only defined when both exist.
    if rating_rag is not None and rating_prompt is not None:
        rating_difference = rating_rag - rating_prompt
    else:
        rating_difference = None

    # Ask the LLM to describe how the two justifications differ.
    justification_difference = chain.invoke(
        {
            "justification_rag": justification_rag,
            "justification_prompt": justification_prompt,
        }
    )

    comparison_data.append({
        "company": company,
        "rating_rag": rating_rag,
        "rating_prompt": rating_prompt,
        "rating_difference": rating_difference,
        "justification_rag": justification_rag,
        "justification_prompt": justification_prompt,
        # Keep only the text of the model's reply.
        "justification_difference": justification_difference.content,
    })

comparison_df = pd.DataFrame(comparison_data)

0 Campbell Red Lake Mines
1 Seattle Mariners
2 Yamaichi Securities Co Ltd
3 Goldman Sachs International
4 Getty Images
5 Forbes
6 Detroit Tigers
7 New York Times
8 United Airlines
9 U.S. Federal Reserve
10 Bethel Park Skilled Nursing and Rehabilitation Center
11 Federal Reserve Bank of New York
12 UBS-Phillips and Drew
13 Central Bank of Venezuela
14 Midland Bank
15 Columbia University
16 Cholula
17 Houston Astros
18 Nomura Investment Trust and Management Co Ltd
19 CNN
20 Heinz
21 Swiss Bank Corp
22 NBC New York
23 International Monetary Fund
24 FX
25 Volkskas Bank
26 George Washington University
27 University of Arizona
28 Newmont Gold
29 Institute for International Economics
30 Colorado Rockies
31 Girozentrale
32 Bank of Spain
33 Sony Corp
34 Chase Bank AG
35 Deutsche Bank
36 Tesla
37 GM
38 Truth Social
39 Reserve Bank of New Zealand
40 Columbia Students for Justice in Palestine
41 Portland State University
42 CBS Sports
43 Florida State University
44 Citicorp
45 ASA Ltd
46 Pfizer
47

In [None]:
# Display the per-company comparison table assembled by the loop above.
comparison_df

Unnamed: 0,company,rating_rag,rating_prompt,rating_difference,justification_rag,justification_prompt,justification_difference
0,Campbell Red Lake Mines,3,2.00,1.00,Campbell Red Lake Mines is benefiting from the...,Campbell Red Lake Mines has also seen a rise i...,Justification 1 is more detailed and provides ...
1,Seattle Mariners,3,3.00,0.00,The passage does not provide any direct financ...,The Seattle Mariners are mentioned as a past o...,Justification 1 is more detailed and analytica...
2,Yamaichi Securities Co Ltd,6,5.00,1.00,Yamaichi Securities Co Ltd is exposed to signi...,Yamaichi Securities Co Ltd's deputy general ma...,Justification 1 and Justification 2 both discu...
3,Goldman Sachs International,4,6.00,-2.00,Goldman Sachs International is mentioned in th...,Goldman Sachs International's credit risk is s...,Justification 1 focuses on Goldman Sachs Inter...
4,Getty Images,2,3.00,-1.00,The passage does not provide specific financia...,Getty Images is a reputable stock photo agency...,Justification 1 and Justification 2 both asses...
...,...,...,...,...,...,...,...
320,University of Texas-Austin,4,3.00,1.00,The University of Texas-Austin is mentioned in...,The University of Texas-Austin is experiencing...,Justification 1 is more detailed and comprehen...
321,New York Federal Reserve Bank,1,1.00,0.00,"The New York Federal Reserve Bank, as part of ...","The New York Federal Reserve Bank, being a par...",Justification 1 and Justification 2 both argue...
322,Manhattan District Attorney,1,2.00,-1.00,The Manhattan District Attorney is a governmen...,The Manhattan District Attorney's office is in...,Justification 1 focuses on the nature of the M...
323,The Milwaukee Journal-Sentinel,2,1.75,0.25,The passage does not provide any specific fina...,The credit risk rating for the Milwaukee Journ...,Justification 1 and Justification 2 both argue...


In [None]:
# Persist the comparison table as CSV, leaving out the positional index column.
output_csv_path = "../data/task3_csv/difference_between_task2_and_task3.csv"
comparison_df.to_csv(output_csv_path, index=False)