In [42]:
!pip install langchain neo4j openai wikipedia tiktoken langchain_openai pdfplumber



In [43]:
#Package Imports
from langchain.graphs import Neo4jGraph
import neo4j

In [44]:
#Neo4j connection
url = "neo4j+s://e1557b9c.databases.neo4j.io"
username ="neo4j"
password = "m8CFT88LsU8EZ9BzYC4dC0c7Pwgc1ZBQxHKqQKpA3HY"
graph = Neo4jGraph(
    url=url,
    username=username,
    password=password
)

In [45]:
#Langchain for graph
from langchain_community.graphs.graph_document import (
    Node as BaseNode,
    Relationship as BaseRelationship,
    GraphDocument,
)
from langchain.schema import Document
from typing import List, Dict, Any, Optional
from langchain.pydantic_v1 import Field, BaseModel

class Property(BaseModel):
  """A single property consisting of key and value"""
  key: str = Field(..., description="key")
  value: str = Field(..., description="value")

class Node(BaseNode):
    properties: Optional[List[Property]] = Field(
        None, description="List of node properties")

class Relationship(BaseRelationship):
    properties: Optional[List[Property]] = Field(
        None, description="List of relationship properties"
    )

class KnowledgeGraph(BaseModel):
    """Generate a knowledge graph with entities and relationships."""
    nodes: List[Node] = Field(
        ..., description="List of nodes in the knowledge graph")
    rels: List[Relationship] = Field(
        ..., description="List of relationships in the knowledge graph"
    )

In [46]:
def format_property_key(s: str) -> str:
    words = s.split()
    if not words:
        return s
    first_word = words[0].lower()
    capitalized_words = [word.capitalize() for word in words[1:]]
    return "".join([first_word] + capitalized_words)

def props_to_dict(props) -> dict:
    """Convert properties to a dictionary."""
    properties = {}
    if not props:
      return properties
    for p in props:
        properties[format_property_key(p.key)] = p.value
    return properties

def map_to_base_node(node: Node) -> BaseNode:
    """Map the KnowledgeGraph Node to the base Node."""
    properties = props_to_dict(node.properties) if node.properties else {}
    # Add name property for better Cypher statement generation
    properties["name"] = node.id.title()
    return BaseNode(
        id=node.id.title(), type=node.type.capitalize(), properties=properties
    )


def map_to_base_relationship(rel: Relationship) -> BaseRelationship:
    """Map the KnowledgeGraph Relationship to the base Relationship."""
    source = map_to_base_node(rel.source)
    target = map_to_base_node(rel.target)
    properties = props_to_dict(rel.properties) if rel.properties else {}
    return BaseRelationship(
        source=source, target=target, type=rel.type, properties=properties
    )

In [47]:
import openai
import json
import os
from langchain.chat_models import AzureChatOpenAI
from langchain.schema import HumanMessage
from langchain import LLMChain
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Load config values
with open(r'config.json') as config_file:
    config_details = json.load(config_file)

# The base URL for your Azure OpenAI resource.
openai_api_base = config_details['OPENAI_API_BASE']

# API version e.g. "2023-07-01-preview"
openai_api_version = config_details['OPENAI_API_VERSION']

# The name of your Azure OpenAI deployment chat model. e.g. "gpt-35-turbo-0613"
deployment_name = config_details['DEPLOYMENT_NAME']

# The API key for your Azure OpenAI resource.
openai_api_key = os.getenv("OPENAI_API_KEY")

# This is set to `azure`
openai_api_type = "azure"


from langchain.chat_models import AzureChatOpenAI

# Create an instance of chat llm
llm = AzureChatOpenAI(
    azure_endpoint=openai_api_base,
    openai_api_version=openai_api_version,
    deployment_name=deployment_name,
    openai_api_key=openai_api_key,
    openai_api_type=openai_api_type,
)

In [48]:
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.openai_functions import create_structured_output_chain

def get_extraction_chain(
    allowed_nodes: Optional[List[str]] = None,
    allowed_rels: Optional[List[str]] = None
    ):
    prompt = ChatPromptTemplate.from_messages(
    [(
      "system",
      f"""# Knowledge Graph Instructions for GPT-4
## 1. Overview
You are a specialized algorithm designed to extract structured financial data from bank annual reports to build a comprehensive knowledge graph.
- **Nodes** represent financial terms, entities, departments, and concepts specific to the banking industry.
- The goal is to structure information in a manner that highlights financial relationships, decision-making hierarchies, and policy summaries.

## 2. Sections and Nodes
- **Sections**: Target key sections such as 'Director's Report', 'Statements of Financial Position', 'Income Statement', etc. Identify relevant financial data and decisions.
- **Entities and Concepts**: Create nodes for identifiable entities like board members, financial metrics, and policy terms.
- **Node IDs**: Use names or specific identifiers for node labels. Avoid integers or vague references.
- **Relationships**: Map relationships that reflect financial dependencies, reporting structures, and policy impacts.

## 3. Detailed Financial Extraction
- **Extract Details**: Focus on numbers and financial statements, converting them into properties of nodes. For instance, assets and liabilities figures should be attached to the 'Statement of Financial Position' node.
- **Accuracy and Precision**: Ensure the extraction of financial data is accurate, citing exact figures and contextual information.
- **Contextual Relevance**: Attach relevant notes and assumptions from the 'Notes to Financial Statements' to the appropriate financial statements or metrics.

## 4. Compliance and Consistency
- **Regulatory Statements**: Extract and highlight compliance statements from 'Independent Auditors' Report' and 'Statutory Declaration'.
- **Maintain Consistency**: Use consistent terminology across different sections of the report to avoid confusion.
- **Strict Compliance**: Adhere strictly to the rules for knowledge graph construction.

## 5. Symbolic Tokens and Special Characters
- **Symbolic Tokens**: Identify and properly handle symbolic tokens (e.g., %, $, £, etc.) and ensure they are accurately represented in the data.
- **Special Characters**: Recognize special characters and their meanings (e.g., ± for approximately, > for greater than, etc.) and handle them appropriately in the extraction process.

## 6. Domain Vocabulary and Abbreviations
- **Domain Vocabulary**: Utilize and recognize domain-specific vocabulary and terminology relevant to the banking and financial industry.
- **Abbreviations**: Identify and expand domain-related abbreviations (e.g., FVOCI for Fair Value through Other Comprehensive Income) to ensure clarity and accuracy.

## 7. N-gram Extraction
- **N-grams**: Extract meaningful N-grams (bigrams, trigrams, etc.) that represent significant financial terms or phrases and include them as part of the node properties.

## 8. Tips
- Remember to format financial data as attributes of the nodes and structure the graph to reflect the organization and flow of the annual report.
- Use the given format to extract information from the following input: <input here>
- Tip: Make sure to answer in the correct format
"""),
        ("human", "Use the given format to extract information from the following input: {input}"),
        ("human", "Tip: Make sure to answer in the correct format"),
    ])
    return create_structured_output_chain(KnowledgeGraph, llm, prompt, verbose=False)


In [49]:
def extract_and_store_graph(
    document: Document,
    nodes: Optional[List[str]] = None,
    rels: Optional[List[str]] = None
) -> None:
    # Extract graph data using OpenAI functions
    extract_chain = get_extraction_chain(nodes, rels)
    data = extract_chain.invoke(document)['function']
    
    # Construct a graph document
    graph_document = GraphDocument(
        nodes=[map_to_base_node(node) for node in data.nodes],
        relationships=[map_to_base_relationship(rel) for rel in data.rels],
        source=document.dict()  # Convert Document object to dictionary
    )
    
    # Store information into a graph
    graph.add_graph_documents([graph_document])


In [50]:
import pdfplumber
import os
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from langchain.text_splitter import TokenTextSplitter
from tqdm import tqdm
import pickle

# Ensure nltk resources are downloaded
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

# Define the folder containing the PDF files and the ground truth folder
pdf_folder_path = r"C:\Users\WendyChuaXingZhao\OneDrive - SRKK Group of Companies\Documents\MsDS\Knowledge Graph Test\Financial Documents"
ground_truth_folder_path = r"C:\Users\WendyChuaXingZhao\OneDrive - SRKK Group of Companies\Documents\MsDS\Knowledge Graph Test\ground_truth"

# Create ground truth folder if it doesn't exist
os.makedirs(ground_truth_folder_path, exist_ok=True)

def extract_text_and_tables_from_pdf(pdf_path):
    try:
        with pdfplumber.open(pdf_path) as pdf:
            full_text = ''
            tables = []
            for page in pdf.pages:
                page_text = page.extract_text()
                if page_text:
                    full_text += page_text
                page_tables = page.extract_tables()
                if page_tables:
                    tables.extend(page_tables)
        return full_text, tables
    except Exception as e:
        print(f"An error occurred while reading the PDF {pdf_path}: {e}")
        return None, None

def preprocess_text(text):
    # Convert text to lower case
    text = text.lower()
    
    # Remove noise
    text = re.sub(r'[\s]+', ' ', text)
    text = re.sub(r'[^a-z0-9\s]+', ' ', text)
    
    # Tokenization
    tokens = word_tokenize(text)
    
    # Stop-word removal
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.lower() not in stop_words]
    
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    # Join tokens back to string
    preprocessed_text = ' '.join(tokens)
    return preprocessed_text

def extract_and_preprocess_pdf(pdf_path):
    raw_text, _ = extract_text_and_tables_from_pdf(pdf_path)
    if raw_text:
        preprocessed_text = preprocess_text(raw_text)
        text_splitter = TokenTextSplitter(chunk_size=2048, chunk_overlap=24)
        documents = text_splitter.split_text(preprocessed_text)
        return documents, preprocessed_text
    return [], None

# List all PDF files in the directory
pdf_paths = [os.path.join(pdf_folder_path, f) for f in os.listdir(pdf_folder_path) if f.lower().endswith('.pdf')]

# Extract and preprocess documents
all_documents = []
for pdf_path in pdf_paths:
    documents, preprocessed_text = extract_and_preprocess_pdf(pdf_path)
    all_documents.extend(documents)
    
    # Save the preprocessed text as ground truth
    if preprocessed_text:
        pdf_name = os.path.basename(pdf_path).replace('.pdf', '.txt')
        ground_truth_path = os.path.join(ground_truth_folder_path, pdf_name)
        with open(ground_truth_path, 'w', encoding='utf-8') as f:
            f.write(preprocessed_text)

# Save preprocessed documents for later use
with open('processed_documents.pkl', 'wb') as f:
    pickle.dump(all_documents, f)


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package punkt is already up-to-date!


In [51]:
import os
import re
import pickle
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.metrics import precision_score, recall_score, f1_score

# Ensure nltk resources are downloaded
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt')

def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'[\s]+', ' ', text)
    text = re.sub(r'[^a-z0-9\s]+', ' ', text)
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word.lower() not in stop_words]
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    preprocessed_text = ' '.join(tokens)
    return preprocessed_text

def load_text_files(folder_path):
    text_data = {}
    for file_name in os.listdir(folder_path):
        if file_name.endswith('.txt'):
            file_path = os.path.join(folder_path, file_name)
            with open(file_path, 'r', encoding='utf-8') as file:
                text = file.read()
                text_data[file_name] = preprocess_text(text)
    return text_data

# Load the ground truth data
ground_truth_folder_path = r"C:\Users\WendyChuaXingZhao\OneDrive - SRKK Group of Companies\Documents\MsDS\Knowledge Graph Test\ground_truth"
ground_truth_data = load_text_files(ground_truth_folder_path)

# Load extracted data from the saved pickle file
def load_extracted_data(pickle_file_path):
    with open(pickle_file_path, 'rb') as f:
        extracted_data = pickle.load(f)
    return extracted_data

extracted_data_path = 'processed_documents.pkl'
extracted_data_raw = load_extracted_data(extracted_data_path)

# Convert the extracted data into a dictionary with filenames as keys for comparison
extracted_data = {}
for i, document in enumerate(extracted_data_raw):
    doc_name = f'document_{i+1}.txt'  # Generate a name for each document
    extracted_data[doc_name] = ' '.join(document)  # Join document chunks into a single string

def evaluate_extraction(ground_truth, extracted):
    """
    Evaluate the extraction against the ground truth.
    
    :param ground_truth: Dictionary containing ground truth data
    :param extracted: Dictionary containing extracted data
    :return: Dictionary containing precision, recall, and F1-score for each document
    """
    results = {}
    
    for doc, true_text in ground_truth.items():
        if doc in extracted:
            extracted_text = extracted[doc]
            
            # Convert texts to sets of words for comparison
            true_words = set(true_text.split())
            extracted_words = set(extracted_text.split())
            
            # Calculate true positives, false positives, false negatives
            tp = len(true_words & extracted_words)
            fp = len(extracted_words - true_words)
            fn = len(true_words - extracted_words)
            
            # Handle edge cases where there are no positives
            precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
            recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
            f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0.0
            
            results[doc] = {
                'precision': precision,
                'recall': recall,
                'f1': f1
            }
        else:
            results[doc] = {
                'precision': 0,
                'recall': 0,
                'f1': 0
            }
    
    return results

# Evaluate the extracted data against the ground truth data
evaluation_results = evaluate_extraction(ground_truth_data, extracted_data)

# Print evaluation results
for doc, metrics in evaluation_results.items():
    print(f"Document: {doc}")
    print(f"Precision: {metrics['precision']}")
    print(f"Recall: {metrics['recall']}")
    print(f"F1-score: {metrics['f1']}")
    print("\n")


[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\WendyChuaXingZhao\AppData\Roaming\nltk_data..
[nltk_data]     .
[nltk_data]   Package punkt is already up-to-date!


Document: cimb-fs-2023.txt
Precision: 0
Recall: 0
F1-score: 0


Document: cimb-integrated-report-2023.txt
Precision: 0
Recall: 0
F1-score: 0


Document: Maybank-AR2023-Financial-Statements.txt
Precision: 0
Recall: 0
F1-score: 0


Document: Maybank-AR2023-Integrated-Annual-Report.txt
Precision: 0
Recall: 0
F1-score: 0


Document: PBB_ar_2023.txt
Precision: 0
Recall: 0
F1-score: 0


Document: PBB_fs_2023.txt
Precision: 0
Recall: 0
F1-score: 0




In [52]:
import json
import logging

# Set up logging
logging.basicConfig(level=logging.INFO, filename='error_log.log', filemode='w',
                    format='%(name)s - %(levelname)s - %(message)s')

def extract_and_store_graph(document: Document, nodes: Optional[List[str]] = None, rels: Optional[List[str]] = None) -> None:
    try:
        # Extract graph data using OpenAI functions
        extract_chain = get_extraction_chain(nodes, rels)
        response = extract_chain.invoke(document)
        
        # Log the response for debugging
        logging.info("Response from extract_chain: %s", response)
        
        data = response['function']
        
        # Construct a graph document
        graph_document = GraphDocument(
            nodes=[map_to_base_node(node) for node in data.nodes],
            relationships=[map_to_base_relationship(rel) for rel in data.rels],
            source=document.dict()  # Convert Document object to dictionary
        )
        
        # Store information into a graph
        graph.add_graph_documents([graph_document])
    
    except json.JSONDecodeError as e:
        logging.error("JSON decoding error: %s", e)
        logging.error("Invalid JSON response: %s", response)
    except KeyError as e:
        logging.error("Key error: %s", e)
        logging.error("Response missing expected key: %s", response)
    except Exception as e:
        logging.error("An unexpected error occurred: %s", e)
        logging.error("Response: %s", response)

# Process each document chunk and extract/store the graph
for i, chunk in tqdm(enumerate(documents), total=len(documents)):
    doc = Document(page_content=chunk, metadata={"source": "batch_process", "chunk": i})
    try:
        extract_and_store_graph(doc)
        print(f"Successfully processed and stored chunk {i+1}")
    except Exception as e:
        print(f"An error occurred while processing chunk {i+1}: {e}")


  2%|▏         | 1/54 [00:13<12:14, 13.86s/it]

Successfully processed and stored chunk 1


  4%|▎         | 2/54 [00:27<11:39, 13.45s/it]

Successfully processed and stored chunk 2


  6%|▌         | 3/54 [00:49<15:03, 17.71s/it]

Successfully processed and stored chunk 3


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
__root__
  Expecting property name enclosed in double quotes: line 1799 column 7 (char 45025) (type=value_error.jsondecode; msg=Expecting property name enclosed in double quotes; doc={
  "output": {
    "nodes": [
      {
        "id": "market_data_subsidary",
        "type": "concept",
        "properties": [
          {
            "key": "content",
            "value": "market data subsidiary"
          }
        ]
      },
      {
        "id": "impairment_testing",
        "type": "concept",
        "properties": [
          {
            "key": "content",
            "value": "impairment testing"
          }
        ]
      },
      {
        "id": "viu_estimate",
        "type": "concept",
        "properties": [
          {
            "key": "content",
            "value": "viu estimate"
          }
        ]
      },
      {
        "id": "estimated_future_cash_flow",
        "type": "concept",
 

An error occurred while processing chunk 4: local variable 'response' referenced before assignment


  9%|▉         | 5/54 [05:18<1:09:04, 84.59s/it]

Successfully processed and stored chunk 5


 11%|█         | 6/54 [05:30<48:04, 60.10s/it]  

Successfully processed and stored chunk 6


 13%|█▎        | 7/54 [05:31<31:51, 40.68s/it]

Successfully processed and stored chunk 7


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
__root__
  Expecting value: line 2404 column 6 (char 63561) (type=value_error.jsondecode; msg=Expecting value; doc={
  "output": {
    "nodes": [
      {
        "id": "rate_reporting_date",
        "type": "financial_term",
        "properties": [
          {
            "key": "description",
            "value": "Rate reporting date"
          }
        ]
      },
      {
        "id": "exchange_difference",
        "type": "financial_term",
        "properties": [
          {
            "key": "description",
            "value": "Exchange difference"
          }
        ]
      },
      {
        "id": "comprehensive_income",
        "type": "financial_term",
        "properties": [
          {
            "key": "description",
            "value": "Comprehensive income"
          }
        ]
      },
      {
        "id": "foreign_currency_translation_reserve",
        "type": "financial_term",
      

An error occurred while processing chunk 8: local variable 'response' referenced before assignment


 17%|█▋        | 9/54 [08:28<43:50, 58.46s/it]  

Successfully processed and stored chunk 9


 19%|█▊        | 10/54 [08:32<30:39, 41.80s/it]

Successfully processed and stored chunk 10


 20%|██        | 11/54 [09:04<27:49, 38.82s/it]

Successfully processed and stored chunk 11


 22%|██▏       | 12/54 [10:16<34:18, 49.00s/it]

Successfully processed and stored chunk 12


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
__root__
  Expecting property name enclosed in double quotes: line 2450 column 12 (char 50404) (type=value_error.jsondecode; msg=Expecting property name enclosed in double quotes; doc={
  "output": {
    "nodes": [
      {
        "id": "risk_parameter",
        "type": "financial_term",
        "properties": [
          {
            "key": "value",
            "value": "107"
          }
        ]
      },
      {
        "id": "exchange_difference",
        "type": "financial_term",
        "properties": [
          {
            "key": "value",
            "value": "16"
          }
        ]
      },
      {
        "id": "31_december_2023",
        "type": "date",
        "properties": [
          {
            "key": "value",
            "value": "31 December 2023"
          }
        ]
      },
      {
        "id": "4_854",
        "type": "financial_term",
        "properties": [
          {
      

An error occurred while processing chunk 13: local variable 'response' referenced before assignment


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 26%|██▌       | 14/54 [13:36<45:51, 68.78s/it]

An error occurred while processing chunk 14: local variable 'response' referenced before assignment


 28%|██▊       | 15/54 [13:46<33:13, 51.11s/it]

Successfully processed and stored chunk 15


 30%|██▉       | 16/54 [13:57<24:44, 39.07s/it]

Successfully processed and stored chunk 16


 31%|███▏      | 17/54 [15:00<28:30, 46.24s/it]

Successfully processed and stored chunk 17


 33%|███▎      | 18/54 [15:07<20:40, 34.44s/it]

Successfully processed and stored chunk 18


 35%|███▌      | 19/54 [15:30<18:00, 30.87s/it]

Successfully processed and stored chunk 19


 37%|███▋      | 20/54 [15:53<16:12, 28.59s/it]

Successfully processed and stored chunk 20


 39%|███▉      | 21/54 [16:06<13:12, 24.02s/it]

Successfully processed and stored chunk 21


 41%|████      | 22/54 [18:47<34:35, 64.85s/it]

Successfully processed and stored chunk 22


 43%|████▎     | 23/54 [21:09<45:30, 88.07s/it]

Successfully processed and stored chunk 23


 44%|████▍     | 24/54 [21:16<31:57, 63.93s/it]

Successfully processed and stored chunk 24


 46%|████▋     | 25/54 [21:22<22:25, 46.40s/it]

Successfully processed and stored chunk 25


 48%|████▊     | 26/54 [21:54<19:43, 42.27s/it]

Successfully processed and stored chunk 26


 50%|█████     | 27/54 [22:53<21:14, 47.21s/it]

Successfully processed and stored chunk 27


 52%|█████▏    | 28/54 [23:16<17:18, 39.95s/it]

Successfully processed and stored chunk 28


 54%|█████▎    | 29/54 [23:41<14:43, 35.35s/it]

Successfully processed and stored chunk 29


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 56%|█████▌    | 30/54 [24:09<13:13, 33.04s/it]

An error occurred while processing chunk 30: local variable 'response' referenced before assignment


 57%|█████▋    | 31/54 [24:16<09:45, 25.45s/it]

Successfully processed and stored chunk 31


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 59%|█████▉    | 32/54 [24:44<09:33, 26.08s/it]

An error occurred while processing chunk 32: local variable 'response' referenced before assignment


 61%|██████    | 33/54 [24:54<07:26, 21.25s/it]

Successfully processed and stored chunk 33


 63%|██████▎   | 34/54 [24:57<05:18, 15.90s/it]

Successfully processed and stored chunk 34


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 65%|██████▍   | 35/54 [25:25<06:12, 19.59s/it]

An error occurred while processing chunk 35: local variable 'response' referenced before assignment


 67%|██████▋   | 36/54 [25:29<04:24, 14.72s/it]

Successfully processed and stored chunk 36


 69%|██████▊   | 37/54 [27:19<12:16, 43.33s/it]

Successfully processed and stored chunk 37


 70%|███████   | 38/54 [27:23<08:24, 31.53s/it]

Successfully processed and stored chunk 38


 72%|███████▏  | 39/54 [28:07<08:51, 35.44s/it]

Successfully processed and stored chunk 39


 74%|███████▍  | 40/54 [29:09<10:04, 43.17s/it]

Successfully processed and stored chunk 40


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
__root__
  Unterminated string starting at: line 10 column 22 (char 180) (type=value_error.jsondecode; msg=Unterminated string starting at; doc={
  "output": {
    "nodes": [
      {
        "id": "page_content",
        "type": "Text",
        "properties": [
          {
            "key": "content",
            "value": "195 009 472 1 744 599 22 526 612 2 317 91 236 715 1 736 681 378 104 1 603 280 2 694 775 21 561 399 2 eunever lanretxe 137 983 4 500 061 376 401 723 55 5 627 922 4 881 625 059 05 218 55 200 1 147 311 2 439 64 080 224 1 910 31 stnemges rehto morf eunever 010 514 52 137 983 4 865 975 2 464 756 991 746 509 472 1 371 522 72 318 247 2 366 07 444 375 1 936 781 416 515 3 042 921 2 675 999 31 481 600 3 eunever latot esnepxe emocni tseretni ten 048 616 01 504 3 890 583 1 518 613 549 384 833 485 741 532 9 359 861 2 283 02 873 61 206 42 497 941 048 806 900 456 5 145 239 emocni gniknab cimalsi dna 74

An error occurred while processing chunk 41: local variable 'response' referenced before assignment


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 78%|███████▊  | 42/54 [32:33<13:18, 66.57s/it]

An error occurred while processing chunk 42: local variable 'response' referenced before assignment


 80%|███████▉  | 43/54 [32:50<09:26, 51.50s/it]

Successfully processed and stored chunk 43


 81%|████████▏ | 44/54 [33:12<07:08, 42.85s/it]

Successfully processed and stored chunk 44


 83%|████████▎ | 45/54 [34:02<06:43, 44.84s/it]

Successfully processed and stored chunk 45


 85%|████████▌ | 46/54 [34:25<05:06, 38.25s/it]

Successfully processed and stored chunk 46


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 87%|████████▋ | 47/54 [34:28<03:13, 27.65s/it]

An error occurred while processing chunk 47: local variable 'response' referenced before assignment


 89%|████████▉ | 48/54 [34:35<02:09, 21.61s/it]

Successfully processed and stored chunk 48


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
__root__
  Unterminated string starting at: line 10 column 22 (char 180) (type=value_error.jsondecode; msg=Unterminated string starting at; doc={
  "output": {
    "nodes": [
      {
        "id": "page_content",
        "type": "Text",
        "properties": [
          {
            "key": "content",
            "value": "022 845 9 981 170 25 031 528 587 899 116 excluding equity security credit risk 270 public bank berhad 2023 integrated annual report5 credit risk continued 5 1 distribution credit exposure continued b geographical analysis continued hong kong malaysia china cambodia country total group rm 000 rm 000 rm 000 rm 000 rm 000 2022 balance sheet exposure cash balance bank gross 13 890 980 3 126 314 2 315 480 2 436 192 21 768 966 reverse repurchase agreement 4 193 4 193 financial asset fair value profit loss 558 083 558 083 derivative financial asset 234 657 28 162 183 745 446 564 financial inves

An error occurred while processing chunk 49: local variable 'response' referenced before assignment


 93%|█████████▎| 50/54 [37:33<03:13, 48.31s/it]

Successfully processed and stored chunk 50


 94%|█████████▍| 51/54 [38:19<02:23, 47.84s/it]

Successfully processed and stored chunk 51


ERROR:root:An unexpected error occurred: 1 validation error for _OutputFormatter
output -> rels
  field required (type=value_error.missing)
 96%|█████████▋| 52/54 [38:46<01:22, 41.46s/it]

An error occurred while processing chunk 52: local variable 'response' referenced before assignment


 98%|█████████▊| 53/54 [39:18<00:38, 38.70s/it]

Successfully processed and stored chunk 53


100%|██████████| 54/54 [40:45<00:00, 45.28s/it]

Successfully processed and stored chunk 54



