In [1]:
# #!pip install chromadb
# %pip install -U langchain langchainhub openai --quiet

# Traceability - output might give different results

#  Initialise GenAI call

In [2]:
# GenAI Variables and models

import os

OPENAI_API_KEY = 'sk-xxx'
VERTEX_API_KEY = "xx"

OPENAI_MODEL_GPT4_0125 = 'gpt-4-0125-preview'
OPENAI_MODEL_VISION_PREVIEW = 'gpt-4-1106-vision-preview'
OPENAI_MODEL_GPT3 = 'gpt-3.5-turbo-0125'

VERTEX_MODEL_GEMINI = 'gemini-1.0-pro'
VERTEX_MODEL_GEMINI_VISION = 'gemini-1.0-pro-vision'

VERTEX_MODEL_ANTROPIC_HAIKU = 'claude-3-haiku@20240307'
VERTEX_MODEL_ANTROPIC_SONNET = 'claude-3-sonnet@20240229'
VERTEX_MODEL_MISTRAL = 'mistralai/Mixtral-8x7B-v0.1'
OPENAI_EMBEDDING_3_LARGE = 'text-embedding-3-large'
OPENAI_EMBEDDING_3_SMALL = 'text-embedding-3-small'
OPENAI_EMBEDDING_ADA_002 = 'text-embedding-ada-002'


os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY


In [3]:
def list_file (dir:str, filter):
    return [os.path.join(root, file)  for root, dirs, files in os.walk(dir) for file in files if filter in file]

# Initilise ChromaDB

In [4]:
import chromadb
from chromadb.utils import embedding_functions
from chromadb.utils.embedding_functions import OpenAIEmbeddingFunction

embedding_fn = OpenAIEmbeddingFunction(api_key=os.environ.get('OPENAI_API_KEY'), model_name=OPENAI_EMBEDDING_ADA_002)
# embedding_fn = embedding_functions.DefaultEmbeddingFunction()

class Collections:
    Name: str
    Documents: list
    Metadatas: list
    Ids: list
    count_items: int

    def __init__ (self, Name, Documents, Metadatas, Ids):
        if ( 
            len(Documents) != len (Ids) &
            len(Metadatas) != len(Documents)
        ):
            raise Exception("Error: Length of parameters do not match")
        
        self.Name = Name
        self.Documents = Documents
        self.Metadatas = Metadatas
        self.Ids = Ids
        self.count_items = len(Ids)

    def get_count(self):return self.count_items
    def get_Documents(self): return self.Documents
    def get_Metadatas(self): return self.Metadatas
    def get_Ids(self): return self.Ids
    def get_Name(self): return self.Name


class Document_vdb:
    vdb: chromadb
    collections: Collections

    def __init__ (
            self, 
            vdb = None, 
            collections = None, 
            persistent_dir = None
        ):
        if vdb == None:
            if persistent_dir == None:
                self.vdb = chromadb.Client()
            else:
                self.vdb = chromadb.PersistentClient(path = persistent_dir)
        else:
            self.vdb = vdb
        
        if collections != None:
            self.set_collections(collections)
        else: self.collections = None

    def get_vdb (self):
        return self.vdb
    
    def get_collection(
            self, 
            collection_name:str
        ):
        return self.vdb.get_collection(name = collection_name )

    def set_collections(
            self, 
            collections:Collections, 
            emb_fn = None
        ):

        if emb_fn == None: emb_fn = embedding_fn
        count_items = collections.get_count()

        collection = self.vdb.get_or_create_collection(
            name=collections.get_Name(), 
            embedding_function=emb_fn 
        )
        
        Documents = collections.get_Documents()
        Metadatas = collections.get_Metadatas()
        Ids = collections.get_Ids()

        for i in range(0, count_items):
            collection.add( 
                documents = Documents[i],
                metadatas = Metadatas[i],
                ids = Ids[i]
            )


In [5]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter, TokenTextSplitter
import hashlib
import uuid 
import pypdf


text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size=2000,
    chunk_overlap=0,
    length_function=len,
    is_separator_regex=False,
)

text_splitter_token = TokenTextSplitter(chunk_size=500, chunk_overlap=0)

def create_collections_dir (dir, collection_name) -> Collections:
    files = list_file(dir, ".pdf")
    Metadatas = []
    Ids = []
    Documents = []

    for file in files:
        with open(file, 'rb') as f:
            hash = str(hashlib.md5(f.read()).hexdigest())

        pdfFileObject = open(file, 'rb')
        pdfReader = pypdf.PdfReader(pdfFileObject)
        count = len(pdfReader.pages)

        for i in range(count):
            metadata = {
                'source': file,
                'page': i,
                'source_hash': hash,
            }
            page_chunks = text_splitter.split_text (pdfReader.pages[i].extract_text())
            for page_chunk in page_chunks:
                Ids.append(uuid.uuid1().hex)
                Documents.append(page_chunk)
                Metadatas.append(metadata)

    return Collections(
        Name = collection_name ,
        Documents=Documents,
        Metadatas=Metadatas,
        Ids=Ids
    )


def create_collection_token (file, collection_name) -> Collections:
    Metadatas = []
    Ids = []
    Documents = []

    with open(file, 'rb') as f:
        hash = str(hashlib.md5(f.read()).hexdigest())

    pdfFileObject = open(file, 'rb')
    pdfReader = pypdf.PdfReader(pdfFileObject)
    count = len(pdfReader.pages)

    for i in range(count):
        metadata = {
            'source': file,
            'page': i,
            'source_hash': hash,
        }
        page_chunks = text_splitter_token.split_text (pdfReader.pages[i].extract_text())
        for page_chunk in page_chunks:
            Ids.append(uuid.uuid1().hex)
            Documents.append(page_chunk)
            Metadatas.append(metadata)

    return Collections(
        Name = collection_name ,
        Documents=Documents,
        Metadatas=Metadatas,
        Ids=Ids
    )



def create_collection_char (file, collection_name) -> Collections:
    Metadatas = []
    Ids = []
    Documents = []

    with open(file, 'rb') as f:
        hash = str(hashlib.md5(f.read()).hexdigest())

    pdfFileObject = open(file, 'rb')
    pdfReader = pypdf.PdfReader(pdfFileObject)
    count = len(pdfReader.pages)

    for i in range(count):
        metadata = {
            'source': file,
            'page': i,
            'source_hash': hash,
        }
        page_chunks = text_splitter.split_text (pdfReader.pages[i].extract_text())
        for page_chunk in page_chunks:
            Ids.append(uuid.uuid1().hex)
            Documents.append(page_chunk)
            Metadatas.append(metadata)

    return Collections(
        Name = collection_name ,
        Documents=Documents,
        Metadatas=Metadatas,
        Ids=Ids
    )


In [6]:
# from langchain.embeddings import OpenAIEmbeddings


# dir = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling'
# persist_dir = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_token'
# collection_name = "Tan_Hooi_Ling"
# files = list_file( dir, ".pdf")
# no_files = len(files)
# i = 1

# for file in files:
#     print (f"===== Processing file {i} / {no_files} -- {file}")
#     FS_collection  = create_collection_token(file, collection_name=f"{collection_name}_{i}")
#     test = Document_vdb(collections= FS_collection, persistent_dir= persist_dir)
#     i = i + 1

In [7]:
# from langchain.embeddings import OpenAIEmbeddings


# dir = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling'
# persist_dir_char = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_char'
# collection_name = "Tan_Hooi_Ling"
# files = list_file( dir, ".pdf")
# no_files = len(files)
# i = 1

# for file in files:
#     print (f"===== Processing file {i} / {no_files} -- {file}")
#     FS_collection  = create_collection_char(file, collection_name=f"{collection_name}_{i}")
#     test = Document_vdb(collections= FS_collection, persistent_dir= persist_dir_char)
#     i = i + 1

# Summarising Tan Hooi Ling

In [8]:
persist_dir_char = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_char'
persist_dir_token = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_token'

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever


# OpenAIEmbeddings = OpenAIEmbeddings(api_key=os.environ.get('OPENAI_API_KEY'), model_name=OPENAI_EMBEDDING_ADA_002)
persist_dir = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_token'


llm = ChatOpenAI(model_name=OPENAI_MODEL_GPT4_0125, temperature=0)

client = chromadb.PersistentClient(path=persist_dir_char)  # or HttpClient()
collections = client.list_collections()
llm_results = []
customer_name = "Tan Hooi Ling"

for collection in collections:
    print (collection.name)


    vectordb = Chroma(persist_directory=persist_dir, collection_name = collection.name, embedding_function=OpenAIEmbeddings())

    retriever = vectordb.as_retriever( search_type="mmr" , search_kwargs={"k": 5, "include_metadata": True})




    retriever_from_llm = MultiQueryRetriever.from_llm( retriever=retriever, llm=llm)


    question = """

        Assume the role of a research assistant researching the biography of a person. Your current target is {customer_name}.
        You are to review the content in the text below. 

        Task: Review a document about {customer_name} and identify key biographical facts:

        1) Early Life:
            1a) Birthplace & Date
            1b) Hometown
            1c) Siblings (number)
        2) Education:
            2a) Schools attended (names & locations)
            2b) Degrees/Courses studied
            2c) Dates of attendance
        3) Career:
            3a) Employers & Positions
            3b) Employment Dates (start date and end date)


        Output:
         - Output will be markdown format.
         - Each of the 3 section will be a # Heading level 1
         - Refer to the output template in markdown format below

        <Output Template> 
        # Summary of {customer_name}
        <Place the summary of {Customer_Name} here>

        # Early Life       
        <place the {Customer_Name} early Life here in text>

        # Education 
        Output the Education background result in the following markdown format
        | School Name | Location | Course name | Degree type | Start Year | Graduation Year |
        |---|---|---|---|---|---|
        
        # Career 
        Output the career result in the following markdown format
        | Employer Name | Location | Job title | Start Year | End Year |
        |---|---|---|---|---|

        End year = refers to when the person is no longer part of the company


        contrainst:    
            Where you are unable to find the information, state them as "Not Available"
        """
    qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

    # unique_docs = retriever_from_llm.from_llm(query=question)
    result = qa_chain({"query": question})

    llm_results.append (result['result'])

    print ("================ END OF COLLECTION 1 =================")



Tan_Hooi_Ling_4


  warn_deprecated(
Number of requested results 20 is greater than number of elements in index 8, updating n_results = 8


Tan_Hooi_Ling_5


Number of requested results 20 is greater than number of elements in index 2, updating n_results = 2


Tan_Hooi_Ling_2
Tan_Hooi_Ling_1


Number of requested results 20 is greater than number of elements in index 3, updating n_results = 3


Tan_Hooi_Ling_3


Number of requested results 20 is greater than number of elements in index 4, updating n_results = 4


Tan_Hooi_Ling_6


In [9]:

combined_results = "\n\n\n".join ([text for text in llm_results])


prompt_combine_result= rf"""
    You are summarising a set of result from LLM retrieval. The topic is the biography you like to write include {customer_name} early Life, Education, and Career
    Each result of the LLM is separated by three blank lines (\n\n\n).
    The documents were originally formatted in Markdown.and structured in the following manner 
    
    <RESULT STRUCTURE> 
    # Summary of document
    Place the summary of the document here
    
    # Summary of {customer_name}
    <Place a comprehensive summary of {customer_name} here>

    # Early Life       
    <place the {customer_name} early Life here in text>

    # Education 
    Output the Education background result in the following markdown format
    | School Name | Location | Course name | Degree type | Start Year | Graduation Year |
    |---|---|---|---|---|---|
    
    # Career 
    Output the career result in the following markdown format
    | Employer Name | Location | Job title | Start Year | End Year |
    |---|---|---|---|---|
    
    Sort the table by Employer Name

    The following contraints are only for Start Year and End Year Columns:
    - Start Year = Year of start of employment. Only output Start year without month or day. Where Start Year is not available output a 0. Where Start Year is "present" or "current", replace it with the current year
    - End Year = Year of end of employment. Only output End year without month or day. Where End Year is not available output 0. Where End Year is "present" or "current", Replace it with the current year


    Task: 
    review the text between <RESULTS> and <\RESULTS>. 
    Combine the information for each section 
        - # Summary of {customer_name}
        - # Early Life  
        - # Education 
        - # Career 

    <RESULTS>
     {combined_results}
    <\RESULTS>

    contrainst:
        Ensure that # Summary of {customer_name} is comprehensive
        The output should retain the same structure in each section
        Do not review results that are "No information found!!"
        Do not include <RESULTS> or <\RESULTS> in the output
        Start year and End Year should only have Year (YYYY) without month or day
"""

result_summary = llm.invoke(prompt_combine_result)

In [10]:
print (result_summary.content)

# Summary of Tan Hooi Ling

Tan Hooi Ling is a prominent figure in the tech industry, best known as the co-founder of Grab, a leading ride-hailing platform in Southeast Asia. Born and raised in Kuala Lumpur, Malaysia, in a middle-class family, she has demonstrated remarkable dedication and strategic thinking throughout her career. Tan Hooi Ling holds a Bachelor's degree in Mechanical Engineering from the University of Bath and an MBA from Harvard Business School. Before co-founding Grab, she gained significant experience at Eli Lilly, McKinsey & Company, and Salesforce. Despite stepping down from operational roles and the board by the end of 2023, she continues to serve as an adviser to Grab. Tan Hooi Ling is also known for her contributions to the board of Wise (formerly TransferWise) and is celebrated for her commitment to improving lives through technology, her strategic role in Grab's growth, and her passion for adventure and personal growth.

# Early Life

Tan Hooi Ling was born a

In [22]:
import re
import pandas as pd
def MD_to_df (text):
    lines = text.strip().split("\n")
    header = lines[0].strip("|").split("|")
    print (header)
    data = [] 

    # Loop through lines starting from 2
    for line in lines[3:]:
        print (line)
        # Break once we hit an empty line
        if not line.strip():
            break
            
        cols = line.strip("|").split("|")
        row = dict(zip(header, cols))
        data.append(row)
        
    return pd.DataFrame(data)

In [24]:
import pandas as pd


result_string = str(result_summary.content)
results_content = result_string[result_string.find("# Career") + 8:]

employment_history =  MD_to_df (results_content)


print (employment_history)
employment_history[' Start Year '] = employment_history[' Start Year '].astype(int)
employment_history[' End Year '] = employment_history[' End Year '].astype(int) 

employment_history = employment_history.astype({' Start Year ':'int64', ' End Year ':'int64'})

for i, years in employment_history.apply(
    lambda x: range(
        min(x[' Start Year '], x[' End Year ']),
        max(x[' Start Year '], x[' End Year ']) + 1
    ),
    axis='columns'
).items():
    for year in years:
        new_index = len(employment_history.index)
        employment_history.loc[new_index] = employment_history.loc[i].values
        employment_history.loc[new_index, 'year'] = int(year)

employment_history.dropna(inplace = True)
employment_history.reset_index()

# employment_history.loc[(employment_history[' Start Year '] == 0) & (employment_history[' End Year '] != 0) & (employment_history['year'] != 0) ]
employment_history.drop(employment_history.loc[(employment_history[' Start Year '] == 0) & (employment_history[' End Year '] != 0) & (employment_history['year'] != 0) ].index, inplace=True)
employment_history.drop(employment_history.loc[(employment_history[' Start Year '] != 0) & (employment_history[' End Year '] == 0) & (employment_history['year'] != 0) ].index, inplace=True)

employment_history

[' Employer Name ', ' Location ', ' Job title ', ' Start Year ', ' End Year ']
| Grab | Kuala Lumpur, Malaysia & Regional/Singapore | Co-founder, COO, Chief Operating Officer, Technology Leader, various operational and technology leadership roles | 2012 | 2023 |
| McKinsey & Company | Malaysia/Various (Southeast Asia, North America, Latin America, Australia) | Management Consultant, Business Analyst, Associate | 2006 | 2013 |
| Salesforce | San Francisco, USA | Not Available | 2013 | 2015 |
| Wise (formerly TransferWise) | Not Available | Board Member | 0 | 0 |

                   Employer Name   \
0                           Grab    
1             McKinsey & Company    
2                     Salesforce    
3   Wise (formerly TransferWise)    

                                           Location   \
0       Kuala Lumpur, Malaysia & Regional/Singapore    
1   Malaysia/Various (Southeast Asia, North Ameri...   
2                                San Francisco, USA    
3                    

Unnamed: 0,Employer Name,Location,Job title,Start Year,End Year,year
4,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2012.0
5,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2013.0
6,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2014.0
7,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2015.0
8,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2016.0
9,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2017.0
10,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2018.0
11,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2019.0
12,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2020.0
13,Grab,"Kuala Lumpur, Malaysia & Regional/Singapore","Co-founder, COO, Chief Operating Officer, Tec...",2012,2023,2021.0


In [25]:
persist_dir = r'/Users/peterwirija/Documents/GenAI/Data/Tan_Hooi_Ling/chromadb_THL_pages_token'

llm = ChatOpenAI(model_name=OPENAI_MODEL_GPT4_0125, temperature=0)

client = chromadb.PersistentClient(path=persist_dir_char)  # or HttpClient()
collections = client.list_collections()
llm_results = []
combined_result = []

for i in range(0, len(employment_history)):
    for collection in collections:
        # text = "\n\n\n".join ([item for item in collection.get()['documents'] if "hooi ling".lower() in item.lower()])


        for text in collection.get()['documents']:
            if not("hooi ling".lower() in text.lower()): continue
            prompt_employment_SOW= rf"""
                You are an analyst. Your job is to review the text between <START OF TEXT> and <END OF TEXT> below to identify  {customer_name}'s income.
                Income is defined as any salary, shares, bonuses, and benefits. 

                {customer_name} has multiple employment. You are to gather information about a specific employment
                    Employer: {employment_history.iloc[i,0]}
                    Location: {employment_history.iloc[i,1]}
                    Job Title: {employment_history.iloc[i,2]}
                    Year of employment : {employment_history.iloc[i,5]}

                <START OF TEXT>
                {text}
                <END OF TEXT>

                Task: 
                    Review the between <START OF TEXT> and <END OF TEXT>
                    Extract key income information that {customer_name} received during {customer_name} time in {employment_history.iloc[i,0]} for the year {employment_history.iloc[i,5]}. information should include
                    - Salary
                    - Shares
                    - Bonuses
                    - Other benefits
                    Provide a narrative to {customer_name} employment in {employment_history.iloc[i,0]}

                Output:
                    Put the output in markdown format as per the following table:
                    | Employer Name | Location | Job title | Year | Salary from Employment | Bonus from Employment | Shares | Benefit received from Employment  | Narrative |

                    Markdown table Data dictionary:
                    Employer Name =  Employer name
                    Location =  location employer is located at
                    Job Title = Job title at place of employment 
                    Year = Current Year under review
                    Salary from employment  = Salary {customer_name} received for the year that {customer_name} was employed by employer
                    Bonus from Employment = Bonus  {customer_name} received for the year that {customer_name} was employed by employer
                    Shares = Shares  {customer_name} held in the company for the year
                    Benefit from Employment = Benefits, other than Salary, Bonus and shares, receiverd by {customer_name} for the year that {customer_name} was employed by employer

                contrainst:
                    Ensure that Narrative column is comprehensive
                    Do not make up information.
                    Where information is not available state "Not Available"
            """

            result = llm.invoke(prompt_employment_SOW)
            # qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

            # # unique_docs = retriever_from_llm.from_llm(query=question)
            # result = qa_chain({"query": prompt_employment_SOW})
            combined_result.append (result.content)

            print (rf"============== {i} ===============")
            print (result.content)

combined_result

| Employer Name | Location | Job title | Year | Salary from Employment | Bonus from Employment | Shares | Benefit received from Employment | Narrative |
|---------------|----------|-----------|------|------------------------|-----------------------|--------|----------------------------------|-----------|
| Grab | Kuala Lumpur, Malaysia & Regional/Singapore | Co-founder, COO | 2012.0 | Not Available | Not Available | Not Available | Not Available | Tan Hooi Ling co-founded Grab in 2012 alongside fellow Malaysian Anthony Tan. Despite the text providing insights into her role and the early days of Grab, specific income details such as salary, bonuses, shares, and other benefits for the year 2012 are not disclosed. The narrative focuses on her humble beginnings and the choice of a low-key lifestyle despite her significant achievements. |
| Employer Name | Location | Job title | Year | Salary from Employment | Bonus from Employment | Shares | Benefit received from Employment | Narrative |
|

In [None]:

combined_text = "\n\n\n".join ([text for text in combined_result])

OPENAI_MODEL_GPT4_0125 = 'gpt-4-0125-preview'

llm = ChatOpenAI(model_name=OPENAI_MODEL_GPT4_0125 , temperature=0)

prompt_combine_result= rf"""
    You are summarising a set of result from LLM retrieval. The topic is the biography you like to write include {customer_name} early Employment 
    Each result of the LLM is separated by three blank lines (\n\n\n).
    The documents were originally formatted in Markdown.and structured in the following manner 
    
    <RESULT STRUCTURE> 
    # Summary of document
    Place the summary of the document here
    
    
    # Career 
    Output the career result in the following markdown format
    Put the output in markdown format as per the following table:
    | Employer Name | Location | Job title | Year | Salary from Employment | Bonus from Employment | Shares | Benefit received from Employment  | Narrative |

    Markdown table Data dictionary:
    Employer Name =  Employer name
    Location =  location employer is located at
    Job Title = Job title at place of employment 
    Year = Current Year under review
    Salary from employment  = Salary {customer_name} received for the year that {customer_name} was employed by employer
    Bonus from Employment = Bonus  {customer_name} received for the year that {customer_name} was employed by employer
    Shares = Shares  {customer_name} held in the company for the year
    Benefit from Employment = Benefits, other than Salary, Bonus and shares, receiverd by {customer_name} for the year that {customer_name} was employed by employer
    
    Sort the table by Employer Name

    The following contraints are only for Start Year and End Year Columns:
    - Start Year = Year of start of employment. Only output the year without month or day. Where Start Year is "Not Available" replace it with a 0. Where Start Year is "present" or "current", replace it with the current year
    - End Year = Year of end of employment. Only output the year without month or day. Where End Year is "Not Available" replace it with 0. Where End Year is "present" or "current", Replace it with the current year


    Task: 
    review the text between <RESULTS> and <\RESULTS>. 
    Combine the information into a single row where "Employer Name", "Location", "Job Title", and "Year" are the same 

    <RESULTS>
     {combined_text}
    <\RESULTS>

    contrainst:
        Ensure that # Summary of {customer_name} is comprehensive
        The output should retain the same structure in each section
        Do not review results that are "No information found!!"
        Do not include <RESULTS> or <\RESULTS> in the output
        Replace all "Not Available" with a 0
"""

result_employment_SOW = llm.invoke(prompt_combine_result)

print(result_employment_SOW.content)

['interest, (ii) exercise veto rights with respect to certain reserved matters that fundamentally affect the business of the company, (iii) receive the economic benefits\n \nand absorb losses of the Philippine entities in proportion to the amount and value of our investment, and (b) an exclusive call option to purchase all or part of the\n \nequity interests in certain circumstances. In addition, the above-mentioned control-related rights under the Investment Agreement have been included in the\n \nAmended Articles of Incorporation and By-Laws of Grab PH Holdings Inc. The Amended Articles of Incorporation and By-Laws have been approved by the\n \nPhilippines SEC, the relevant terms of the Investment Agreement are memorialized in the Amended Articles of Incorporation and By-Laws which are public\n \nrecords that are binding not only on Grab PH Holdings Inc. and the shareholders but also on third parties in relation to the matters covered thereby. A breach of\n \nthe Investment Agreement