In [1]:
import warnings
warnings.filterwarnings("ignore")

import os
import yaml
import json
import numpy as np
from pprint import pprint
from collections import defaultdict
import cv2
import random
random.seed(0)

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain.load import dumps, loads

In [2]:
# Read the API keys from the local credentials file.
with open("../creds.yaml", "r") as f:
    creds = yaml.safe_load(f)

# Configure LangChain tracing and the OpenAI client through environment variables.
os.environ.update({
    "LANGCHAIN_TRACING_V2": "true",
    "LANGCHAIN_ENDPOINT": "https://api.smith.langchain.com",
    "LANGCHAIN_API_KEY": creds["LANGCHAIN"]["LANGCHAIN_API_KEY"],
    "OPENAI_API_KEY": creds["OPENAI"]["OPENAI_API_KEY"],
    # NOTE(review): non-default OpenAI base URL; presumably requests (and the
    # key above) go to this host -- confirm it is a trusted endpoint.
    "OPENAI_BASE_URL": "https://pro.aiskt.com/v1",
    "LANGCHAIN_PROJECT": "LLM Agent",
})

In [3]:
def get_video_duration(video_path):
    """Return the frame count and duration (in seconds) of a video file.

    Args:
        video_path: Path of the video to open with OpenCV.

    Returns:
        A ``(frame_count, duration)`` tuple, or ``None`` (after printing an
        error) when the video cannot be opened or reports a non-positive
        frame rate.
    """
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Could not open video.")
            return None
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Release the capture handle on every path, including the early return.
        cap.release()

    if not fps > 0:
        # A frame rate of 0 would make the division below raise ZeroDivisionError.
        print("Error: Could not determine video frame rate.")
        return None
    return frame_count, frame_count / fps

def time_converter(seconds):
    """Format a duration in seconds as an ``M:SS`` string.

    Fractional seconds are truncated. The seconds field is zero-padded to two
    digits so that e.g. 62 seconds renders as ``"1:02"`` rather than ``"1:2"``.

    Args:
        seconds: Non-negative duration in seconds (int or float).

    Returns:
        The formatted ``"minutes:seconds"`` string; minutes are not wrapped
        into hours.
    """
    minutes, remaining_seconds = divmod(seconds, 60)
    return f"{int(minutes)}:{int(remaining_seconds):02d}"

def build_corpus(vid, raw_pred):
    """Turn the predicted events of one video into a list of Documents.

    One ``Document`` is created per sentence of every event. Its metadata
    carries the video path, the sentence index within the event, the event id
    and the event's time span expressed in seconds, as ``M:SS`` strings and as
    frame indices.

    Args:
        vid: Path of the video file the predictions belong to.
        raw_pred: Iterable of event dicts with the keys ``"sentence"``
            (list of strings), ``"timestamp"`` (start/end in seconds) and
            ``"event_id"``.

    Returns:
        List of ``Document`` objects, in event order then sentence order.

    Raises:
        ValueError: If the video cannot be read or has no positive duration.
    """
    video_info = get_video_duration(vid)
    if video_info is None:
        # get_video_duration signals failure with None; fail with a clear message
        # instead of an opaque unpacking error.
        raise ValueError(f"Could not read frame count/duration for video: {vid}")
    frame_count, duration = video_info
    if duration <= 0:
        raise ValueError(f"Video has no measurable duration: {vid}")

    doc = []
    for event in raw_pred:
        sentences = event["sentence"]
        if not sentences:
            continue

        # Timing depends only on the event, so compute it once per event
        # instead of once per sentence.
        timestamp = event["timestamp"]
        real_time = [time_converter(t) for t in timestamp]
        frame_range = [int(t / duration * frame_count) for t in timestamp]

        for i, sent in enumerate(sentences):
            doc.append(Document(
                page_content=sent,
                metadata={
                    "vid": vid,
                    "sent_id": i,
                    "event_id": event["event_id"],
                    "timestamp_s": timestamp[0],
                    "timestamp_e": timestamp[1],
                    "real_time_s": real_time[0],
                    "real_time_e": real_time[1],
                    "frame_range_s": frame_range[0],
                    "frame_range_e": frame_range[1]
                }
            ))
    return doc

In [4]:
# Retrieval and answer-formatting helpers
def get_unique_union(documents: list[list]):
    """Merge several retrieval result lists into one document per video event.

    Exact duplicates are removed first, then a single document is kept for
    each (event_id, video) pair: the first one encountered in input order.

    Args:
        documents: List of lists of ``Document`` objects whose metadata has an
            ``"event_id"`` entry (and normally a ``"vid"`` entry).

    Returns:
        The kept documents sorted by event id, then by video.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # no longer depends on set iteration order (which varies between runs).
    serialized = dict.fromkeys(
        dumps(doc) for sublist in documents for doc in sublist
    )
    unique_docs = [loads(doc) for doc in serialized]

    # Key on the video as well as the event id: build_corpus stores event ids
    # per video, so the same id may presumably occur in several videos and must
    # not collapse into a single entry.
    unique_event = {}
    for doc in unique_docs:
        key = (int(doc.metadata["event_id"]), str(doc.metadata.get("vid", "")))
        unique_event.setdefault(key, doc)
    return [unique_event[key] for key in sorted(unique_event)]

def _parse_doc_indices(raw_output, num_docs):
    """Extract zero-based document indices from the model's textual reply.

    Only the last non-empty line is considered, and only the part after the
    last ``": "`` on that line. Every integer found there is converted from
    the prompt's 1-based numbering; out-of-range values and repeats are dropped.

    Args:
        raw_output: Text returned by the LLM.
        num_docs: Number of documents that were shown to the LLM.

    Returns:
        List of valid zero-based indices in the order they were mentioned;
        empty when the reply contains no usable number.
    """
    import re  # local import: `re` is not among the notebook's top-level imports

    lines = raw_output.strip().splitlines()
    if not lines:
        return []
    answer = lines[-1].split(": ")[-1]

    indices = []
    # Regex extraction tolerates "1,2", "1, 2.", "[1, 2]" or "None", all of
    # which would break a strict ", " split followed by int().
    for number in re.findall(r"\d+", answer):
        idx = int(number) - 1
        if 0 <= idx < num_docs and idx not in indices:
            indices.append(idx)
    return indices


def retrieval_chain(question, retriever):
    """Retrieve candidate documents and let the LLM pick the relevant ones.

    Args:
        question: The user's question.
        retriever: Object whose ``invoke(question)`` returns a list of
            documents exposing ``page_content``.

    Returns:
        A ``(output_idx, retrieved_docs)`` tuple where ``output_idx`` is the
        list of zero-based positions in ``retrieved_docs`` that the LLM judged
        relevant, and ``retrieved_docs`` is the retriever's raw result.
    """
    retrieved_docs = retriever.invoke(question)
    if not retrieved_docs:
        # Nothing to rank; skip the LLM call.
        return [], retrieved_docs
    context = "\n".join(f"Document {i + 1}: {doc.page_content}" for i, doc in enumerate(retrieved_docs))

    template = """
    Task: Medical Document Retrieval

    Introduction:
    In this task, you will be provided with a set of documents as reference that describe various medical scenarios.
    Each document discusses a medical situation from different angles, including symptoms, affected body parts, potential treatments, etc.
    Your objective is to determine which documents are relevant to a given user query.

    Instructions:
    1. Understand the Reference Documents:
        - Each document is indexed with a positive number, following the format: "Document X: ...".
        - Documents are separated by newlines.
        - Ensure you fully comprehend the medical situation described in each document.
    2. Understand the User's Question:
        - Analyze the user's query to identify key concerns, such as symptoms, affected body parts, queried treatments, etc.
    3. Compare and Identify Relevance:
        - Compare the user's question with the content of the provided documents.
        - Identify the documents that can POTENTIALLY address the concerns raised in the user's question as MANY as possible.
    4. Output the Results:
        - Provide the indices of the relevant documents.
        - Separate the indices with commas (e.g., "1, 2").

    Example:
    Reference Documents:
    Document 1: applying pressure on the head with a pillow can help alleviate pressure on the neck and provide relief from neck pain.
    Document 2: performing neck pain and improve blood circulation through the head and shoulders, potentially reducing tension in the neck muscles and improve range of motion.
    Document 3: regular self - massage techniques can be used to alleviate discomfort associated with pinched nerve in the neck.
    Document 4: incorporating isometric neck movements into your routine can reduce discomfort associated with neck pain and improve flexibility.
    Document 5: applying gentle pressure to the neck can help alleviate symptoms of cervical spasms.
    Document 6: taking non-steroidal anti-inflammatory drugs and acetaminophen can help relieve sore throats.
    Input Question:
    "I always suffer from neck pain. Can you give me some suggestions?"
    Answer:
    1, 2, 3, 4, 5

    Reference Documents: {context}
    Input Question: {question}
    """
    prompt = ChatPromptTemplate.from_template(template)
    # Named `chain` so the local does not shadow this function's own name.
    chain = (
        prompt
        | ChatOpenAI(temperature=0)
        | StrOutputParser()
    )

    output = chain.invoke({"context": context, "question": question})
    output_idx = _parse_doc_indices(output, len(retrieved_docs))
    return output_idx, retrieved_docs

def pipeline(question, retriever):
    """Answer ``question`` by printing the matching video links and time ranges.

    The documents selected by ``retrieval_chain`` are grouped per video; for
    each video its YouTube URL is printed, followed by the distinct time
    ranges sorted by start time. A notice is printed when nothing matched.

    Args:
        question: The user's question.
        retriever: Retriever passed through to ``retrieval_chain``.

    Returns:
        None. Results are written to stdout.
    """
    output_idx, retrieved_docs = retrieval_chain(question, retriever)
    selected_docs = [doc for pos, doc in enumerate(retrieved_docs) if pos in output_idx]

    # Group by video; inside a video the "start-end" key stores each distinct
    # time range once.
    output_vid = {}
    for doc in selected_docs:
        meta = doc.metadata
        vid = meta["vid"]
        if vid not in output_vid:
            output_vid[vid] = {
                "vid_source": f"https://www.youtube.com/watch?v={os.path.basename(vid).rsplit('.mp4')[0]}",
                "answers": {},
            }
        span_key = f"{meta['real_time_s']}-{meta['real_time_e']}"
        output_vid[vid]["answers"][span_key] = {
            "real_time_s": meta["real_time_s"],
            "real_time_e": meta["real_time_e"],
        }

    if not output_vid:
        print("Did not find related answers.")
    for rank, entry in enumerate(output_vid.values(), start=1):
        print(f"Answer {rank}: {entry['vid_source']}")

        # Order the ranges by their numeric [minutes, seconds] start time.
        spans = sorted(
            entry["answers"].values(),
            key=lambda span: [int(part) for part in span["real_time_s"].split(":")],
        )
        for span in spans:
            print(f"From {span['real_time_s']} to {span['real_time_e']}")

In [5]:
# input
vid_dir = "../inputs/test_set"
with open("../captions_ann/final_predictions.json", "r") as f:
    pred = json.load(f)

# Build one flat corpus covering every video listed in the predictions file.
doc_all = []
for video_id, events in pred.items():
    # Sort each video's events in place by event id before building documents.
    events.sort(key=lambda event: event["event_id"])
    video_path = os.path.join(vid_dir, f"{video_id}.mp4")
    doc_all += build_corpus(video_path, events)

In [6]:
# Load the Chroma index from disk when it is already there; otherwise build it
# from `doc_all` with the same persist directory. This replaces the manually
# (un)commented build step so the notebook also runs from a fresh checkout.
CHROMA_DIR = "../chroma_db"
embedding_function = OpenAIEmbeddings()

if os.path.isdir(CHROMA_DIR) and os.listdir(CHROMA_DIR):
    vectorstore = Chroma(
        persist_directory=CHROMA_DIR,
        embedding_function=embedding_function
    )
else:
    # Embeds every document in `doc_all`, so this branch calls the embeddings API.
    vectorstore = Chroma.from_documents(
        documents=doc_all,
        embedding=embedding_function,
        persist_directory=CHROMA_DIR
    )

retriever = vectorstore.as_retriever(
    search_type="mmr",
    # k: number of documents returned per query.
    # lambda_mult: MMR diversity trade-off (per LangChain docs, 0 = maximum
    # diversity, 1 = minimum diversity).
    search_kwargs={"lambda_mult": 0.1, "k": 10},
)

In [7]:
# Example query: `pipeline` prints the matching video links and their time ranges.
question = "Are there any medical machines available?"
pipeline(question, retriever)

Answer 1: https://www.youtube.com/watch?v=mL-50vtXHdo&t=47s
From 0:2 to 1:4
Answer 2: https://www.youtube.com/watch?v=Cec1-mcZOp8
From 0:15 to 0:41
Answer 3: https://www.youtube.com/watch?v=kSpggqOLgaU
From 5:9 to 6:58
Answer 4: https://www.youtube.com/watch?v=LqMs2kA5Y88
From 2:21 to 2:52
