In [1]:
import dspy
class SummarizerSignature(dspy.Signature):
    # The text assigned to __doc__ is the instruction prompt for this signature.
    # It is a normal (non-raw) triple-quoted string, so every `\n` is a real
    # newline escape and a bare trailing backslash would join two lines together;
    # each bullet therefore has to end in `\n`, never in a lone `\`.
    __doc__ = """
        >> **General Instructions**\n
        This **file_diff_content** represents changes made to files in a version control system, such as Git.\n

        **Interpretation of the diff:**\n

        1. **File Information**:\n
            - Each section of the diff corresponds to a specific file.\n
            - The header of each section indicates the file path and the type of change (modification, addition, or deletion).\n

        2. **Change Types**:\n
            - **Modification**: Changes made to an existing file.\n
            - **Addition**: A new file has been created.\n
            - **Deletion**: An existing file has been removed.

        3. **File Hashes**:\n
            - The old and new file hashes represent the state of the file before and after the changes.\n
            - A hash of `0000000` typically indicates that the file did not exist previously (in the case of a new file).\n

        4. **Line Changes**:\n
            - Lines starting with `-` indicate content that was removed or replaced.\n
            - Lines starting with `+` indicate new content that was added.\n
            - Context lines (without `-` or `+`) provide additional information to understand the changes in the surrounding lines.\n

        **How to Interpret the Diff**:\n
        - The diff output is divided into sections, each representing changes to a specific file.\n
        - The lines prefixed with `-` and `+` show the exact changes made to the file content.\n
        - The context lines help in understanding the modifications in the context of the surrounding code or text.

            This diff helps in understanding what changes were made to the files, which lines were added, removed, or modified, and provides a clear view of the modifications in the repository.\n
        >> **Specific Instructions**\n
            - The summary should be in English.\n
            - Provide a short and factual summary with a short description of the cumulative changes in **walkthrough**.\n
            - Then give the summary of changed content per file in a tabular format in markdown table for **changes_in_tabular_description**.\n
            - Do not mention which lines have been changed.\n
            - The change summary in the table should be short and concise.\n
        ### Example:\n
        walkthrough:\n
        The method and logic to calculate the area of the triangle has been changed.\n
        changes_in_tabular_description:\n
        | File Name | Changes |
        | --------- | -------- |
        | test.txt  | The method to calculate the area of the triangle has been changed. |
        | test2.txt | The logic to calculate the area of the triangle has been changed. |
    
    """
    # Input: the raw diff text to be summarised.
    file_diff_content: str = dspy.InputField(
        desc="The diff of the file",
    )
    # Output 1: a short prose overview of the combined changes.
    walkthrough: str = dspy.OutputField(
        desc="The summary of changes made in the PR",
    )
    # Output 2: a per-file Markdown table of the changes.
    changes_in_tabular_description: str = dspy.OutputField(
        desc="The Markdown table containing the changes in the file",
    )

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
import os
# Load environment variables (including OPENAI_API_KEY) via python-dotenv so the
# key is never written into the notebook itself.
load_dotenv()
api_key = os.environ.get("OPENAI_API_KEY")

# Build the OpenAI-backed client and register it as the LM in dspy's settings.
turbo = dspy.OpenAI(model="gpt-4o", api_key=api_key, max_tokens=3000)
dspy.settings.configure(lm=turbo)

In [3]:
class DspyProgramme(dspy.Module):
    """Wrap a typed chain-of-thought predictor built from a dspy signature.

    Args:
        signature: The ``dspy.Signature`` subclass that declares the
            predictor's input and output fields.
    """

    # Sentinel that distinguishes "context not supplied" from an explicit None.
    _UNSET = object()

    def __init__(self, signature):
        super().__init__()
        self.predictor = dspy.TypedChainOfThought(signature)

    def forward(self, context=_UNSET, **inputs):
        """Run the predictor and wrap its result in a ``dspy.Prediction``.

        The original implementation always called the predictor with a single
        ``context`` keyword, which only works for signatures that declare a
        ``context`` input. Any input fields can now be passed by name, and
        ``context`` keeps working exactly as before when it is supplied.

        Args:
            context: Optional value for an input field named ``context``.
                Forwarded to the predictor only when the caller provides it.
            **inputs: Values for the signature's input fields, keyed by
                field name (for example ``file_diff_content=...``).

        Returns:
            A ``dspy.Prediction`` whose ``answer`` attribute holds the
            predictor's output.
        """
        if context is not self._UNSET:
            inputs["context"] = context
        return dspy.Prediction(answer=self.predictor(**inputs))

In [4]:
# Module instance that summarises diffs according to SummarizerSignature.
summarizer_programme = DspyProgramme(SummarizerSignature)

In [5]:
file_diff = """
    diff --git a/ai/service_desk/api/routers/chat_assist.py b/ai/service_desk/api/routers/chat_assist.py
    index acc97831..9944ad65 100644
    --- a/ai/service_desk/api/routers/chat_assist.py
    +++ b/ai/service_desk/api/routers/chat_assist.py
    @@ -1,11 +1,14 @@
     from typing import Any
    -from fastapi import APIRouter, Request
    +from fastapi import APIRouter, Request, BackgroundTasks
    +from fastapi.encoders import jsonable_encoder
     from service_desk.logger import get_logger
     from service_desk.data_types import ChatAssistRequest, User
     from service_desk.config import init_config
     from service_desk.agents.primary_agent.primary_agent_assist import (
         PrimaryAgentAssist,
     )
    +from core.LogAiModules import LogAiModules, AiLogs
    +import json
    
     log = get_logger(__name__)
     router = APIRouter()
    @@ -16,11 +19,14 @@
         input_dir=None, persist_dir=cfg.rag.persist_dir, embedding_provider_name="openai"
     )
    
    +log_ai_modules = LogAiModules()
    +
    
     @router.post("/api/v1/chat/assist")
     async def category_search_with_rank(
         request: Request,
         body: ChatAssistRequest,
    +    background_tasks: BackgroundTasks,
     ) -> Any:
         json_body = body.model_dump()
         conversation_history = json_body.get("messages")
    @@ -28,6 +34,7 @@ async def category_search_with_rank(
         session_state = json_body.get("session_state")
         request_user_object = json_body.get("user")
         request_org_id = json_body.get("org_id")
    +    session_id = json_body.get("context").get("overrides").get("session_id")
    
         (
             primary_cache,
    @@ -57,4 +64,18 @@ async def category_search_with_rank(
         response["choices"][0]["context"]["followup_questions"] = followup_questions
         response["choices"][0]["context"]["citations"] = citations
         response["choices"][0]["context"]["user"] = response_user_object
    +    ai_logs = AiLogs(
    +        org_id=request_org_id,
    +        session_id=session_id,
    +        app_id="chat-agent-assist",
    +        request_body=json.dumps(json_body),
    +        response_body=json.dumps(jsonable_encoder(response)),
    +    )
    +    background_tasks.add_task(log_ai_modules_task, ai_logs)
         return response
    +
    +
    +async def log_ai_modules_task(ai_logs: AiLogs):
    +    await log_ai_modules.connect()
    +    await log_ai_modules.log_ai_modules(ai_logs=ai_logs)
    +    await log_ai_modules.disconnect()
    diff --git a/ai/service_desk/data_types/__init__.py b/ai/service_desk/data_types/__init__.py
    index f951b82f..f62b6991 100644
    --- a/ai/service_desk/data_types/__init__.py
    +++ b/ai/service_desk/data_types/__init__.py
    @@ -11,6 +11,7 @@ class Message(BaseModel):
     class ContextOverrides(BaseModel):
         top: int = 7
         retrieval_mode: Literal["vectors"] = "vectors"
    +    session_id: Optional[str] = None
         semantic_ranker: bool = True
         semantic_captions: bool = False
         suggest_followup_questions: bool = False
    diff --git a/core/core/LogAiModules.py b/core/core/LogAiModules.py
    new file mode 100644
    index 00000000..2de319f0
    --- /dev/null
    +++ b/core/core/LogAiModules.py
    @@ -0,0 +1,52 @@
    +from prisma import Prisma
    +import logging
    +from pydantic import BaseModel
    +from typing import Any
    +
    +log = logging.getLogger(__name__)
    +
    +
    +class AiLogs(BaseModel):
    +    org_id: str
    +    session_id: str
    +    app_id: str
    +    request_body: Any
    +    response_body: Any
    +
    +
    +class LogAiModules:
    +    def __init__(self):
    +        "Initialize the LogAiModules class with prisma client"
    +        self.prisma = Prisma()
    +
    +    async def connect(self):
    +        "Connects to the PostgreSQL database using the Prisma client."
    +        await self.prisma.connect()
    +        await self.prisma.connect()
    +
    +    async def disconnect(self):
    +        "Disconnects from the PostgreSQL database using the Prisma client."
    +        await self.prisma.disconnect()
    +
    +    async def log_ai_modules(self, ai_logs: AiLogs):
    +        "Logs the request and response body to the database."
    +        try:
    +            await self.prisma.ai_logs.create(
    +                data={
    +                    "org_id": ai_logs.org_id,
    +                    "session_id": ai_logs.session_id,
    +                    "app_id": ai_logs.app_id,
    +                    "request_body": ai_logs.request_body,
    +                    "response_body": ai_logs.response_body,
    +                }
    +            )
    +        except Exception as e:
    +            log.error(f"Error logging AI modules: {e}")
    +            raise e
    diff --git a/core/core/db/prisma/migrations/20240629224932_change_app_id/migration.sql b/core/core/db/prisma/migrations/20240629224932_change_app_id/migration.sql
    new file mode 100644
    index 00000000..99f87af5
    --- /dev/null
    +++ b/core/core/db/prisma/migrations/20240629224932_change_app_id/migration.sql
    @@ -0,0 +1,2 @@
    +-- AlterTable
    +ALTER TABLE "ai_logs" ALTER COLUMN "app_id" SET DATA TYPE TEXT;
    diff --git a/core/core/db/prisma/schema.prisma b/core/core/db/prisma/schema.prisma
    index 804af5f4..d849736e 100644
    --- a/core/core/db/prisma/schema.prisma
    +++ b/core/core/db/prisma/schema.prisma
    @@ -244,7 +244,7 @@ model ai_logs {
       created_at    DateTime @default(now()) @db.Timestamptz(6)
       org_id        String?  @db.Uuid
       session_id    String?  @db.Uuid
    -  app_id        String?  @db.Uuid
    +  app_id        String?  
       request_body  Json?    @db.Json
       response_body Json?    @db.Json
    }
    """

# Invoke the wrapped predictor directly with the signature's input field, then
# print the prose overview of the diff.
prediction = summarizer_programme.predictor(file_diff_content=file_diff)

summary = prediction.walkthrough
print(summary)

The changes introduce logging functionality for AI modules, including the addition of a new logging class and methods, modifications to existing API routes to incorporate logging, and updates to the database schema to support the new logging requirements.


In [6]:
# Payload wrapping the raw diff under a single "file_diff" key.
k = dict(file_diff=file_diff)

In [7]:
import json
# Write the {"file_diff": ...} payload to disk as JSON.
# The earlier bare `json.dumps(file_diff)` call was dropped: its result was
# discarded and, not being the cell's last expression, it was never displayed.
# json.dump writes straight to the handle, and the encoding is pinned so the
# file does not depend on the platform default.
with open("file_diff.json", "w", encoding="utf-8") as f:
    json.dump(k, f)

In [9]:
from IPython.display import display, Markdown
# Show the per-file change table as rendered Markdown rather than raw text.
display(
    Markdown(prediction.changes_in_tabular_description)
)

| File Name | Changes |
| --------- | -------- |
| ai/service_desk/api/routers/chat_assist.py | Added logging functionality for AI modules, including background tasks for logging requests and responses. |
| ai/service_desk/data_types/__init__.py | Added a new optional field `session_id` to the `ContextOverrides` class. |
| core/core/LogAiModules.py | New file created to handle logging of AI modules, including connection to the database and logging methods. |
| core/core/db/prisma/migrations/20240629224932_change_app_id/migration.sql | New migration file to alter the `app_id` column type in the `ai_logs` table. |
| core/core/db/prisma/schema.prisma | Updated the `app_id` column type in the `ai_logs` model from UUID to String. |

In [14]:
# NOTE(review): the output recorded under this cell carries `reasoning` and
# `summary` fields, which do not match SummarizerSignature's outputs
# (`walkthrough`, `changes_in_tabular_description`), and the execution count
# (14) is out of sequence with the neighbouring cells. The output looks stale;
# confirm by re-running after Restart Kernel -> Run All.
prediction

Prediction(
    reasoning="Reasoning: Let's think step by step in order to produce the summary. We observe that the new chunk of code introduces a validation step to ensure that all elements in the input list are either integers or floats. This is done using the `all` function combined with `isinstance`. If any element in the list is not a number, a `ValueError` is raised with an appropriate error message. The rest of the code remains unchanged, where it calculates the sum of the numbers in the list.",
    summary='The new code for the `calculate_sum` function includes an additional validation step to check that all elements in the input list are numbers (either integers or floats). If any element is not a number, the function raises a `ValueError`. This enhancement ensures that the function handles invalid inputs more gracefully, preventing potential errors during the summation process. The core functionality of summing the numbers remains the same.'
)

In [10]:
class OverconfidentJudge(dspy.Signature):
    # Instruction text for the judge, assigned to __doc__ verbatim.
    __doc__ = """
    You must provide your judgment on why and if the given answer is correct with utmost confidence and persuasiveness.\n
    The judgment should be delivered in a manner that exudes confidence and authority.
    """

    # Inputs: the task being judged and the answer that was produced for it.
    case_description: str = dspy.InputField(desc="The description of the case")
    case_answer: str = dspy.InputField(desc="The answer given for the case")

    # Output: the verdict text.
    judgment: str = dspy.OutputField(desc="The final judgment for the case")

In [11]:
# Build a judge module around the OverconfidentJudge signature.
overconfident_judge = DspyProgramme(OverconfidentJudge)

# Ask the judge to assess the generated change table. The call is the cell's
# last expression, so its result is shown as the cell output.
overconfident_judge.predictor(
    case_description="you have to provide a summary of changes in the files in a tabular format",
    case_answer=prediction.changes_in_tabular_description,
)

python(4757) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Prediction(
    reasoning='produce the judgment. We need to evaluate the given answer for its accuracy and completeness in summarizing the changes in the files.\n\n1. **File Name and Changes**: The table provided lists the file names and the corresponding changes made to each file. This is a clear and concise way to present the information.\n2. **Specificity**: Each change is described with sufficient detail to understand what was modified or added. For example, the addition of logging functionality, new fields, and new files are all explicitly mentioned.\n3. **Consistency**: The format is consistent throughout the table, making it easy to read and understand.\n4. **Relevance**: All the changes listed are relevant to the context of the files mentioned. There are no extraneous details that could confuse the reader.',
    judgment='The final judgment for the case is that the given answer is correct. The table accurately and comprehensively summarizes the changes in the files, adhering to