In [2]:
# Refresh the AWS credentials stored under the "default" profile before any boto3/Bedrock call below.
# NOTE(review): the account id and role are hard-coded in this command and end up in the saved
# notebook; `--once` presumably performs a single refresh instead of a background loop - confirm.
!ada credentials update --account=491718866258 --provider=isengard --role=Admin --profile=default --once

2025/03/13 17:19:38 Refreshing aws credentials for default
2025/03/13 17:19:42 Successfully refreshed aws credentials for default


In [3]:
import os
import logging
import boto3

# Configure logging once for the whole notebook (timestamp - level - message).
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# Configuration
AWS_REGION = "us-east-1"  # fallback region, used when the boto3 session does not define one
BEDROCK_MODEL = "anthropic.claude-3-sonnet-20240229-v1:0"
# Root of the codebase to explore. The CODEBASE_PATH environment variable overrides the default so
# the notebook can run on another machine without editing this cell.
CODEBASE_PATH = os.environ.get("CODEBASE_PATH", "/Users/wuzhiche/Workspace/ATE")
BATCH_SIZE = 8

# Use boto3 to load credentials from ~/.aws/credentials default profile
session = boto3.Session(profile_name="default")
credentials = session.get_credentials()
if credentials:
    # Only the first four characters of the key id are logged, never the secret.
    logger.info(f"Loaded credentials from ~/.aws/credentials: Access Key ID = {credentials.access_key[:4]}...")
else:
    logger.warning("No credentials found in ~/.aws/credentials. Configure via AWS CLI ('aws configure').")


2025-03-13 17:19:42,216 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-03-13 17:19:42,217 - INFO - Loaded credentials from ~/.aws/credentials: Access Key ID = ASIA...


In [4]:
import operator
from typing import List, TypedDict, Annotated
from pydantic import BaseModel, Field
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langgraph.graph.message import add_messages

# Argument schema handed to the `open_files` StructuredTool (see `args_schema=OpenFilesSchema`).
class OpenFilesSchema(BaseModel):
    """Schema for the open_files tool."""
    # The single tool argument: paths are expressed relative to the codebase root; the Field
    # description is part of the schema shown to the model.
    file_paths: List[str] = Field(description="List of file paths to open, relative to the codebase root.")

class ChatState(TypedDict):
    """State carried through the chat graph.

    Both keys are declared with `Annotated[..., reducer]`: `messages` is combined with
    `add_messages` and `all_files_opened` with `operator.add` (list concatenation), so node
    return values are merged into the existing lists rather than assigned over them.
    """
    # Conversation history (human, AI and tool messages).
    messages: Annotated[List[HumanMessage | AIMessage | ToolMessage], add_messages]
    # Paths requested through the open_files tool, accumulated across tool runs.
    all_files_opened: Annotated[List[str], operator.add]


In [5]:
from langchain.tools import Tool, StructuredTool
from langchain_core.utils.function_calling import convert_to_openai_tool
from typing import List

def create_tools(filesystem):
    """Build the codebase-exploration tools backed by ``filesystem``.

    Args:
        filesystem: Object exposing ``get_file_structure()`` and
            ``read_files(file_paths, max_chars=...)``.

    Returns:
        A pair ``(tools, openai_tools)``: the LangChain tool objects, in the order
        ``get_file_structure`` then ``open_files``, and the same tools converted with
        ``convert_to_openai_tool`` for binding to the model.
    """

    def open_files(file_paths: List[str]) -> str:
        """Open and return the contents of the specified files (up to 30000 chars each)."""
        return filesystem.read_files(file_paths, max_chars=30000)

    # Position 0: tree overview of the codebase.
    structure_tool = Tool(
        name="get_file_structure",
        func=filesystem.get_file_structure,
        description="Retrieve the file structure of the codebase.",
    )
    # Position 1: file reader, validated against OpenFilesSchema.
    reader_tool = StructuredTool.from_function(
        func=open_files,
        name="open_files",
        description="Open and retrieve contents of files from the codebase.",
        args_schema=OpenFilesSchema,
    )

    tools = [structure_tool, reader_tool]
    openai_specs = list(map(convert_to_openai_tool, tools))
    return tools, openai_specs


In [6]:
import os
import logging
from typing import List
from pathlib import Path

logger = logging.getLogger(__name__)

class FileSystem:
    """Read-only view over the text files below a codebase directory.

    Attributes:
        path: Root directory of the codebase as a ``Path``.
        files: Paths (relative to ``path``) of every file whose suffix is in
            ``TEXT_EXTENSIONS``, collected once at construction time.
    """

    # Lower-case suffixes that are indexed as text files.
    TEXT_EXTENSIONS = frozenset(
        {'.java', '.py', '.txt', '.js', '.cpp', '.h', '.yml', '.yaml', '.properties'}
    )

    def __init__(self, codebase_path: str):
        """Index the text files found under ``codebase_path``."""
        self.path = Path(codebase_path)
        self.files = self._list_files()

    def _list_files(self) -> List[str]:
        """List all text files in the codebase, relative to the root."""
        return [
            str(entry.relative_to(self.path))
            for entry in self.path.rglob("*")
            if entry.is_file() and entry.suffix.lower() in self.TEXT_EXTENSIONS
        ]

    def get_file_structure(self) -> str:
        """Generate a nested file structure with total size.

        Returns:
            A tree-like listing: a header line with the root name, the total size in KB and
            the number of indexed files, followed by the indexed files grouped by directory.
            The size is summed over every file under the root, while the file count and the
            listing only cover the indexed text files.
        """
        total_size = sum(f.stat().st_size for f in self.path.rglob("*") if f.is_file()) / 1024
        lines = [f"{self.path.name} ({total_size:.1f}KB, {len(self.files)} files)"]

        # Group file names by their parent directory ("" for files directly under the root).
        packages = {}
        for file in self.files:
            parts = file.split(os.sep)
            packages.setdefault("/".join(parts[:-1]), []).append(parts[-1])

        for pkg, names in sorted(packages.items()):
            if pkg:
                lines.append(f"├── {pkg} ({len(names)} files)")
            prefix = "│   ├── " if pkg else "├── "
            lines.extend(f"{prefix}{name}" for name in sorted(names))
        return "\n".join(lines).strip()

    @staticmethod
    def _coerce_to_list(file_paths):
        """Turn the tool argument into a list of raw entries; None if a stringified list is malformed."""
        if isinstance(file_paths, str):
            text = file_paths.replace('\n', '').strip()
            if text.startswith('[') and text.endswith(']'):
                import ast
                try:
                    parsed = ast.literal_eval(text)
                except (ValueError, SyntaxError) as e:
                    logger.error("Failed to parse stringified file list: %s", str(e))
                    return None
                logger.debug("Converted stringified files list: %s", parsed)
                return list(parsed)
            return [text]
        if isinstance(file_paths, (list, tuple)):
            return list(file_paths)
        return []

    @staticmethod
    def _candidate_paths(raw: str) -> List[str]:
        """Return the spellings of ``raw`` to try, most literal first."""
        exact = raw.strip()
        # Models sometimes wrap a path in quotes or list brackets.
        unquoted = exact.strip("'\"[]").strip()
        candidates = [exact, unquoted]
        # ...or prefix it with a list marker such as "1. " or "2) ". Only that leading marker
        # is dropped; digits that belong to the path itself are kept.
        marker, sep, remainder = unquoted.partition(" ")
        if sep and marker[:-1].isdigit() and marker[-1] in ".)":
            candidates.append(remainder.strip())

        unique = []
        for candidate in candidates:
            if candidate and candidate != "/" and candidate not in unique:
                unique.append(candidate)
        return unique

    def _locate(self, raw: str, root: Path):
        """Resolve ``raw`` to ``(display_path, absolute_path)`` of a file inside ``root``, or None."""
        rejected = False
        for candidate in self._candidate_paths(raw):
            try:
                target = (self.path / candidate).resolve()
                # Raises ValueError when the resolved path is not below the codebase root
                # (absolute paths, "..", symlinks pointing elsewhere).
                target.relative_to(root)
            except (ValueError, OSError, RuntimeError):
                rejected = True
                continue
            if target.is_file():
                return candidate, target
        if rejected:
            logger.warning(f"Refusing invalid path or path outside the codebase: {raw}")
        else:
            logger.warning(f"File not found or not a file: {raw}")
        return None

    def read_files(self, file_paths: List[str], max_chars: int = 30000) -> str:
        """Read content of selected files, trimmed to max_chars, with path cleaning.

        Args:
            file_paths: Paths relative to the codebase root. A single path string or a
                stringified Python list (``"['a.py', 'b.py']"``) is accepted as well.
            max_chars: Maximum number of characters returned per file; longer files are cut
                and suffixed with ``"..."``.

        Returns:
            ``"<path>:\\n<content>"`` sections joined by blank lines, or
            ``"No valid file contents retrieved."`` when nothing could be read. Entries that
            are empty, not strings, missing, unreadable or outside the codebase root are
            logged and skipped.
        """
        requested = self._coerce_to_list(file_paths)
        if requested is None:
            return "No valid file contents retrieved."

        root = self.path.resolve()
        contents = {}
        for raw in requested:
            if not isinstance(raw, str) or not self._candidate_paths(raw):
                logger.warning(f"Skipping invalid file path: {raw}")
                continue
            located = self._locate(raw, root)
            if located is None:
                continue
            shown_path, full_path = located
            try:
                with open(full_path, "r", encoding="utf-8") as handle:
                    # One extra character is enough to know whether the file was truncated.
                    content = handle.read(max_chars + 1) if max_chars >= 0 else handle.read()
            except (OSError, UnicodeError) as e:
                logger.error(f"Error reading file {shown_path}: {str(e)}")
                continue
            contents[shown_path] = content[:max_chars] + ("..." if len(content) > max_chars else "")

        if not contents:
            return "No valid file contents retrieved."
        return "\n\n".join(f"{fp}:\n{cont}" for fp, cont in contents.items())


In [7]:
from typing import Dict, Any
import logging
from langchain_aws import ChatBedrock
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
from langgraph.graph import StateGraph, END
from langgraph.checkpoint.memory import MemorySaver
from config import session, AWS_REGION, BEDROCK_MODEL

logger = logging.getLogger(__name__)


def route_tools(state: ChatState):
    """Pick the next node after the agent step.

    Returns ``"tools"`` when the most recent message carries a non-empty ``tool_calls``
    attribute; returns ``END`` otherwise, including when there are no messages at all.
    """
    history = state["messages"]
    if history and getattr(history[-1], "tool_calls", None):
        return "tools"
    return END

class CodeExplorerChatbot:
    """Chat agent that explores a codebase through two tools.

    Construction indexes the codebase with ``FileSystem``, creates a Bedrock chat model bound
    to the tools returned by ``create_tools`` and compiles a two-node graph
    (``agent`` <-> ``tools``) with an in-memory checkpointer. The compiled graph is exposed as
    ``self.app``.
    """

    def __init__(self, codebase_path: str):
        """Build the file index, the tool-bound model and the compiled workflow."""
        self.fs = FileSystem(codebase_path)
        self.checkpointer = MemorySaver()
        self.llm = ChatBedrock(
            model_id=BEDROCK_MODEL,
            # Prefer the region configured on the boto3 session, else the notebook default.
            region_name=session.region_name or AWS_REGION,
            model_kwargs={"temperature": 0.7, "max_tokens": 200000}
        )
        self.tools, openai_tools = create_tools(self.fs)
        self.llm_with_tools = self.llm.bind_tools(openai_tools)
        self._initialize_workflow()

    def _initialize_workflow(self):
        """Define the graph nodes and compile the graph into ``self.app``."""
        # Look tools up by name so execution does not depend on their position in self.tools.
        tools_by_name = {tool.name: tool for tool in self.tools}

        async def agent(state: ChatState, config) -> ChatState:
            """Call the tool-bound model on the conversation so far."""
            response = await self.llm_with_tools.ainvoke(state["messages"], config)
            return {
                "messages": [response]
            }

        def execute_tools(state: ChatState) -> ChatState:
            """Run every tool call of the last message and return one ToolMessage per call."""
            messages = []
            last_message = state["messages"][-1]
            all_files_opened = []

            for tool_call in last_message.tool_calls:
                name = tool_call["name"]
                if name == "open_files":
                    file_paths = tool_call["args"].get("file_paths", [])
                    result = tools_by_name[name].func(file_paths)
                    # The model may send a single string instead of a list; record it as one
                    # entry rather than letting extend() split it into characters.
                    if isinstance(file_paths, str):
                        all_files_opened.append(file_paths)
                    elif file_paths:
                        all_files_opened.extend(file_paths)
                elif name == "get_file_structure":
                    result = tools_by_name[name].func()
                else:
                    result = f"Unknown tool: {name}"
                messages.append(ToolMessage(
                    content=result,
                    tool_call_id=tool_call["id"]
                ))

            return {
                "messages": messages,
                "all_files_opened": all_files_opened
            }

        workflow = StateGraph(ChatState)
        workflow.add_node("agent", agent)
        workflow.add_node("tools", execute_tools)
        workflow.set_entry_point("agent")

        # After the agent speaks: run tools if it asked for any, otherwise finish.
        workflow.add_conditional_edges(
            "agent",
            route_tools,
            {"tools": "tools", END: END}
        )

        # Tool results always go back to the agent.
        workflow.add_edge("tools", "agent")
        self.app = workflow.compile(
            checkpointer=self.checkpointer
        )

2025-03-13 17:19:43,019 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2025-03-13 17:19:43,019 - INFO - Loaded credentials from ~/.aws/credentials: Access Key ID = ASIA...


In [8]:
# Build the chatbot over CODEBASE_PATH; construction indexes the codebase files and compiles
# the agent/tools graph, which later cells reach through `a.app`.
a = CodeExplorerChatbot(CODEBASE_PATH)

2025-03-13 17:19:44,298 - INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [9]:
async for message_chunk, metadata in a.app.astream(
    {"messages": [HumanMessage(content="hello")]},
    {"configurable": {"thread_id": "1"}},
    stream_mode="messages",
):
    if message_chunk.content:
        print(message_chunk.content, flush=True)

[{'type': 'text', 'text': 'Hello', 'index': 0}]
[{'type': 'text', 'text': "! I'm here to help you explore", 'index': 0}]
[{'type': 'text', 'text': ' the codebase. Woul', 'index': 0}]
[{'type': 'text', 'text': 'd you like to see the file structure or', 'index': 0}]
[{'type': 'text', 'text': ' open any specific files? I can', 'index': 0}]
[{'type': 'text', 'text': ' assist you by using', 'index': 0}]
[{'type': 'text', 'text': ' the available tools to retrieve information', 'index': 0}]
[{'type': 'text', 'text': ' about the project.\n\nIs', 'index': 0}]
[{'type': 'text', 'text': " there anything specific you'", 'index': 0}]
[{'type': 'text', 'text': 'd like to know about the codebase', 'index': 0}]
[{'type': 'text', 'text': '? I can:\n1', 'index': 0}]
[{'type': 'text', 'text': '. Show you the file structure', 'index': 0}]
[{'type': 'text', 'text': ' using `get_file_', 'index': 0}]
[{'type': 'text', 'text': 'structure`\n2', 'index': 0}]
[{'type': 'text', 'text': '. Open and view contents',

In [None]:
async for message_chunk, metadata in a.app.astream(
    {"messages": [HumanMessage(content="what's the file structure")]},
    {"configurable": {"thread_id": "1"}},
    stream_mode="messages",
):
    if message_chunk.content:
        print(message_chunk.content, flush=True)

In [None]:
# Inspect the checkpointed state of conversation thread "1" (messages and all_files_opened).
a.app.get_state({"configurable": {"thread_id": "1"}})

In [43]:
from typing import Annotated

from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import BaseMessage
from typing_extensions import TypedDict
from langchain_aws import ChatBedrock
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode
from langgraph.checkpoint.memory import MemorySaver

# In-memory checkpointer; passed to compile() below so state is kept per thread_id.
memory = MemorySaver()

class State(TypedDict):
    """State of the minimal demo graph."""
    # Chat history; annotated with add_messages so node outputs are merged into the list.
    messages: Annotated[list, add_messages]
    # Plain field without a reducer annotation; the `chatbot` node writes 1 into it.
    c: int


graph_builder = StateGraph(State)

# Same Bedrock model settings as CodeExplorerChatbot, but without tools bound.
llm = ChatBedrock(
            model_id=BEDROCK_MODEL,
            region_name=session.region_name or AWS_REGION,
            model_kwargs={"temperature": 0.7, "max_tokens": 200000}
        )


def chatbot(state: State, config):
    """Answer the conversation so far with the LLM and set ``c`` to 1.

    Args:
        state: Current graph state; only ``state["messages"]`` is read.
        config: Run configuration supplied by the graph. It is forwarded to the model call so
            callbacks and run settings apply to the nested invocation as well.

    Returns:
        A state update with the model reply under ``"messages"`` and ``"c"`` set to 1.
    """
    reply = llm.invoke(state["messages"], config)
    return {"messages": [reply], "c": 1}

def chatbot1(state: State, config):
    """Second node of the demo graph: reads nothing and returns an empty state update."""
    no_changes = dict()
    return no_changes


# add_node(name, callable): the first argument is the unique node name, the second is the
# function or object that is called whenever the node runs.
# Resulting flow: entry -> "chatbot" -> "chatbot1" -> finish.
graph_builder.add_node("chatbot", chatbot)
graph_builder.add_node("chatbot1", chatbot1)
graph_builder.set_entry_point("chatbot")
graph_builder.add_edge("chatbot", "chatbot1")
graph_builder.set_finish_point("chatbot1")
# Compile with the in-memory checkpointer so each thread_id keeps its own state between runs.
graph = graph_builder.compile(checkpointer=memory)

2025-03-13 17:27:10,072 - INFO - Found credentials in shared credentials file: ~/.aws/credentials


In [44]:
# Conversation thread used by this and the following cells; the checkpointer keys state on it.
config = {"configurable": {"thread_id": "1"}}
user_input = "Hi there! My name is Will."

# The config is the **second positional argument** to stream() or invoke()!
# stream_mode="values" yields the full state after each step; the loop prints the newest message.
events = graph.stream(
    {"messages": [{"role": "user", "content": user_input}]},
    config,
    stream_mode="values",
)
for event in events:
    event["messages"][-1].pretty_print()



2025-03-13 17:27:11,150 - INFO - Using Bedrock Invoke API to generate response



Hi there! My name is Will.


2025-03-13 17:27:12,521 - INFO - The message received from Bedrock: Hi Will! It's nice to meet you. How are you doing today?



Hi Will! It's nice to meet you. How are you doing today?


In [45]:
# Read back the `c` field from the checkpointed state of the thread in `config`
# (the `chatbot` node sets it to 1).
print(graph.get_state(config).values["c"])

1


In [29]:
# Same thread id as the introduction cell, so the checkpointed history is available to the model.
# NOTE(review): this cell's recorded execution count (29) is lower than the cells above it, so
# its saved output comes from an earlier run - re-run the notebook in order to refresh it.
config = {"configurable": {"thread_id": "1"}}
user_input = "Hi there! what is my name?"

# The config is the **second positional argument** to stream() or invoke()!
events = graph.stream(
    {"messages": [{"role": "user", "content": user_input}]},
    config,
    stream_mode="values",
)
for event in events:
    event["messages"][-1].pretty_print()

2025-03-13 15:14:34,649 - INFO - Using Bedrock Invoke API to generate response



Hi there! what is my name?
a {'messages': [HumanMessage(content='Hi there! My name is Will.', additional_kwargs={}, response_metadata={}, id='8ed62c5e-5cc3-40a4-9be4-ff9e1dc86002'), AIMessage(content="Hi Will, it's nice to meet you! My name is Claude. I'm an AI assistant created by Anthropic.", additional_kwargs={'usage': {'prompt_tokens': 15, 'completion_tokens': 29, 'total_tokens': 44}, 'stop_reason': 'end_turn', 'model_id': 'anthropic.claude-3-sonnet-20240229-v1:0'}, response_metadata={'usage': {'prompt_tokens': 15, 'completion_tokens': 29, 'total_tokens': 44}, 'stop_reason': 'end_turn', 'model_id': 'anthropic.claude-3-sonnet-20240229-v1:0'}, id='run-9802fa31-61a0-409f-b333-5defafbca762-0', usage_metadata={'input_tokens': 15, 'output_tokens': 29, 'total_tokens': 44}), HumanMessage(content='Hi there! My name is Will.', additional_kwargs={}, response_metadata={}, id='1e1762dc-4f01-4a87-b09e-5ca830470645'), AIMessage(content="Hello again Will! As I mentioned, I'm Claude, an AI assista

2025-03-13 15:14:35,694 - INFO - The message received from Bedrock: Will, you've introduced yourself as Will multiple times now. I remember that your name is Will.



Will, you've introduced yourself as Will multiple times now. I remember that your name is Will.


In [33]:
# Show the full StateSnapshot for the thread in `config`.
# NOTE(review): the repr of the whole message history is very large; consider displaying only the
# fields of interest before sharing the notebook.
graph.get_state(config)

StateSnapshot(values={'messages': [HumanMessage(content='Hi there! My name is Will.', additional_kwargs={}, response_metadata={}, id='066597c1-1e50-4cdd-abb8-dcd68df6204c'), AIMessage(content="Hi Will, it's nice to meet you! I'm Claude, an AI assistant created by Anthropic. How are you doing today?", additional_kwargs={'usage': {'prompt_tokens': 15, 'completion_tokens': 32, 'total_tokens': 47}, 'stop_reason': 'end_turn', 'model_id': 'anthropic.claude-3-sonnet-20240229-v1:0'}, response_metadata={'usage': {'prompt_tokens': 15, 'completion_tokens': 32, 'total_tokens': 47}, 'stop_reason': 'end_turn', 'model_id': 'anthropic.claude-3-sonnet-20240229-v1:0'}, id='run-1e36f270-a547-44d9-855a-ad18875b454b-0', usage_metadata={'input_tokens': 15, 'output_tokens': 32, 'total_tokens': 47}), HumanMessage(content='Hi there! My name is Will.', additional_kwargs={}, response_metadata={}, id='be447d32-d54e-4dbb-91da-54ccd7a3bf1c'), AIMessage(content="Hello again Will! I'm doing well, thanks for asking. A