In [None]:
import pandas as pd 
import os 
import requests

!pip install anthropic

# Module One: Interacting with state-of-the-art LLMs

In [None]:
# Import the Anthropic Python SDK (and os, to read the key from the environment)
import os

import anthropic

# Your Anthropic API key (get one at https://console.anthropic.com/).
# Prefer exporting ANTHROPIC_API_KEY in your shell so the secret is never
# saved inside the notebook; the placeholder is only used when it is unset.
API_KEY = os.environ.get("ANTHROPIC_API_KEY", "your-anthropic-api-key")

# Which Claude model to use. Any model ID your account can access works, e.g.:
#   - "claude-opus-4-5-20251101"  (used below)
#   - "claude-opus-4-20250514"
model = "claude-opus-4-5-20251101"

# Conversation history is a list of message dictionaries.
# Each message has:
#   - "role": either "user" (you) or "assistant" (Claude)
#   - "content": the text of the message
# The conversation alternates between user and assistant messages.
# Claude will generate a response continuing from the last user message.
conversation = [
    {"role": "user", "content": "XXX"},  # Replace "XXX" with your own prompt
]

# Create the Anthropic client with your API key
client = anthropic.Anthropic(api_key=API_KEY)

# Send the conversation to Claude and get a response
# Parameters:
#   - model: which Claude model to use
#   - max_tokens: maximum length of Claude's response (required)
#   - messages: the conversation history
response = client.messages.create(
    model=model,
    max_tokens=4096,
    messages=conversation,
)

# The response object contains Claude's reply in response.content
# response.content is a list of content blocks; for text, access .text
print(response.content[0].text)

# Module Two: Tool Use (Function Calling) with Agents

In this module, we'll teach Claude to **use tools**—external functions it can call to retrieve real data.

## What is Tool Use?

LLMs like Claude can only work with the information in their training data or what you provide in the prompt. But what if you want Claude to:
- Look up live data from a database?
- Query an API?
- Perform calculations with real numbers?

**Tool use** (also called "function calling") solves this. You:
1. Define tools (functions) that Claude can request to call
2. Claude decides when it needs a tool and returns a structured request
3. Your code executes the function and returns the result to Claude
4. Claude incorporates the result into its final answer

## Our Example: DepMap CRISPR Gene Effect Lookup

We'll give Claude a tool to query the **DepMap CRISPR gene effect** dataset. This dataset contains scores indicating how essential each gene is to the survival of different cancer cell lines:
- **Negative scores** → the gene is essential (knocking it out kills the cell)
- **Scores near zero** → the gene has little effect
- **Positive scores** → knocking out the gene helps the cell grow

Claude will be able to answer questions like: *"How essential is TP53 in cell line ACH-000001?"*

In [None]:
# =============================================================================
# STEP 1: Define the actual Python functions (the "tools")
# =============================================================================
# These are regular Python functions that do the real work.
# Claude doesn't run these directly—it just asks us to run them.

# URL to the DepMap CRISPR Gene Effect dataset (replace with actual URL).
# NOTE: get_crispr_gene_effect() below uses this value as the default for its
# `url` parameter, and Python binds default values when the `def` statement
# runs. After changing `url`, re-run this whole cell so the function picks up
# the new value.
url = "https://example.com/CRISPRGeneEffect.csv"  # TODO: replace with real URL

def load_crispr_gene_effect(url: str, local_path: str = "data/CRISPRGeneEffect.csv"):
    """
    Cached loader for the CRISPR gene effect matrix.

    If `local_path` does not exist, the file is downloaded from `url` and
    saved there; otherwise the existing file is used and `url` is ignored.
    The CSV is then parsed from disk on every call.

    Args:
        url: Where to download the CSV from when no local copy exists.
        local_path: Location of the on-disk copy.

    Returns:
        pandas DataFrame indexed by the first CSV column (cell line IDs),
        with the remaining columns (genes) as DataFrame columns.

    Raises:
        requests.RequestException: If the download fails or returns an
            HTTP error status.
    """
    if not os.path.exists(local_path):
        parent_dir = os.path.dirname(local_path)
        # dirname is "" for a bare filename, and os.makedirs("") raises.
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        print(f"Downloading DepMap data from:\n  {url}")
        # Download to a side file and rename only on success, so an
        # interrupted transfer is never mistaken for a complete cache.
        partial_path = local_path + ".part"
        # timeout=(connect, read) keeps a stalled server from hanging the
        # notebook; stream=True avoids holding the whole file in memory.
        with requests.get(url, stream=True, timeout=(10, 300)) as resp:
            resp.raise_for_status()
            with open(partial_path, "wb") as f:
                for chunk in resp.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        os.replace(partial_path, local_path)
        print(f"Saved to {local_path}")

    df = pd.read_csv(local_path, index_col=0)  # First column = cell line IDs
    return df


def get_crispr_gene_effect(gene: str, depmap_id: str, url: str = url,
                           local_path: str = "data/CRISPRGeneEffect.csv"):
    """
    Look up the CRISPR gene effect score for a specific gene in a specific cell line.

    The gene is matched against the column labels exactly first. If that
    fails, a column labelled "<gene> (<something>)" is accepted, because
    DepMap matrices label gene columns as "SYMBOL (EntrezID)",
    e.g. "TP53 (7157)".

    Args:
        gene: HGNC gene symbol (e.g., "TP53", "KRAS", "BRCA1")
        depmap_id: DepMap cell line ID (e.g., "ACH-000001")
        url: URL to download the dataset from
        local_path: Where to cache the downloaded file

    Returns:
        float: The gene effect score.
               Negative = essential, Zero = no effect, Positive = beneficial to knock out
        str: A message starting with "Error:" when the gene or the cell line
             is not present in the dataset.
        None: When the stored value cannot be converted to a float.
    """
    df = load_crispr_gene_effect(url, local_path)

    column = gene
    if column not in df.columns:
        # Fall back to the "SYMBOL (EntrezID)" labelling used by DepMap files.
        prefix = f"{gene} ("
        column = next((c for c in df.columns if str(c).startswith(prefix)), None)
        if column is None:
            return f"Error: Gene '{gene}' not found in the dataset."

    if depmap_id not in df.index:
        return f"Error: Cell line '{depmap_id}' not found in the dataset."

    val = df.loc[depmap_id, column]

    try:
        return float(val)
    except (TypeError, ValueError):
        # Non-numeric cell, or a non-scalar result (e.g. duplicated index labels).
        return None

In [None]:
# =============================================================================
# STEP 2: Define the tool schema for Claude
# =============================================================================
# Claude needs a JSON schema describing what tools are available.
# This tells Claude:
#   - The tool's name
#   - What it does (description)
#   - What parameters it accepts (with types and descriptions)

# One dictionary per tool. The "name" value must match the string that
# process_tool_call() compares against when dispatching a request.
tools = [
    {
        "name": "get_crispr_gene_effect",
        # Free-text description of the tool and when to use it.
        "description": """Look up the CRISPR gene effect score for a specific gene 
in a specific cancer cell line from the DepMap database.

The score indicates how essential the gene is:
- Negative scores (e.g., -1.0): The gene is essential; knocking it out kills the cell
- Scores near zero: The gene has little effect on cell survival  
- Positive scores: Knocking out the gene may help the cell grow

Use this tool when asked about gene essentiality, gene dependencies, or 
CRISPR knockout effects in cancer cell lines.""",
        # Description of the arguments object: the property names here are the
        # keys process_tool_call() reads from the tool input.
        "input_schema": {
            "type": "object",
            "properties": {
                "gene": {
                    "type": "string",
                    "description": "The HGNC gene symbol (e.g., 'TP53', 'KRAS', 'BRCA1', 'MYC')"
                },
                "depmap_id": {
                    "type": "string",
                    "description": "The DepMap cell line ID (e.g., 'ACH-000001')"
                }
            },
            # Both arguments are declared mandatory.
            "required": ["gene", "depmap_id"]
        }
    }
]

# Quick sanity check: list the names of the tools defined above.
print("Tool schema defined!")
print(f"Available tools: {[t['name'] for t in tools]}")

In [None]:
# =============================================================================
# STEP 3: Send a message to Claude WITH tools available
# =============================================================================
# When we include the `tools` parameter, Claude knows it can request to use them.
# If Claude decides it needs data from a tool, it will return a "tool_use" response
# instead of (or in addition to) a text response.

import anthropic

client = anthropic.Anthropic(api_key=API_KEY)

# Ask Claude a question that requires looking up real data
user_question = "How essential is the gene TP53 in cell line ACH-000001?"

response = client.messages.create(
    model=model,  # Reuse the model chosen in Module One instead of repeating the ID
    max_tokens=1024,
    tools=tools,  # <-- This tells Claude what tools are available
    messages=[
        {"role": "user", "content": user_question}
    ]
)

# Let's examine what Claude returned
print("Stop reason:", response.stop_reason)
print("\nResponse content:")
for block in response.content:
    print(f"  Type: {block.type}")
    if block.type == "text":
        print(f"  Text: {block.text}")
    elif block.type == "tool_use":
        print(f"  Tool: {block.name}")
        print(f"  Input: {block.input}")

## Understanding Claude's Response

When Claude needs to use a tool, the response will have:
- `stop_reason`: `"tool_use"` (instead of `"end_turn"`)
- `content`: Contains one or more blocks, which can be:
  - `TextBlock`: Claude's thinking or preamble text
  - `ToolUseBlock`: A request to call a specific tool with specific arguments

**Claude doesn't actually run the tool!** It just tells you *which* tool to call and *what arguments* to use. Your code must:
1. Execute the function with those arguments
2. Send the result back to Claude
3. Let Claude formulate a final answer

In [None]:
# =============================================================================
# STEP 4: Execute the tool and return the result to Claude
# =============================================================================
# This is the "agent loop": 
#   1. Claude requests a tool call
#   2. We execute it
#   3. We send the result back
#   4. Claude gives a final answer (or requests another tool)

def process_tool_call(tool_name: str, tool_input: dict):
    """
    Execute a tool based on Claude's request.
    Maps tool names to actual Python functions.

    Args:
        tool_name: Name of the tool Claude asked for.
        tool_input: Arguments Claude supplied for the tool.

    Returns:
        Whatever the tool function returns, or a string starting with
        "Error:" when the tool name is unknown or a required argument is
        missing. Errors are returned rather than raised so they can be sent
        back to Claude as the tool result.
    """
    if tool_name != "get_crispr_gene_effect":
        return f"Error: Unknown tool '{tool_name}'"

    arguments = tool_input or {}
    missing = [key for key in ("gene", "depmap_id") if key not in arguments]
    if missing:
        return (
            f"Error: Missing required argument(s) for '{tool_name}': "
            f"{', '.join(missing)}"
        )

    return get_crispr_gene_effect(
        gene=arguments["gene"],
        depmap_id=arguments["depmap_id"],
    )


# Check if Claude requested a tool.
# NOTE: tool_name, tool_input, tool_use_id and result are assigned only inside
# this `if`, so they exist only when stop_reason is "tool_use". The Step 5 cell
# reads tool_use_id and result.
if response.stop_reason == "tool_use":
    # Find the tool use block. next() takes only the FIRST tool_use block in
    # the response; any further tool_use blocks are not handled in this cell.
    tool_use_block = next(block for block in response.content if block.type == "tool_use")
    
    tool_name = tool_use_block.name
    tool_input = tool_use_block.input
    tool_use_id = tool_use_block.id  # Claude assigns an ID to track this call
    
    print(f"Claude wants to call: {tool_name}")
    print(f"With arguments: {tool_input}")
    
    # Execute the tool
    result = process_tool_call(tool_name, tool_input)
    print(f"\nTool result: {result}")

In [None]:
# =============================================================================
# STEP 5: Send the tool result back to Claude for a final answer
# =============================================================================
# We continue the conversation by adding:
#   1. Claude's response (including the tool_use block) as an "assistant" message
#   2. The tool result as a "user" message containing a "tool_result" content block

if response.stop_reason == "tool_use":
    # Every tool_use block in Claude's response needs a matching tool_result
    # in the next user message. Step 4 already ran the first requested tool
    # (its output is in `result`); run any additional requested tools here.
    tool_result_blocks = []
    for block in response.content:
        if block.type != "tool_use":
            continue
        if block.id == tool_use_id:
            block_result = result
        else:
            block_result = process_tool_call(block.name, block.input)
        tool_result_blocks.append({
            "type": "tool_result",
            "tool_use_id": block.id,       # Must match the ID from Claude's request
            "content": str(block_result),  # The actual result from our function
        })

    # Build the continued conversation
    continued_messages = [
        # Original user question
        {"role": "user", "content": user_question},

        # Claude's response (which included the tool request)
        {"role": "assistant", "content": response.content},

        # The tool result(s) we're providing back to Claude
        {"role": "user", "content": tool_result_blocks},
    ]

    # Send to Claude for final answer
    final_response = client.messages.create(
        model=model,  # Same model variable as configured in Module One
        max_tokens=1024,
        tools=tools,
        messages=continued_messages
    )
else:
    # Claude did not request a tool in Step 3, so there is no tool result to
    # send; its earlier response already is the final answer.
    continued_messages = [{"role": "user", "content": user_question}]
    final_response = response

# Print Claude's final answer
print("Claude's final answer:")
print("-" * 50)
for block in final_response.content:
    if block.type == "text":
        print(block.text)

## Putting It All Together: The Complete Agent Loop

The pattern above is the foundation of all AI agents:

```
User Question → Claude → Tool Request → Execute Tool → Result → Claude → Final Answer
```

In practice, Claude might need to call **multiple tools** or call the **same tool multiple times** before it has enough information. A robust agent uses a loop:

```python
while response.stop_reason == "tool_use":
    # Execute requested tools
    # Send results back
    # Get next response
```

Let's implement this as a reusable function!

In [None]:
# =============================================================================
# STEP 6: Complete Agent Loop Function
# =============================================================================
# A reusable function that handles the full tool-use cycle

def run_agent(user_message: str, tools: list, max_iterations: int = 10,
              model: str = "claude-opus-4-5-20251101", max_tokens: int = 1024):
    """
    Run a complete agent interaction with tool use support.

    Uses the module-level `client` to call Claude and `process_tool_call`
    to execute requested tools.

    Args:
        user_message: The user's question or request
        tools: List of tool definitions
        max_iterations: Safety limit to prevent infinite loops
        model: Claude model ID to use for every call in the loop
        max_tokens: Maximum length of each Claude response

    Returns:
        The text of Claude's last response once it stops requesting tools,
        or "Error: Max iterations reached" if the limit is hit first.
    """
    messages = [{"role": "user", "content": user_message}]

    for iteration in range(max_iterations):
        print(f"\n--- Iteration {iteration + 1} ---")

        # Call Claude
        response = client.messages.create(
            model=model,
            max_tokens=max_tokens,
            tools=tools,
            messages=messages
        )

        print(f"Stop reason: {response.stop_reason}")

        # If Claude wants to use tools, process them and loop again
        if response.stop_reason == "tool_use":
            # Add Claude's response to message history
            messages.append({"role": "assistant", "content": response.content})

            # Process each tool use request
            tool_results = []
            for block in response.content:
                if block.type == "tool_use":
                    print(f"  Calling tool: {block.name}({block.input})")
                    result = process_tool_call(block.name, block.input)
                    print(f"  Result: {result}")

                    tool_results.append({
                        "type": "tool_result",
                        "tool_use_id": block.id,
                        "content": str(result)
                    })

            # Add tool results to message history
            messages.append({"role": "user", "content": tool_results})
            continue

        # Any other stop reason ends the loop. Re-sending the unchanged
        # history (as would happen for e.g. "max_tokens") only repeats the
        # same request, so return whatever text Claude produced.
        if response.stop_reason != "end_turn":
            print(f"  Note: response ended with stop_reason={response.stop_reason!r}; "
                  "the answer may be incomplete.")
        return "".join(
            block.text for block in response.content if block.type == "text"
        )

    return "Error: Max iterations reached"


# Try the agent end to end: print a banner, ask one question, show the answer.
print("\n".join(["=" * 60, "TESTING THE AGENT", "=" * 60]))

answer = run_agent(
    "What is the CRISPR gene effect score for TP53 in ACH-000001? Is this gene essential?",
    tools,
)

# Closing banner followed by the text returned by the agent loop.
print("\n".join(["\n" + "=" * 60, "FINAL ANSWER:", "=" * 60]))
print(answer)

## Summary: Key Concepts

1. **Tools are just functions** - Any Python function can be exposed as a tool
2. **Schema describes the tool** - Claude needs a JSON schema with name, description, and parameters
3. **Claude requests, you execute** - Claude doesn't run tools; it tells you what to run
4. **Results complete the loop** - Send tool results back so Claude can answer
5. **Agents loop until done** - Keep processing tool calls until `stop_reason == "end_turn"`

## Exercises

Try modifying the code above to:
1. Add a second tool (e.g., `list_cell_lines` that returns available cell line IDs)
2. Ask Claude a question that requires multiple tool calls
3. Add error handling for when the dataset isn't available