# Statistician Agent
In this notebook we create the Statistician sub-agent.

#### Ensure the latest version of boto3 is shown below

In [None]:
!pip freeze | grep boto3

#### Load in environment variables to notebook

In [None]:
# Restore the path of the shared imports file from IPython's %store database
# (presumably saved by an earlier setup notebook — confirm it has been run).
%store -r IMPORTS_PATH

# Restore the AWS account ID and region; later cells use them to build ARNs
# and to create boto3 clients.
%store -r account_id
%store -r region

# Restore the foundation model identifier that is passed to create_agent below.
%store -r agent_foundation_model

#### Retrieve imports environment variable and bring libraries into notebook

In [None]:
# Execute the file at IMPORTS_PATH so the names it defines become available here.
# Later cells use boto3, json, uuid and AgentsForAmazonBedrock without importing
# them, so they are presumably provided by this file — verify if a NameError appears.
%run $IMPORTS_PATH

# Prerequisites

This notebook assumes that you have deployed the CloudFormation stack located at https://github.com/aws-samples/amazon-bedrock-agents-cancer-biomarker-discovery to your AWS account in workshop mode.

# Agent Creation
In this section we create the sub-agent

#### Define agent configuration below 

In [None]:
# Display name and short description registered for the sub-agent.
agent_name = 'Statistician'
agent_description = "scientific analysis for survival analysis"

# System prompt for the agent. The bar-chart example in step 3 must echo the
# user's own values (apple, egg) so the model is not shown a mismatched mapping.
agent_instruction = """You are a medical research assistant AI specialized in survival analysis with biomarkers. 
Your primary job is to interpret user queries, run scientific analysis tasks, and provide relevant medical insights 
with available visualization tools. Use only the appropriate tools as required by the specific question. 
Follow these instructions carefully: 1. If the user query requires a Kaplan-Meier chart: 
a. Map survival status as 0 for Alive and 1 for Dead for the event parameter. 
b. Use survival duration as the duration parameter. c. Use the /group_survival_data tool to create baseline and 
condition group based on expression value threshold provided by the user. 
2. If a survival regression analysis is needed: a. You need access to all records with columns start with survival 
status as first column, then survival duration, and the required biomarkers. 
b. Use the /fit_survival_regression tool to identify the best-performing biomarker based on the p-value summary. 
c. Ask for S3 data location if not provided, do not assume S3 bucket names or object names. 
3. When you need to create a bar chart or plot: a. Always pass x_values and y_values in Array type to the function. 
If the user says x values are apple,egg and y values are 3,4 or as [apple,egg] and [3,4] pass their value as 
['apple', 'egg'] and [3,4] 4. When providing your response: a. Start with a brief summary of your 
understanding of the user's query. b. Explain the steps you're taking to address the query. 
Ask for clarifications from the user if required. c. If you generate any charts or perform statistical analyses, 
explain their significance in the context of the user's query. d. Conclude with a concise summary of the findings
and their potential implications for medical research. e. Make sure to explain any medical or statistical concepts 
in a clear, accessible manner."""

#### Instantiate agent with the desired configuration

In [None]:
# Helper object (made available by the imports file) that wraps the Bedrock
# agent management calls used throughout this notebook.
agents = AgentsForAmazonBedrock()

# Create the sub-agent from the configuration defined in the previous cell;
# code interpretation and verbose logging are both switched off.
scientific_analysis_agent = agents.create_agent(
    agent_name, agent_description, agent_instruction, agent_foundation_model,
    code_interpretation=False, verbose=False,
)

# Display whatever create_agent returned (its first element is used as the agent ID below).
scientific_analysis_agent

#### Extract useful agent information

In [None]:
# The first element of create_agent's return value is used as the agent ID.
scientific_analysis_agent_id = scientific_analysis_agent[0]
# Build the agent ARN by hand from the restored region/account and the agent ID;
# it is used later as the SourceArn when granting Lambda invoke permission.
scientific_analysis_agent_arn = f"arn:aws:bedrock:{region}:{account_id}:agent/{scientific_analysis_agent_id}"

# Show both values as the cell output.
scientific_analysis_agent_id, scientific_analysis_agent_arn

#### Define functions for first ActionGroup

In [None]:
# Function schema for the bar-chart action group. The descriptions are read by
# the agent's model when it decides how to fill in each parameter, so they
# should be free of typos.
function_defs = [
    {
      "name": "bar_chart",
      "description": "create a bar chart",
      "parameters": {
        "title": {
          "description": "title of the bar chart",
          "required": True,
          "type": "string"
        },
        "x_label": {
          "description": "title of the x axis",
          "required": True,
          "type": "string"
        },
        "x_values": {
          "description": "values for the x axis",
          "required": True,
          "type": "array"
        },
        "y_label": {
          "description": "title of the y axis",
          "required": True,
          "type": "string"
        },
        "y_values": {
          "description": "values for the y axis",
          "required": True,
          "type": "array"
        }
      },
      "requireConfirmation": "DISABLED"
    },
]

#### Attach Lambda function and create first ActionGroup for agent
Note: This uses the default Lambda function name "MatPlotBarChartLambda". The name may differ in your account, so double-check that this function exists; if it does not, change `scientific_analysis_lambda_function_name_1` in the code below.

In [None]:
# Name of the Lambda function that backs the bar-chart action group.
scientific_analysis_lambda_function_name_1 = "MatPlotBarChartLambda"  # Change if different in your account
# Build the function ARN by hand from the restored region and account ID.
scientific_analysis_lambda_function_arn_1 = f"arn:aws:lambda:{region}:{account_id}:function:{scientific_analysis_lambda_function_name_1}"
# Persist the ARN in IPython's %store database so it can be restored with %store -r.
%store scientific_analysis_lambda_function_arn_1

In [None]:
# Register the bar-chart action group on the agent, backed by the Lambda above.
# NOTE(review): source_code_file receives the Lambda ARN rather than a file path —
# presumably the helper treats it as a reference to an existing function; confirm.
agents.add_action_group_with_lambda(
    agent_name=agent_name,
    agent_action_group_name="matplotbarchart",
    agent_action_group_description="Creates a bar chart from the given input values",
    agent_functions=function_defs,
    lambda_function_name=scientific_analysis_lambda_function_name_1,
    source_code_file=scientific_analysis_lambda_function_arn_1,
    verbose=True,
)

#### Add resource based policy to Lambda function to allow agent to invoke

In [None]:
lambda_client = boto3.client('lambda', region)

# Resource-based permission that lets this specific Bedrock agent (and only it,
# via SourceArn) invoke the bar-chart Lambda function.
permission_kwargs = dict(
    FunctionName=scientific_analysis_lambda_function_arn_1,
    StatementId="AllowScientificAnalysisAgentAccess",
    Action="lambda:InvokeFunction",
    Principal="bedrock.amazonaws.com",
    SourceArn=scientific_analysis_agent_arn,
)

try:
    response = lambda_client.add_permission(**permission_kwargs)
except lambda_client.exceptions.ResourceConflictException:
    # A statement with this ID already exists (the cell was re-run, or the agent
    # was re-created with a new ID). Replace it so the permission always points
    # at the current agent ARN and the cell stays safe to run repeatedly.
    lambda_client.remove_permission(
        FunctionName=permission_kwargs["FunctionName"],
        StatementId=permission_kwargs["StatementId"],
    )
    response = lambda_client.add_permission(**permission_kwargs)

print("Resource policy added successfully.")
print("Response:", response)

#### Define functions for second ActionGroup

In [None]:
def _required_param(description, param_type):
    """Return the spec dict for one mandatory action-group function parameter."""
    return {"description": description, "required": True, "type": param_type}


# Function schemas for the survival-analysis action group: one function that
# plots a Kaplan-Meier chart and one that fits a survival regression model.
function_defs = [
    {
        "name": "plot_kaplan_meier",
        "description": "Plots a Kaplan-Meier survival chart",
        "parameters": {
            "biomarker_name": _required_param("name of the biomarker", "string"),
            "duration_baseline": _required_param("duration in number of days for baseline", "array"),
            "duration_condition": _required_param("duration in number of days for condition", "array"),
            "event_baseline": _required_param("survival event for baseline", "array"),
            "event_condition": _required_param("survival event for condition", "array"),
        },
        "requireConfirmation": "DISABLED",
    },
    {
        "name": "fit_survival_regression",
        "description": "Fit a survival regression model with data in a S3 object",
        "parameters": {
            "bucket": _required_param(
                "s3 bucket where the data is stored by the database query tool", "string"
            ),
            "key": _required_param(
                "json file name that is located in the s3 bucket and contains the data for fitting the model",
                "string",
            ),
        },
        "requireConfirmation": "DISABLED",
    },
]

#### Attach Lambda function and create second Action Group for agent
Note: This uses the default Lambda function name "ScientificPlotLambda". The name may differ in your account, so double-check that this function exists; if it does not, change `scientific_analysis_lambda_function_name_2` in the code below.

In [None]:
# Name of the Lambda function that backs the survival-analysis action group.
scientific_analysis_lambda_function_name_2 = "ScientificPlotLambda"  # Change if different in your account
# Build the function ARN by hand from the restored region and account ID.
scientific_analysis_lambda_function_arn_2 = f"arn:aws:lambda:{region}:{account_id}:function:{scientific_analysis_lambda_function_name_2}"
# Persist the ARN in IPython's %store database so it can be restored with %store -r.
%store scientific_analysis_lambda_function_arn_2

In [None]:
# Register the survival-analysis action group on the agent, backed by the second Lambda.
# NOTE(review): source_code_file receives the Lambda ARN rather than a file path —
# presumably the helper treats it as a reference to an existing function; confirm.
agents.add_action_group_with_lambda(
    agent_name=agent_name,
    agent_action_group_name="scientificAnalysisActionGroup",
    agent_action_group_description="Actions for scientific analysis with lifelines library",
    agent_functions=function_defs,
    lambda_function_name=scientific_analysis_lambda_function_name_2,
    source_code_file=scientific_analysis_lambda_function_arn_2,
    verbose=True,
)

#### Add resource based policy to Lambda function to allow agent to invoke

In [None]:
# Resource-based permission that lets this specific Bedrock agent (and only it,
# via SourceArn) invoke the survival-analysis Lambda function.
# Uses the lambda_client created in the earlier permission cell.
permission_kwargs = dict(
    FunctionName=scientific_analysis_lambda_function_arn_2,
    StatementId="AllowScientificAnalysisAgentAccess",
    Action="lambda:InvokeFunction",
    Principal="bedrock.amazonaws.com",
    SourceArn=scientific_analysis_agent_arn,
)

try:
    response = lambda_client.add_permission(**permission_kwargs)
except lambda_client.exceptions.ResourceConflictException:
    # A statement with this ID already exists (the cell was re-run, or the agent
    # was re-created with a new ID). Replace it so the permission always points
    # at the current agent ARN and the cell stays safe to run repeatedly.
    lambda_client.remove_permission(
        FunctionName=permission_kwargs["FunctionName"],
        StatementId=permission_kwargs["StatementId"],
    )
    response = lambda_client.add_permission(**permission_kwargs)

print("Resource policy added successfully.")
print("Response:", response)

#### Invoke the Scientific Analysis Agent through its test alias to check that it answers questions properly
Note: This agent needs a supervisor agent to properly answer some questions

In [None]:
# Runtime client used to send a prompt to the agent.
bedrock_agent_runtime_client = boto3.client("bedrock-agent-runtime", region)

# Fresh session identifier for this test conversation.
session_id: str = str(uuid.uuid1())

test_query = "What is the best gene biomarker (lowest p value) with overall survival for patients that have undergone chemotherapy?"

# Call the agent through the "TSTALIASID" alias with tracing enabled.
response = bedrock_agent_runtime_client.invoke_agent(
    agentId=scientific_analysis_agent_id,
    agentAliasId="TSTALIASID",
    sessionId=session_id,
    inputText=test_query,
    enableTrace=True,
    endSession=False,
    sessionState={},
)

separator = "===================="
print("Request sent to Agent:\n{}".format(response))
print(separator)
print("Agent processing query now")
print(separator)

# Walk the completion event stream once, keeping only events that carry a
# 'chunk' with 'bytes', and stitch the decoded pieces into the full answer.
answer = "".join(
    stream_event['chunk']['bytes'].decode('utf-8')
    for stream_event in response['completion']
    if 'chunk' in stream_event and 'bytes' in stream_event['chunk']
)

print("Agent Answer: {}".format(answer))
print(separator)

#### Now that the agent has been tested via direct invocation, create an alias for it

In [None]:
scientific_analysis_agent_alias_id, scientific_analysis_agent_alias_arn = agents.create_agent_alias(
    scientific_analysis_agent[0], 'v1'
)

%store scientific_analysis_agent_alias_arn
scientific_analysis_agent_alias_id, scientific_analysis_agent_alias_arn