# 如何构建一个基于Lambda的Query Agent Tool

In [None]:
# !pip install pydantic
# !pip install --force-reinstall 'sqlalchemy<2.0.0'
!pip install awscli
!pip install sqlalchemy pymysql pydantic pandas

### 1.创建一个RDS Database

- --vpc-security-group-ids 需要指定为与chatbot部署以及本notebook处于同一个VPC中的安全组
- --availability-zone 与notebook在同一个AZ
- --db-subnet-group-name 可以通过下面awscli进行创建

In [None]:
# Create the DB subnet group that is later passed to --db-subnet-group-name.
# Replace the placeholders in braces with your own group name and subnet IDs.
!aws rds create-db-subnet-group \
    --db-subnet-group-name {your-db-subnet-group-name} \
    --db-subnet-group-description "Your DB Subnet Group Description" \
    --subnet-ids {subnet-1} {subnet-2}

In [None]:
# Create the MySQL RDS instance used by the query tool.
# NOTE(review): the master password is passed in plain text and is saved with the
# notebook — replace it with your own value and do not commit the real one.
# The security group, availability zone and subnet group below are specific to the
# author's account; substitute your own values.
!aws rds create-db-instance \
    --db-instance-identifier my-rds-instance2 \
    --allocated-storage 50 \
    --db-instance-class db.r6g.large \
    --engine mysql \
    --master-username ybalbert \
    --master-user-password 12abcd3456 \
    --vpc-security-group-ids sg-0ae78936a18523497 \
    --availability-zone us-west-2a \
    --db-subnet-group-name testpydantic

### 2. 创建表并注入数据

#### 2.1 连接数据库

In [20]:
from sqlalchemy import create_engine, text, Column, Integer, String, MetaData, Table, Sequence, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import pandas as pd

import getpass
import os
from urllib.parse import quote_plus

# Connection settings for the RDS instance.
db_username = "ybalbert"
# Do not keep the real password in the notebook: read it from the DB_PASSWORD
# environment variable, or prompt for it when the variable is not set.
db_password = os.environ.get("DB_PASSWORD") or getpass.getpass("RDS password: ")
db_host = "my-rds-instance2.cw6yhl7xqafj.us-west-2.rds.amazonaws.com"
db_port = "3306"
db_name = "simple_info_db"

# Server-level URL (no database selected yet). The password is URL-escaped so
# characters such as '@' or '/' cannot corrupt the connection string.
connection_string = f"mysql+pymysql://{db_username}:{quote_plus(db_password)}@{db_host}:{db_port}"
engine = create_engine(connection_string)

# Create the target database if it does not exist yet.
with engine.connect() as connection:
    connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {db_name}"))

In [None]:
from urllib.parse import quote_plus

# Same server as before, now with the database selected. The password is
# URL-escaped so special characters cannot corrupt the connection string.
new_db_connection_string = (
    f"mysql+pymysql://{db_username}:{quote_plus(db_password)}@{db_host}:{db_port}/{db_name}"
)
new_db_engine = create_engine(new_db_connection_string)

# repr() of the engine URL masks the password, so the credential is not written
# into the saved notebook output.
print(repr(new_db_engine.url))

# Declarative base for the ORM model and a session bound to the new database.
Base = declarative_base()
session = Session(bind=new_db_engine)

# Define the model class
class GTMS_SQLAlchemy(Base):
    """ORM mapping for the ``gtms_employee`` table; each row pairs a service with an employee."""
    __tablename__ = 'gtms_employee'
    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Both columns are nullable strings of up to 64 characters.
    service = Column(String(64), nullable=True)
    employee = Column(String(64), nullable=True)
# Create the table (no-op when it already exists).
Base.metadata.create_all(bind=new_db_engine)

# Load the seed data through the ORM model.
# NOTE: re-running this cell inserts the CSV rows again.
df = pd.read_csv("gtms.csv")

# Empty CSV cells are read as NaN; store them as NULL instead of a float NaN,
# which the string columns cannot hold.
rows = [
    GTMS_SQLAlchemy(
        service=None if pd.isna(row["Service"]) else row["Service"],
        employee=None if pd.isna(row["Employee"]) else row["Employee"],
    )
    for _, row in df.iterrows()
]

try:
    session.add_all(rows)
    session.commit()
except Exception:
    # Leave the session usable for later cells if the insert fails.
    session.rollback()
    raise

#### 2.2 验证数据摄入成功与否

In [16]:
# Fetch a single row to confirm that the CSV data reached the table.
result = session.query(GTMS_SQLAlchemy).first()
print(f"{result.service} {result.employee}")

SageMaker Amanda


### 3.构建查询的Lambda

#### 3.1 构建代码zip包

In [54]:
!mkdir lambda_code

In [57]:
%%writefile lambda_code/lambda_function.py
import json
from pydantic import BaseModel, validator, ValidationError
from typing import Optional
from sqlalchemy import create_engine, text, Column, Integer, String, MetaData, Table, Sequence, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

import os

# Connection settings. Each value can be overridden with a Lambda environment
# variable; the literals are fallbacks that keep the existing deployment working.
# NOTE(review): a password committed in source is a credential leak — set
# DB_PASSWORD on the function (or use a secrets manager) and remove the fallback.
db_username = os.environ.get("DB_USERNAME", "ybalbert")
db_password = os.environ.get("DB_PASSWORD", "12abcd3456")
db_host = os.environ.get("DB_HOST", "my-rds-instance2.cw6yhl7xqafj.us-west-2.rds.amazonaws.com")
db_port = os.environ.get("DB_PORT", "3306")
db_name = os.environ.get("DB_NAME", "simple_info_db")
new_db_connection_string = f"mysql+pymysql://{db_username}:{db_password}@{db_host}:{db_port}/{db_name}"
# The engine lives at module level and is reused by warm Lambda containers;
# pool_pre_ping checks a pooled connection before use so one dropped by the
# server while the container was idle is replaced instead of failing the query.
new_db_engine = create_engine(new_db_connection_string, pool_pre_ping=True)

Base = declarative_base()
session = Session(bind=new_db_engine)

# Define the model class
class GTMS_SQLAlchemy(Base):
    """ORM mapping for the ``gtms_employee`` table; each row pairs a service with an employee."""
    __tablename__ = 'gtms_employee'
    # Auto-incrementing integer primary key.
    id = Column(Integer, primary_key=True, autoincrement=True)
    # Both columns are nullable strings of up to 64 characters.
    service = Column(String(64), nullable=True)
    employee = Column(String(64), nullable=True)

class GTMS(BaseModel):
    """Query parameters accepted by the tool; both fields are optional."""
    service: Optional[str] = None
    employee: Optional[str] = None

    @validator('service')
    def service_check(cls, v):
        """Accept only the supported service names; raise ValueError otherwise."""
        supported = ['sagemaker', 'emr', 'msk', 'personalize']
        if v in supported:
            return v
        raise ValueError("service should be in ['sagemaker', 'emr', 'msk', 'personalize']")

def lambda_handler(event, context):
    """Look up GTMS rows by service and/or employee.

    Args:
        event: Invocation payload. The optional ``param`` key holds a dict with
            optional ``service`` and ``employee`` entries.
        context: Lambda runtime context (unused).

    Returns:
        dict: ``statusCode`` 200 with a newline-separated, numbered list of the
        matching rows in ``body`` (empty when nothing matches or no filter was
        given), or ``statusCode`` 500 with the pydantic validation errors as
        JSON in ``body`` when ``param`` fails validation.
    """
    # A missing or None 'param' means "no filters" rather than a TypeError
    # from GTMS(**None).
    param = event.get('param') or {}

    try:
        gtms_obj = GTMS(**param)
    except ValidationError as e:
        return {
            'statusCode': 500,
            'body': e.json()
        }

    # Filter only on the fields the caller supplied. Comparing a column with
    # None would be rendered as IS NULL and match unrelated rows.
    conditions = []
    if gtms_obj.service is not None:
        conditions.append(GTMS_SQLAlchemy.service == gtms_obj.service)
    if gtms_obj.employee is not None:
        conditions.append(GTMS_SQLAlchemy.employee == gtms_obj.employee)

    plain_result = ""
    if conditions:
        try:
            results = session.query(GTMS_SQLAlchemy).filter(or_(*conditions)).all()
            plain_result = "\n".join(
                f"{idx}. {item.employee}负责{item.service}." for idx, item in enumerate(results)
            )
        finally:
            # The session is shared by warm invocations: close it to end the
            # transaction and return the connection, so the next call reads
            # fresh data. A closed Session can be used again.
            session.close()

    return {
        'statusCode': 200,
        'body': plain_result
    }

Overwriting lambda_code/lambda_function.py


In [None]:
# Run the packaging script. Presumably it bundles lambda_code and its dependencies
# into lambda_code/my_deployment_package.zip (the archive uploaded below) — the
# script is not shown in this notebook, so verify before relying on it.
!sh create_lambda.sh

#### 3.2 创建部署lambda

+ 创建role

In [None]:
# Create the execution role; the trust policy lets the Lambda service
# (lambda.amazonaws.com) assume it.
!aws iam create-role \
    --role-name AgentLambdaRole \
    --assume-role-policy-document '{"Version": "2012-10-17", "Statement": [{"Effect": "Allow", "Principal": {"Service": "lambda.amazonaws.com"},"Action": "sts:AssumeRole"}]}'

In [None]:
# Create a managed policy from the local file lambda_role_policy.json
# (its contents are not shown in this notebook).
!aws iam create-policy --policy-name MyCustomPolicy --policy-document file://lambda_role_policy.json

In [70]:
# Attach the custom policy to the execution role.
# The account ID in the ARN is the author's; replace it with your own.
!aws iam attach-role-policy --role-name AgentLambdaRole \
    --policy-arn arn:aws:iam::106839800180:policy/MyCustomPolicy

In [None]:
!aws lambda create-function \
    --function-name YourFunctionName \
    --runtime python3.10 \
    --role arn:aws:iam::YourAccountID:role/YourExecutionRole \
    --handler lambda_function.lambda_handler \
    --zip-file fileb://lambda_code/my_deployment_package.zip \
    --vpc-config SubnetIds=[子网ID列表],SecurityGroupIds=[安全组ID列表]
    --region your-region

- 需要把Lambda部署在与数据库相同的VPC中（通过 --vpc-config 指定子网和安全组），因为数据库位于VPC内，否则Lambda访问数据库时会出现超时

In [71]:
# Create the query_gtms function from the packaged zip, with a 10-second timeout.
# NOTE(review): this call passes no --vpc-config, although the note above says the
# function must run inside the database's VPC — confirm whether the VPC settings
# are applied elsewhere.
!aws lambda create-function --function-name query_gtms \
    --zip-file fileb://lambda_code/my_deployment_package.zip --runtime python3.10 \
    --handler lambda_function.lambda_handler --timeout 10 --region us-west-2 \
    --role arn:aws:iam::106839800180:role/AgentLambdaRole

{
    "FunctionName": "query_gtms",
    "FunctionArn": "arn:aws:lambda:us-west-2:106839800180:function:query_gtms",
    "Runtime": "python3.10",
    "Role": "arn:aws:iam::106839800180:role/AgentLambdaRole",
    "Handler": "lambda_function.lambda_handler",
    "CodeSize": 51072988,
    "Description": "",
    "Timeout": 10,
    "MemorySize": 128,
    "LastModified": "2023-12-05T11:42:05.681+0000",
    "CodeSha256": "R1FXoUQpjjg9zCkrWtxmAoBl3PBs4G78MiPmcNS4KY0=",
    "Version": "$LATEST",
    "TracingConfig": {
        "Mode": "PassThrough"
    },
    "RevisionId": "c771671f-874e-450a-aaee-d0593c67f97c",
    "State": "Pending",
    "StateReason": "The function is being created.",
    "StateReasonCode": "Creating",
    "PackageType": "Zip",
    "Architectures": [
        "x86_64"
    ],
    "EphemeralStorage": {
        "Size": 512
    },
    "SnapStart": {
        "ApplyOn": "None",
        "OptimizationStatus": "Off"
    },
    "RuntimeVersionConfig": {
        "RuntimeVersionArn": "arn:

### 4. 测试pydantic框架带来的接口报错机制

In [73]:
from pydantic import BaseModel, validator, ValidationError
from typing import Optional

class GTMS(BaseModel):
    """Query parameters accepted by the tool; both fields are optional."""
    service: Optional[str] = None
    employee: Optional[str] = None

    @validator('service')
    def service_check(cls, v):
        """Accept only the supported service names; raise ValueError otherwise."""
        supported = ['sagemaker', 'emr', 'msk', 'personalize']
        if v in supported:
            return v
        raise ValueError("service should be in ['sagemaker', 'emr', 'msk', 'personalize']")

# Deliberately invalid input: "sagemakers" is not one of the allowed service names.
obj = {
    "service" : "sagemakers"
}

# Show the validation error without leaving an uncaught exception in the cell,
# so the notebook still completes under "Restart & Run All".
try:
    gtms_obj = GTMS(**obj)
except ValidationError as e:
    # Machine-readable JSON form of the error.
    print(e.json())
    # Human-readable summary of the same error.
    print(e)

[
  {
    "loc": [
      "service"
    ],
    "msg": "service should be in ['sagemaker', 'emr', 'msk', 'personalize']",
    "type": "value_error"
  }
]


ValidationError: 1 validation error for GTMS
service
  service should be in ['sagemaker', 'emr', 'msk', 'personalize'] (type=value_error)