# 如何构建一个基于Lambda的Query Agent Tool

!pip install awscli
!pip install sqlalchemy pymysql pydantic pandas

### 0. 测试pydantic框架带来的接口报错机制

In [None]:
from pydantic import BaseModel, validator, ValidationError
from typing import Optional

class GTMS(BaseModel):
    """Query parameters for a GTMS lookup; both fields are optional."""

    # Service to look up; when supplied it must be one of the accepted names.
    service: Optional[str] = None
    # Employee to look up.
    employee: Optional[str] = None

    @validator('service')
    def service_check(cls, v):
        """Accept only the known service names.

        Returns the value unchanged when it is valid and raises ValueError
        otherwise, which pydantic reports as a ValidationError.
        """
        # The field is Optional: depending on the pydantic version an explicit
        # None may reach this validator, and "not specified" must stay valid.
        if v is None:
            return v
        if v not in ['sagemaker', 'emr', 'msk', 'personalize']:
            raise ValueError("service should be in ['sagemaker', 'emr', 'msk', 'personalize']")
        return v

# Deliberately invalid payload: "sagemakers" is not one of the accepted service names.
obj = dict(service="sagemakers")

# Handled path: catch the validation failure and print its JSON error report.
try:
    gtms_obj = GTMS(**obj)
except ValidationError as err:
    print(err.json())

# Unhandled path: the same construction outside try/except lets the
# ValidationError propagate, so the raw traceback is shown.
gtms_obj = GTMS(**obj)

### 1.创建一个RDS Database

- --vpc-security-group-ids 需要指定为与chatbot部署以及本notebook处于同一个vpc中的安全组
- --availability-zone 与notebook在同一个AZ
- --db-subnet-group-name 可以通过下面awscli进行创建

#### 1.1 设定变量

In [None]:
# Network placement used by the AWS CLI cells below.
region = "us-****-2"
vpc = "vpc-03e6******cbaf"
subnet_ids = [
    "subnet-1",
    "subnet-2",
    "subnet-3",
    "subnet-4",
]
vpc_security_group_ids = "sg-0*******497"

# RDS instance settings: identifier, availability zone, subnet group and
# master credentials passed to the create-db-* commands.
db_instance_name = "agent-****-tance"
db_az = "us-****-2a"
db_subnet_group_name = "ag****"
db_username = "yb****t"
db_password = "12****56"

#### 1.2 创建DB instance

In [None]:
# Create the DB subnet group named by db_subnet_group_name from the subnets
# listed in subnet_ids (passed to the CLI as a space-separated list).
!aws rds create-db-subnet-group \
    --db-subnet-group-name {db_subnet_group_name} \
    --db-subnet-group-description "DB Subnet Group For Agent" \
    --subnet-ids {" ".join(subnet_ids)}

In [None]:
# Create the MySQL RDS instance (db.r6g.large, 50 GB allocated storage) using the
# security group, availability zone and subnet group configured above.
# NOTE(review): the master password is passed on the command line, so it may end
# up in the notebook output or shell history — confirm this is acceptable.
!aws rds create-db-instance \
    --db-instance-identifier {db_instance_name} \
    --allocated-storage 50 \
    --db-instance-class db.r6g.large \
    --engine mysql \
    --master-username {db_username} \
    --master-user-password {db_password} \
    --vpc-security-group-ids {vpc_security_group_ids} \
    --availability-zone {db_az} \
    --db-subnet-group-name {db_subnet_group_name}

### 2. 创建表并注入数据

#### 2.1 连接数据库
- 根据创建数据库的返回，指定db_host，db_port
- 自行指定db_name，db_table_name

In [None]:
# Endpoint of the RDS instance; take host and port from the create-db-instance output.
db_host = "my-rds-instance2.********.us-****-2.rds.amazonaws.com"
db_port = "****"

# Database and table names are chosen freely by the user.
db_name = "sim***_****_db"
db_table_name = "gtms_employee"

In [None]:
from sqlalchemy import create_engine, text, Column, Integer, String, MetaData, Table, Sequence, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session
import pandas as pd

from urllib.parse import quote

# Percent-encode the credentials so that characters such as "@", ":" or "/"
# in the username or password are not mistaken for URL delimiters.
_url_user = quote(db_username, safe="")
_url_password = quote(db_password, safe="")

# Server-level URL (no schema selected) so the database itself can be created first.
connection_string = f"mysql+pymysql://{_url_user}:{_url_password}@{db_host}:{db_port}"
engine = create_engine(connection_string)

with engine.connect() as connection:
    result = connection.execute(text(f"CREATE DATABASE IF NOT EXISTS {db_name}"))

In [None]:
from urllib.parse import quote

# Percent-encode the credentials so that characters such as "@", ":" or "/"
# in the username or password are not mistaken for URL delimiters.
_url_user = quote(db_username, safe="")
_url_password = quote(db_password, safe="")

# URL that selects the database named by db_name.
new_db_connection_string = (
    f"mysql+pymysql://{_url_user}:{_url_password}@{db_host}:{db_port}/{db_name}"
)
new_db_engine = create_engine(new_db_connection_string)

# Show the connection target without echoing the password into the notebook output.
print(f"mysql+pymysql://{db_username}:***@{db_host}:{db_port}/{db_name}")

# Declarative base for the ORM model and a session bound to the new engine.
Base = declarative_base()
session = Session(bind=new_db_engine)

# ORM model definition
class GTMS_SQLAlchemy(Base):
    """ORM mapping for the table named by ``db_table_name``: one service/employee pair per row."""

    __tablename__ = db_table_name

    # Auto-incrementing integer primary key.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Nullable string columns of up to 64 characters each.
    service = Column(String(length=64), nullable=True)
    employee = Column(String(length=64), nullable=True)

# Create the table (no-op if it already exists)
Base.metadata.create_all(bind=new_db_engine)

# Insert the rows of data.csv through the ORM model; the file must provide
# "Service" and "Employee" columns.
df = pd.read_csv("data.csv")

# pandas represents empty CSV cells as NaN, which the MySQL driver cannot bind;
# store them as NULL instead (both columns are nullable).
rows = [
    GTMS_SQLAlchemy(
        service=None if pd.isna(service) else service,
        employee=None if pd.isna(employee) else employee,
    )
    for service, employee in zip(df["Service"], df["Employee"])
]

try:
    session.add_all(rows)
    session.commit()
except Exception:
    # Leave the session usable for the following cells, then surface the error.
    session.rollback()
    raise

#### 2.2 验证数据摄入成功与否

In [None]:
# Fetch a single row to confirm that the ingestion worked.
result = session.query(GTMS_SQLAlchemy).first()
if result is None:
    # .first() returns None when the table is empty; report that instead of
    # failing with an AttributeError on result.service.
    print(f"No rows found in table {GTMS_SQLAlchemy.__tablename__}; ingestion did not succeed.")
else:
    print(result.service, result.employee)

### 3.构建查询的Lambda

#### 3.1 构建代码zip包
- 需要注意把lambda_function.py中的一些变量手动替换上去
  + db_username
  + db_password
  + db_host
  + db_port
  + db_name

In [None]:
!mkdir lambda_code

In [None]:
%%writefile lambda_code/lambda_function.py
import json
from pydantic import BaseModel, validator, ValidationError
from typing import Optional
from sqlalchemy import create_engine, text, Column, Integer, String, MetaData, Table, Sequence, or_
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import Session

from urllib.parse import quote

# Connection settings. These are NOT filled in automatically: uncomment the
# assignments below and replace each placeholder with the real value by hand
# before packaging this file.
# db_username = "<db_username>"
# db_password = "<db_password>"
# db_host = "<db_host>"
# db_port = "<db_port>"
# db_name = "<db_name>"
# db_table_name = "<db_table_name>"

# Percent-encode the credentials so that characters such as "@", ":" or "/"
# in the username or password are not mistaken for URL delimiters.
_url_user = quote(db_username, safe="")
_url_password = quote(db_password, safe="")

new_db_connection_string = (
    f"mysql+pymysql://{_url_user}:{_url_password}@{db_host}:{db_port}/{db_name}"
)
new_db_engine = create_engine(new_db_connection_string)

# Declarative base for the ORM model and a module-level session bound to the engine.
Base = declarative_base()
session = Session(bind=new_db_engine)

# ORM model definition
class GTMS_SQLAlchemy(Base):
    """ORM mapping for the table named by ``db_table_name``: one service/employee pair per row."""

    __tablename__ = db_table_name

    # Auto-incrementing integer primary key.
    id = Column(Integer, autoincrement=True, primary_key=True)
    # Nullable string columns of up to 64 characters each.
    service = Column(String(length=64), nullable=True)
    employee = Column(String(length=64), nullable=True)

class GTMS(BaseModel):
    """Query parameters accepted by the Lambda handler; both fields are optional."""

    # Service to look up; when supplied it must be one of the accepted names.
    service: Optional[str] = None
    # Employee to look up.
    employee: Optional[str] = None

    @validator('service')
    def service_check(cls, v):
        """Accept only the known service names.

        Returns the value unchanged when it is valid and raises ValueError
        otherwise, which pydantic reports as a ValidationError.
        """
        # The field is Optional: depending on the pydantic version an explicit
        # None may reach this validator, and "not specified" must stay valid.
        if v is None:
            return v
        if v not in ['sagemaker', 'emr', 'msk', 'personalize']:
            raise ValueError("service should be in ['sagemaker', 'emr', 'msk', 'personalize']")
        return v

def lambda_handler(event, context):
    """Look up GTMS records matching the requested service and/or employee.

    Args:
        event: Invocation payload. Its ``param`` entry is a dict that may carry
            ``service`` and/or ``employee``.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` and ``body``. On success (200) the body is a
        newline-separated, numbered list of matches (empty when nothing
        matches). On invalid input (500) the body is a JSON error report.
    """
    # A missing or null "param" is an empty request, not a crash on **None.
    param = event.get('param')
    if param is None:
        param = {}
    if not isinstance(param, dict):
        return {
            'statusCode': 500,
            'body': json.dumps({'error': "'param' must be an object"})
        }

    try:
        gtms_obj = GTMS(**param)
    except ValidationError as e:
        return {
            'statusCode': 500,
            'body': e.json()
        }

    # Filter only on the fields the caller supplied. Comparing a column with
    # None compiles to "IS NULL" and would match unrelated rows.
    conditions = []
    if gtms_obj.service is not None:
        conditions.append(GTMS_SQLAlchemy.service == gtms_obj.service)
    if gtms_obj.employee is not None:
        conditions.append(GTMS_SQLAlchemy.employee == gtms_obj.employee)

    plain_result = ""
    if conditions:
        # Use a short-lived session per invocation so a warm container never
        # reuses a stale or failed transaction; it is closed on exit.
        with Session(bind=new_db_engine) as db_session:
            results = db_session.query(GTMS_SQLAlchemy).filter(or_(*conditions)).all()
            # Format while the session is still open.
            plain_result = "\n".join(
                [f"{idx}. {item.employee}负责{item.service}." for idx, item in enumerate(results)]
            )

    return {
        'statusCode': 200,
        'body': plain_result
    }

In [None]:
# Run the helper script create_lambda.sh.
# NOTE(review): the script is not shown in this notebook; presumably it builds
# lambda_code/my_deployment_package.zip, which the create-function step uploads — confirm.
!sh create_lambda.sh

#### 3.2 创建部署lambda

+ 创建role

In [None]:
# Create the IAM role AgentLambdaRole with a trust policy that allows the
# Lambda service (lambda.amazonaws.com) to assume it.
!aws iam create-role \
    --role-name AgentLambdaRole \
    --assume-role-policy-document '{"Version": "2012-10-17", "Statement": [{"Effect": "Allow", "Principal": {"Service": "lambda.amazonaws.com"},"Action": "sts:AssumeRole"}]}'

In [None]:
# Create the IAM policy MyCustomPolicy from the local file lambda_role_policy.json.
# The policy ARN needed by the attach step is taken from this command's output.
!aws iam create-policy --policy-name MyCustomPolicy --policy-document file://lambda_role_policy.json

- 提取前一个cell的输出，替换policy_arn

In [None]:
# Attach the policy to AgentLambdaRole.
# policy_arn is not assigned in any cell shown in this notebook: set it to the
# ARN printed by the create-policy command before running this cell.
!aws iam attach-role-policy --role-name AgentLambdaRole \
    --policy-arn {policy_arn}

- 需要把lambda部署在vpc中，因为db也是在vpc中，否则会出现超时
- 需要把前两步创建的role的ARN传入 --role 参数中

In [None]:
!aws lambda create-function --function-name query_gtms4 \
    --zip-file fileb://lambda_code/my_deployment_package.zip --runtime python3.10 \
    --handler lambda_function.lambda_handler --timeout 10 --region {region} \
    --role arn:aws:iam::106839800180:role/AgentLambdaRole \
    --vpc-config SubnetIds={",".join(subnet_ids)},SecurityGroupIds={vpc_security_group_ids}