In [17]:
import pandas as pd
import os
import glob
from config import DATABASE_DIR, DATABASE_CSV_FILES, validate_config

# 全局变量，用于存储加载后的数据库DataFrames
DB = {} 

def load_database():
    """
    加载数据库目录中的所有CSV文件到全局的DB字典中。
    使用config.py中的配置。
    """
    print("正在加载数据库到内存...")
    
    # 验证配置
    config_errors = validate_config()
    if config_errors:
        print("配置错误:")
        for error in config_errors:
            print(f"  - {error}")
        return
    
    if not DATABASE_DIR.exists():
        print(f"致命错误：数据库目录未找到于 '{DATABASE_DIR}'。Agent无法启动。")
        return

    # 使用配置中的CSV文件列表
    for csv_file in DATABASE_CSV_FILES:
        file_path = DATABASE_DIR / csv_file
        if not file_path.exists():
            print(f"警告：数据表文件 '{csv_file}' 不存在于 '{DATABASE_DIR}'")
            continue
            
        try:
            # 使用文件名（不含扩展名）作为字典的键
            key = csv_file.split('.')[0]
            DB[key] = pd.read_csv(file_path)
            print(f"  - 已加载数据表 '{key}' (共 {len(DB[key])} 行)")
        except Exception as e:
            print(f"  - 加载数据表 '{csv_file}' 失败: {e}")
    
    print("数据库加载完成。")

# 当此模块被首次导入时，自动执行加载数据库的操作
load_database()

正在加载数据库到内存...
  - 已加载数据表 '1_reactions_core' (共 3485 行)
  - 已加载数据表 '2_enzymes' (共 3485 行)
  - 已加载数据表 '3_experimental_conditions' (共 3485 行)
  - 已加载数据表 '4_activity_performance' (共 3485 行)
  - 已加载数据表 '5_reaction_participants' (共 16146 行)
  - 已加载数据表 '6_kinetic_parameters' (共 4756 行)
  - 已加载数据表 '7_mutants_characterized' (共 2388 行)
  - 已加载数据表 '8_inhibitors_main' (共 2466 行)
  - 已加载数据表 '9_inhibition_params' (共 1838 行)
  - 已加载数据表 '10_auxiliary_factors' (共 2740 行)
数据库加载完成。


In [4]:
from google.adk.tools import FunctionTool
import pandas as pd
import numpy as np
from typing import List, Dict, Optional, Any
from config import QUERY_CONFIG, ANALYSIS_CONFIG
import re

In [34]:
def _enzyme_name_or_synonym_match(df, enzyme_name):
    """
    支持enzyme_name和enzyme_synonyms（|分隔）模糊匹配
    """
    if 'enzyme_synonyms' not in df.columns:
        return df['enzyme_name'].str.contains(enzyme_name, case=False, na=False)
    # 先对synonyms做分割，生成布尔Series
    def match_synonyms(synonyms):
        if pd.isnull(synonyms):
            return False
        for syn in str(synonyms).split('|'):
            if enzyme_name.lower() in syn.lower():
                return True
        return False
    return (
        df['enzyme_name'].str.contains(enzyme_name, case=False, na=False) |
        df['enzyme_synonyms'].apply(match_synonyms)
    )

def find_reactions_by_enzyme(
    enzyme_name: str,
    organism: str,
    max_results: int
) -> str:
    """
    根据酶名称和物种查找相关反应。
    
    :param enzyme_name: str
    :param organism: str
    :param max_results: int
    """
    if not DB: return "数据库未加载。"
    
    enzymes_df = DB.get('2_enzymes', pd.DataFrame())
    core_df = DB.get('1_reactions_core', pd.DataFrame())
    
    if enzymes_df.empty or core_df.empty:
        return "核心数据表未加载。"
    
    # 构建查询条件
    query_conditions = []
    if enzyme_name:
        query_conditions.append(_enzyme_name_or_synonym_match(enzymes_df, enzyme_name))
    if organism:
        query_conditions.append(enzymes_df['organism'].str.contains(organism, case=False, na=False))

    if not query_conditions:
        return "请提供酶名称或物种信息。"
    
    # 应用查询条件
    filtered_enzymes = enzymes_df[pd.concat(query_conditions, axis=1).all(axis=1)]
    
    if filtered_enzymes.empty:
        return f"未找到匹配酶 '{enzyme_name}' 和物种 '{organism}' 的反应。"
    
    # 合并反应信息
    merged_df = pd.merge(filtered_enzymes, core_df, on=['literature_id', 'reaction_id'])
    
    # 限制结果数量
    max_results = min(max_results, QUERY_CONFIG["max_results"])
    result_df = merged_df.head(max_results)
    
    # 格式化输出
    result = f"# 酶相关反应查询结果\n\n"
    result += f"**查询条件**: 酶={enzyme_name}, 物种={organism}\n"
    result += f"**找到反应数**: {len(result_df)} (共{len(merged_df)}个)\n\n"
    
    for _, row in result_df.iterrows():
        result += f"## {row['literature_id']}:{row['reaction_id']}\n"
        result += f"- **酶**: {row['enzyme_name']}\n"
        result += f"- **物种**: {row['organism']}\n"
        result += f"- **反应**: {row['reaction_equation']}\n"
        result += f"- **是否可逆**: {row['reaction_type_reversible']}\n\n"
        print(row['enzyme_synonyms'])
    return result

In [36]:
find_reactions_by_enzyme('AtPPPS',"Arabidopsis thaliana",5)

AtSPS3|AtPPPS


'# 酶相关反应查询结果\n\n**查询条件**: 酶=AtPPPS, 物种=Arabidopsis thaliana\n**找到反应数**: 1 (共1个)\n\n## PMID32034864:reaction_3\n- **酶**: Solanesyl diphosphate synthase 3\n- **物种**: Arabidopsis thaliana\n- **反应**: Geranylgeranyl diphosphate + 5 Isopentenyl diphosphate -> Solanesyl diphosphate + 5 Pyrophosphate\n- **是否可逆**: Not specified\n\n'

In [16]:

def guess_search_fields(user_query: str) -> list:
    """
    根据用户输入内容智能推断最合适的数据库字段（严格依据实际字段名）。
    """
    # 反应方程式结构
    if '->' in user_query or '→' in user_query:
        return ['reaction_equation']
    # EC号
    if re.match(r'\d+\.\d+\.\d+\.\d+', user_query):
        return ['ec_number']
    # 酶名关键词
    if any(kw in user_query.lower() for kw in ['酶', 'ase', 'protein']):
        return ['enzyme_name', 'enzyme_synonyms']
    # 反应类型
    if '可逆' in user_query or '不可逆' in user_query or '类型' in user_query:
        return ['reaction_type_reversible']
    # 底物/产物
    if '底物' in user_query or '产物' in user_query or 'substrate' in user_query.lower() or 'product' in user_query.lower():
        return ['participant_name', 'role']
    # 基因名
    if '基因' in user_query or 'gene' in user_query.lower():
        return ['gene_name']
    # 物种
    if '物种' in user_query or 'organism' in user_query.lower():
        return ['organism']
    # 备注
    if '备注' in user_query or 'note' in user_query.lower():
        return ['notes']
    # fallback: 所有主要字段
    return [
        'reaction_equation', 'reaction_type_reversible', 'notes',
        'enzyme_name', 'enzyme_synonyms', 'gene_name', 'organism', 'ec_number',
        'participant_name', 'role'
    ]


In [43]:
def smart_search_reactions(
    search_query: str,
    search_fields: List[str],
    max_results: int
) -> str:
    """
    智能搜索反应，支持多字段模糊匹配。参数均可选。
    
    :param search_query: str，可选
    :param search_fields: List[str]，可选
    :param max_results: int
    """
    if not DB: return "数据库未加载。"
    
    core_df = DB.get('1_reactions_core', pd.DataFrame())
    enzymes_df = DB.get('2_enzymes', pd.DataFrame())
    participants_df = DB.get('5_reaction_participants', pd.DataFrame())
    
    if core_df.empty or enzymes_df.empty:
        return "核心数据表未加载。"
    
    # 合并数据
    merged_df = pd.merge(core_df, enzymes_df, on=['literature_id', 'reaction_id'])
    
    # 如果涉及底物/产物/参与者，合并参与者表
    if any(f in ['participant_name', 'role'] for f in (search_fields or [])):
        merged_df = pd.merge(merged_df, participants_df, on=['literature_id', 'reaction_id'], how='left')
    
    # 字段推断
    valid_fields = [f for f in (search_fields or []) if f in merged_df.columns]
    if not valid_fields:
        valid_fields = guess_search_fields(search_query)
        valid_fields = [f for f in valid_fields if f in merged_df.columns]
    # fallback: 全字段
    if not valid_fields:
        valid_fields = [col for col in [
            'reaction_equation', 'reaction_type_reversible', 'notes',
            'enzyme_name', 'enzyme_synonyms', 'gene_name', 'organism', 'ec_number',
            'participant_name', 'role'
        ] if col in merged_df.columns]
    # 构建搜索条件
    print(valid_fields)
    search_conditions = []
    for field in valid_fields:
        print(field)
        if field in merged_df.columns:
            if field == "enzyme_name" or field == "enzyme_synonyms":
                search_conditions.append(_enzyme_name_or_synonym_match(merged_df, search_query))
            else:
                search_conditions.append(merged_df[field].astype(str).str.contains(search_query, case=False, na=False,regex=False))
    print(search_query)
    print(merged_df['reaction_equation'].astype(str).str.contains(search_query, case=False, na=False,regex=False))
    # print(merged_df)
    if not search_conditions:
        return "未找到有效的搜索字段。"
    # print(search_conditions)
    # 应用搜索条件（OR逻辑）
    combined_condition = pd.concat(search_conditions, axis=1).any(axis=1)
    filtered_df = merged_df[combined_condition]
    
    if filtered_df.empty:
        return f"未找到匹配查询 '{search_query}' 的反应。"
    
    # 限制结果数量
    max_results = min(max_results, QUERY_CONFIG["max_results"])
    result_df = filtered_df.head(max_results)
    
    # 格式化输出
    result = f"# 智能搜索结果\n\n"
    result += f"**搜索查询**: {search_query if search_query else '全部'}\n"
    result += f"**搜索字段**: {', '.join(valid_fields) if valid_fields else '全部'}\n"
    result += f"**找到反应数**: {len(result_df)} (共{len(filtered_df)}个)\n\n"
    
    for _, row in result_df.iterrows():
        result += f"## {row['literature_id']}:{row['reaction_id']}\n"
        result += f"- **酶**: {row.get('enzyme_name', 'N/A')}\n"
        result += f"- **物种**: {row.get('organism', 'N/A')}\n"
        result += f"- **反应**: {row.get('reaction_equation', 'N/A')}\n"
        result += f"- **反应是否可逆**: {row.get('reaction_type_reversible', 'N/A')}\n"
        if 'participant_name' in row and pd.notnull(row['participant_name']):
            result += f"- **参与分子**: {row['participant_name']} ({row.get('role', 'N/A')})\n"
        if 'notes' in row and pd.notnull(row['notes']):
            result += f"- **备注**: {row['notes']}\n"
        result += "\n"
    return result


In [44]:
smart_search_reactions("Carbamoyl phosphate + L-ornithine -> Citrulline + Inorganic phosphate",["reaction_equation"],1)

['reaction_equation']
reaction_equation
Carbamoyl phosphate + L-ornithine -> Citrulline + Inorganic phosphate
0        True
1       False
2       False
3       False
4       False
        ...  
3480    False
3481    False
3482    False
3483    False
3484    False
Name: reaction_equation, Length: 3485, dtype: bool


'# 智能搜索结果\n\n**搜索查询**: Carbamoyl phosphate + L-ornithine -> Citrulline + Inorganic phosphate\n**搜索字段**: reaction_equation\n**找到反应数**: 1 (共1个)\n\n## PMID32027716:reaction_1\n- **酶**: Ornithine transcarbamoylase\n- **物种**: Escherichia coli\n- **反应**: Carbamoyl phosphate + L-ornithine -> Citrulline + Inorganic phosphate\n- **反应是否可逆**: Not specified\n- **备注**: Extensive biophysical characterization including thermal shift assays, fluorescence binding assays, SEC-SAXS, and molecular dynamics simulations was performed to investigate the structural and dynamic effects of the mutations.\n\n'

In [8]:
def find_kinetic_parameters(**kwargs) -> str:
    """
    查询并展示指定反应的动力学参数（如kcat、Km、Vmax、kcat_km、specific_activity等）。
    支持按文献、反应、参数类型筛选。参数均可选。
    
    :param literature_id: str，可选
    :param reaction_id: str，可选
    :param parameter_type: str，可选（如'kcat','Km','Vmax', 'kcat_km', 'specific_activity'等）
    :param max_results: int
    """
    literature_id = kwargs.get('literature_id', None)
    reaction_id = kwargs.get('reaction_id', None)
    parameter_type = kwargs.get('parameter_type', None)

    if not DB: return "数据库未加载。"
    kinetic_df = DB.get('6_kinetic_parameters', pd.DataFrame())
    if kinetic_df.empty:
        return "动力学参数数据表未加载。"

    # 条件筛选
    df = kinetic_df.copy()
    if literature_id:
        df = df[df['literature_id'] == literature_id]
    if reaction_id:
        df = df[df['reaction_id'] == reaction_id]
    if parameter_type:
        df = df[df['parameter_type'].str.lower() == parameter_type.lower()]
    if df.empty:
        return "未找到匹配的动力学参数数据。"
    
    max_results = kwargs.get('max_results', len(df))
    # 限制结果数量
    df = df.head(max_results)
    
    # 分组展示
    result = f"# 动力学参数查询结果\n\n"
    # print(df)
    print(df[df['source_type'] == 'wild_type']['mutation_description'].unique())
    group_cols = ['literature_id', 'reaction_id', 'source_type', 'mutation_description']
    grouped = df.groupby(group_cols)

    for group_keys, group_df in grouped:
        print(group_keys)
        print(group_df)
        lit, rid, src, mut = group_keys
        result += f"## 文献: {lit} 反应: {rid} 类型: {src}"
        if src == "wild_type":
            result += f" 野生型: WT"
        else:
            if mut and str(mut).strip():
                result += f" 突变: {mut}"
        result += "\n"
        for _, row in group_df.iterrows():
            result += f"- **参数类型**: {row['parameter_type']}"
            if row['substrate_name'] and str(row['substrate_name']).strip():
                result += f" | **底物**: {row['substrate_name']}"
            result += f" | **数值**: {row['value']} {row['unit']}"
            if row['error_margin'] and str(row['error_margin']).strip():
                result += f" (误差: {row['error_margin']})"
            if row['details'] and str(row['details']).strip():
                result += f" | 说明: {row['details']}"
            result += "\n"
        result += "\n"
    return result


In [9]:
find_kinetic_parameters(literature_id="PMID32027716", max_results=100)

['WT']
('PMID32027716', 'reaction_1', 'mutant', 'C273A')
   literature_id reaction_id source_type mutation_description parameter_type  \
27  PMID32027716  reaction_1      mutant                C273A             Km   
28  PMID32027716  reaction_1      mutant                C273A             Km   
29  PMID32027716  reaction_1      mutant                C273A           kcat   

         substrate_name  value unit error_margin details  
27  Carbamoyl phosphate  260.0   µM         ± 63     NaN  
28          L-ornithine  730.0   µM        ± 350     NaN  
29                  NaN  170.0  s⁻¹         ± 56     NaN  
('PMID32027716', 'reaction_1', 'mutant', 'D140N')
  literature_id reaction_id source_type mutation_description parameter_type  \
3  PMID32027716  reaction_1      mutant                D140N             Km   
4  PMID32027716  reaction_1      mutant                D140N             Km   
5  PMID32027716  reaction_1      mutant                D140N           kcat   

        substrate_n

'# 动力学参数查询结果\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: C273A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 260.0 µM (误差: ± 63) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 730.0 µM (误差: ± 350) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 170.0 s⁻¹ (误差: ± 56) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D140N\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 1400.0 µM (误差: ± 450) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 17000.0 µM (误差: ± 5900) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 460.0 s⁻¹ (误差: ± 154) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D231A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 150.0 µM (误差: ± 42) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 590.0 µM (误差: ± 330) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 0.7 s⁻¹ (误差: ± 0.050) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: E299D\n- **参数类型**: Km | **底物**: Carb

In [1]:
%pwd

'/personal/paper_label/bioreaction_adk_agent'

In [None]:
find_kinetic_parameters(literature_id="PMID32027716", max_results=100)

[nan]
('PMID32027716', 'reaction_1', 'mutant', 'C273A')
   literature_id reaction_id source_type mutation_description parameter_type  \
27  PMID32027716  reaction_1      mutant                C273A             Km   
28  PMID32027716  reaction_1      mutant                C273A             Km   
29  PMID32027716  reaction_1      mutant                C273A           kcat   

         substrate_name  value unit error_margin details  
27  Carbamoyl phosphate  260.0   µM         ± 63     NaN  
28          L-ornithine  730.0   µM        ± 350     NaN  
29                  NaN  170.0  s⁻¹         ± 56     NaN  
('PMID32027716', 'reaction_1', 'mutant', 'D140N')
  literature_id reaction_id source_type mutation_description parameter_type  \
3  PMID32027716  reaction_1      mutant                D140N             Km   
4  PMID32027716  reaction_1      mutant                D140N             Km   
5  PMID32027716  reaction_1      mutant                D140N           kcat   

        substrate_na

'# 动力学参数查询结果\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: C273A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 260.0 µM (误差: ± 63) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 730.0 µM (误差: ± 350) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 170.0 s⁻¹ (误差: ± 56) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D140N\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 1400.0 µM (误差: ± 450) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 17000.0 µM (误差: ± 5900) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 460.0 s⁻¹ (误差: ± 154) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D231A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 150.0 µM (误差: ± 42) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 590.0 µM (误差: ± 330) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 0.7 s⁻¹ (误差: ± 0.050) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: E299D\n- **参数类型**: Km | **底物**: Carb

In [None]:
find_kinetic_parameters(literature_id="PMID32027716", max_results=100)

[nan]
('PMID32027716', 'reaction_1', 'mutant', 'C273A')
   literature_id reaction_id source_type mutation_description parameter_type  \
27  PMID32027716  reaction_1      mutant                C273A             Km   
28  PMID32027716  reaction_1      mutant                C273A             Km   
29  PMID32027716  reaction_1      mutant                C273A           kcat   

         substrate_name  value unit error_margin details  
27  Carbamoyl phosphate  260.0   µM         ± 63     NaN  
28          L-ornithine  730.0   µM        ± 350     NaN  
29                  NaN  170.0  s⁻¹         ± 56     NaN  
('PMID32027716', 'reaction_1', 'mutant', 'D140N')
  literature_id reaction_id source_type mutation_description parameter_type  \
3  PMID32027716  reaction_1      mutant                D140N             Km   
4  PMID32027716  reaction_1      mutant                D140N             Km   
5  PMID32027716  reaction_1      mutant                D140N           kcat   

        substrate_na

'# 动力学参数查询结果\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: C273A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 260.0 µM (误差: ± 63) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 730.0 µM (误差: ± 350) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 170.0 s⁻¹ (误差: ± 56) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D140N\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 1400.0 µM (误差: ± 450) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 17000.0 µM (误差: ± 5900) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 460.0 s⁻¹ (误差: ± 154) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: D231A\n- **参数类型**: Km | **底物**: Carbamoyl phosphate | **数值**: 150.0 µM (误差: ± 42) | 说明: nan\n- **参数类型**: Km | **底物**: L-ornithine | **数值**: 590.0 µM (误差: ± 330) | 说明: nan\n- **参数类型**: kcat | **底物**: nan | **数值**: 0.7 s⁻¹ (误差: ± 0.050) | 说明: nan\n\n## 文献: PMID32027716 反应: reaction_1 类型: mutant 突变: E299D\n- **参数类型**: Km | **底物**: Carb

In [2]:
%cd ..

/personal/paper_label


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [3]:
from bioreaction_adk_agent.tools.database_loader import DB
def get_database_statistics() -> str:
    """
    获取数据库统计信息。
    """
    if not DB: return "数据库未加载。"
    
    result = "# 数据库统计信息\n\n"
    
    for table_name, df in DB.items():
        result += f"## {table_name}\n"
        result += f"- **记录数**: {len(df)}\n"
        result += f"- **列数**: {len(df.columns)}\n"
        result += f"- **列名**: {', '.join(df.columns.tolist())}\n\n"
    
    return result

get_database_statistics()

正在加载数据库到内存...
  - 已加载数据表 '1_reactions_core' (共 3485 行)
  - 已加载数据表 '2_enzymes' (共 3485 行)
  - 已加载数据表 '3_experimental_conditions' (共 3485 行)
  - 已加载数据表 '4_activity_performance' (共 3485 行)
  - 已加载数据表 '5_reaction_participants' (共 16146 行)
  - 已加载数据表 '6_kinetic_parameters' (共 4756 行)
  - 已加载数据表 '7_mutants_characterized' (共 2388 行)
  - 已加载数据表 '8_inhibitors_main' (共 2466 行)
  - 已加载数据表 '9_inhibition_params' (共 1838 行)
  - 已加载数据表 '10_auxiliary_factors' (共 2740 行)
数据库加载完成。


'# 数据库统计信息\n\n## 1_reactions_core\n- **记录数**: 3485\n- **列数**: 5\n- **列名**: literature_id, reaction_id, reaction_equation, reaction_type_reversible, notes\n\n## 2_enzymes\n- **记录数**: 3485\n- **列数**: 15\n- **列名**: literature_id, reaction_id, enzyme_name, enzyme_synonyms, gene_name, organism, ec_number, genbank_id, pdb_id, uniprot_id, subcellular_localization, optimal_temperature, optimal_temperature_unit, optimal_ph, optimal_conditions_details\n\n## 3_experimental_conditions\n- **记录数**: 3485\n- **列数**: 11\n- **列名**: literature_id, reaction_id, assay_type, assay_details, solvent_buffer, ph, ph_details, temperature_celsius, expression_host, expression_vector, expression_induction\n\n## 4_activity_performance\n- **记录数**: 3485\n- **列数**: 12\n- **列名**: literature_id, reaction_id, conversion_rate, conversion_rate_unit, conversion_rate_error, product_yield, product_yield_unit, product_yield_error, regioselectivity, stereoselectivity, enantiomeric_excess, enantiomeric_excess_unit\n\n## 5_reactio

In [11]:
%pwd

'/personal/paper_label/bioreaction_adk_agent'

In [2]:
%cd ..

/personal/paper_label


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [4]:
import asyncio
from bioreaction_adk_agent.agent import query_agent

result = await query_agent("请帮我查找与反应 PMID32033404:reaction_1 相似的反应，相似性标准为EC号")
print(result)

[DEBUG] 向Agent发送: 请帮我查找与反应 PMID32033404:reaction_1 相似的反应，相似性标准为EC号
[DEBUG] event: content=Content(
  parts=[
    Part(
      function_call=FunctionCall(
        args={
          'agent_name': 'database_query_agent'
        },
        id='adk-48e97055-4bd5-4ae2-9345-72dabc1d8582',
        name='transfer_to_agent'
      ),
      thought_signature=b'\n\x82\x03\x01T\xa8\\\xee\xcbd\xbcY}\xfa\x0e\x0f\xf5\x98!A\xc93\xf8\x0bL\x80\xda\x0fk\xa6}zxe\xfal\x02\x8dG\x08-\xc5\xb7\xcc\xe0\xe7\x1d\xf7\xaa\xfa\xcf\xa6\x14\x9aEK\x82\xbf\xaa)\xd5\xb0\x1bb\x12\t\x9c\x81f\xdc\xc0\r\xb5!\xf7#\x04\xc6!\xd7\xfe\xe6\xbalQ\xd5k\x12]a\xa4Q\x110\xd1?`...'
    ),
  ],
  role='model'
) grounding_metadata=None partial=None turn_complete=None error_code=None error_message=None interrupted=None custom_metadata=None usage_metadata=GenerateContentResponseUsageMetadata(
  candidates_token_count=23,
  prompt_token_count=716,
  prompt_tokens_details=[
    ModalityTokenCount(
      modality=<MediaModality.TEXT: 'TEXT'>,
    

In [35]:
def _enzyme_name_or_synonym_match(df, enzyme_name):
    """
    支持enzyme_name和enzyme_synonyms（|分隔）模糊匹配，归一化后再比对。
    """
    norm_query = normalize_enzyme_name(enzyme_name)
    if 'enzyme_synonyms' not in df.columns:
        return df['enzyme_name'].apply(lambda x: norm_query in normalize_enzyme_name(x))
    def match_synonyms(synonyms, enzyme_name_col):
        # 检查主酶名
        if norm_query in normalize_enzyme_name(enzyme_name_col):
            return True
        # 检查同义词
        if pd.isnull(synonyms):
            return False
        for syn in str(synonyms).split('|'):
            if norm_query in normalize_enzyme_name(syn):
                return True
        return False
    return df.apply(lambda row: match_synonyms(row.get('enzyme_synonyms', None), row.get('enzyme_name', '')), axis=1)
def normalize_enzyme_name(name: str) -> str:
    """
    归一化酶名：去除空格、特殊字符、转小写。
    """
    if not isinstance(name, str):
        return ''
    # 去除空格和常见特殊字符，仅保留字母数字
    return re.sub(r"[^a-zA-Z0-9]", "", name).lower()