# 纠正通用模型可逆性

In [None]:
#!/usr/bin/env python3
"""
反应可逆性纠正脚本

逻辑：
- 模型可逆 + TSV不可逆 → 修改为不可逆
- 模型不可逆 + TSV可逆 → 不修改
"""

import json
import pandas as pd


def load_modelseed_reactions(tsv_path: str) -> dict:
    """
    Read the ModelSEED reaction table and index reversibility by reaction id.

    The file is parsed as tab-separated text and must provide the columns
    ``id`` and ``reversibility``. The column names and the number of rows
    are printed as a quick sanity check.

    Args:
        tsv_path: Path to the tab-separated reaction table.

    Returns:
        ``{reaction_id: {'reversible': bool, 'direction': str}}`` where
        ``direction`` is one of ``'='``, ``'>'`` or ``'<'``. If an id occurs
        more than once, the last row wins.
    """
    table = pd.read_csv(tsv_path, sep='\t')

    print(f"TSV文件列名: {table.columns.tolist()}")
    print(f"加载了 {len(table)} 条反应记录")

    # '=' is the only reversible symbol; '>' (forward) and '<' (reverse) are
    # one-way. Anything else is treated as forward-only.
    known_symbols = {
        '=': (True, '='),
        '>': (False, '>'),
        '<': (False, '<'),
    }
    fallback = (False, '>')

    reversibility_map = {}
    for _, record in table.iterrows():
        key = str(record['id']).strip()
        symbol = str(record['reversibility']).strip()
        reversible, direction = known_symbols.get(symbol, fallback)
        reversibility_map[key] = {
            'reversible': reversible,
            'direction': direction,
        }

    return reversibility_map


def extract_base_rxn_id(rxn_id: str) -> str:
    """
    Strip a trailing compartment tag from a model reaction id.

    The text after the last underscore is dropped only when it is one or
    two alphabetic characters, e.g. ``rxn09045_c`` -> ``rxn09045``. Ids
    without such a tag (including ones ending in ``_c0``) are returned
    unchanged.
    """
    stem, underscore, tag = rxn_id.rpartition('_')
    if underscore and len(tag) <= 2 and tag.isalpha():
        return stem
    return rxn_id


def is_model_reversible(reaction: dict) -> bool:
    """
    Tell whether a model reaction can carry flux in both directions.

    A reaction is reversible when its lower bound is negative and its upper
    bound is positive. A missing ``lower_bound`` is read as 0.0 and a
    missing ``upper_bound`` as 1000.0.
    """
    lb = reaction.get('lower_bound', 0.0)
    ub = reaction.get('upper_bound', 1000.0)
    return lb < 0 < ub


def fix_reversibility(model_path: str, tsv_path: str, output_path: str):
    """
    Compare model reaction reversibility with ModelSEED and tighten the model.

    Only one kind of mismatch is corrected: a reaction that is reversible in
    the model but irreversible in the TSV gets its bounds replaced by
    (0, 1000) for direction '>' or (-1000, 0) for direction '<'. Reactions
    that are irreversible in the model but reversible in the TSV are left
    unchanged and reported separately.

    Args:
        model_path: JSON model file; either a dict with a ``reactions`` list
            or a bare list of reaction dicts.
        tsv_path: ModelSEED reaction table passed to
            ``load_modelseed_reactions``.
        output_path: Where the (possibly modified) model is written as JSON.

    Returns:
        ``(changes, not_found, skipped_keep_irreversible)``: a list of
        per-reaction change records, the ids that have no ModelSEED entry,
        and the ids kept irreversible although the TSV marks them reversible.

    Raises:
        ValueError: If the JSON top level is neither a dict containing
            ``reactions`` nor a list.
    """
    rule = "=" * 60

    # 1. Load the ModelSEED reversibility information
    print(rule)
    print("步骤1: 加载ModelSEED反应可逆性信息")
    print(rule)
    reversibility_map = load_modelseed_reactions(tsv_path)
    print(f"可逆反应数量: {sum(1 for v in reversibility_map.values() if v['reversible'])}")
    print(f"不可逆反应数量: {sum(1 for v in reversibility_map.values() if not v['reversible'])}")

    # 2. Load the model file
    print("\n" + rule)
    print("步骤2: 加载模型文件")
    print(rule)
    with open(model_path, 'r', encoding='utf-8') as f:
        model = json.load(f)

    # Locate the reaction list. The dict check comes first so that a scalar
    # JSON document ends in the ValueError below instead of a TypeError
    # raised by the `in` operator.
    if isinstance(model, dict) and 'reactions' in model:
        reactions = model['reactions']
    elif isinstance(model, list):
        reactions = model
    else:
        raise ValueError("无法识别的模型格式")

    print(f"模型包含 {len(reactions)} 个反应")

    # 3. Compare and correct reversibility
    print("\n" + rule)
    print("步骤3: 比较并纠正可逆性")
    print(rule)

    changes = []
    not_found = []
    skipped_keep_irreversible = []  # irreversible in model, reversible in TSV: kept
    correct = 0

    for reaction in reactions:
        rxn_id = reaction['id']
        base_id = extract_base_rxn_id(rxn_id)

        tsv_info = reversibility_map.get(base_id)
        if tsv_info is None:
            # No ModelSEED record for this reaction
            not_found.append(rxn_id)
            continue

        model_reversible = is_model_reversible(reaction)
        tsv_reversible = tsv_info['reversible']

        if model_reversible and not tsv_reversible:
            # Use the same defaults as is_model_reversible: a reaction may be
            # judged reversible while its 'upper_bound' key is absent, and a
            # plain subscript would then raise KeyError.
            old_lower = reaction.get('lower_bound', 0.0)
            old_upper = reaction.get('upper_bound', 1000.0)

            new_lower, new_upper = old_lower, old_upper
            if tsv_info['direction'] == '>':
                # Forward-only: lower=0, upper=1000
                new_lower, new_upper = 0.0, 1000.0
            elif tsv_info['direction'] == '<':
                # Reverse-only: lower=-1000, upper=0
                new_lower, new_upper = -1000.0, 0.0

            reaction['lower_bound'] = new_lower
            reaction['upper_bound'] = new_upper

            changes.append({
                'reaction_id': rxn_id,
                'base_id': base_id,
                'old_lower': old_lower,
                'old_upper': old_upper,
                'new_lower': new_lower,
                'new_upper': new_upper,
                'tsv_direction': tsv_info['direction'],
                'change_type': '可逆→不可逆'
            })

        elif not model_reversible and tsv_reversible:
            # Deliberately left as is; also counted among the unmodified ones
            skipped_keep_irreversible.append(rxn_id)
            correct += 1

        else:
            # Model and TSV agree
            correct += 1

    # 4. Summary statistics
    print("\n统计结果:")
    print(f"  - 无需修改的反应: {correct}")
    print(f"  - 修改为不可逆的反应 (模型可逆→TSV不可逆): {len(changes)}")
    print(f"  - 保持不可逆的反应 (模型不可逆但TSV可逆): {len(skipped_keep_irreversible)}")
    print(f"  - 未在ModelSEED中找到的反应: {len(not_found)}")

    # 5. Details of the changes (first 50 only)
    if changes:
        print(f"\n修改详情 (共 {len(changes)} 个):")
        print("-" * 110)
        print(f"{'反应ID':<25} {'基础ID':<15} {'原下界':>10} {'原上界':>10} {'新下界':>10} {'新上界':>10} {'TSV方向':<10} {'修改类型':<15}")
        print("-" * 110)
        for change in changes[:50]:
            print(f"{change['reaction_id']:<25} {change['base_id']:<15} {change['old_lower']:>10.1f} {change['old_upper']:>10.1f} {change['new_lower']:>10.1f} {change['new_upper']:>10.1f} {change['tsv_direction']:<10} {change['change_type']:<15}")
        if len(changes) > 50:
            print(f"... 还有 {len(changes) - 50} 个修改未显示")

    # 6. Save the corrected model
    print("\n" + rule)
    print("步骤4: 保存纠正后的模型")
    print(rule)

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(model, f, indent=2, ensure_ascii=False)

    print(f"纠正后的模型已保存到: {output_path}")

    return changes, not_found, skipped_keep_irreversible

if __name__ == "__main__":
    # File paths for this step.
    # NOTE(review): OUTPUT_PATH is the same file as MODEL_PATH, so the input
    # model is overwritten in place.
    MODEL_PATH = "5. GAPFILLING/processed_universal_modelseed_corrected.json"
    TSV_PATH = "4. 添加linker/modelseed_reactions.tsv"
    OUTPUT_PATH = "5. GAPFILLING/processed_universal_modelseed_corrected.json"

    # Run the reversibility correction
    changes, not_found, skipped = fix_reversibility(MODEL_PATH, TSV_PATH, OUTPUT_PATH)

    print("\n" + "=" * 60)
    print("完成!")
    print("=" * 60)

# 去除转运反应

In [None]:
#!/usr/bin/env python3
"""
去除转运反应脚本

逻辑：
- 如果反应的metabolites中含有 _e 后缀的代谢物，则去除该反应
"""

import json


def has_external_metabolite(reaction: dict) -> bool:
    """
    Report whether any metabolite id of the reaction looks extracellular.

    An id counts as extracellular when it ends with ``_e`` or contains
    ``_e0`` anywhere. A reaction without a ``metabolites`` entry has none.
    """
    return any(
        met_id.endswith('_e') or '_e0' in met_id
        for met_id in reaction.get('metabolites', {})
    )


def get_external_metabolites(reaction: dict) -> list:
    """
    List the reaction's metabolite ids that look extracellular.

    An id is selected when it ends with ``_e`` or contains ``_e0``; the
    result keeps the order of the reaction's ``metabolites`` mapping.
    """
    return [
        met_id
        for met_id in reaction.get('metabolites', {})
        if met_id.endswith('_e') or '_e0' in met_id
    ]


def remove_transport_reactions(model_path: str, output_path: str):
    """
    Drop every reaction that involves an extracellular metabolite.

    The model is read from ``model_path`` (a dict with a ``reactions`` list
    or a bare list of reactions), filtered with ``has_external_metabolite``,
    summarised on stdout and written to ``output_path`` as JSON.

    Returns:
        ``(kept_reactions, removed_reactions)``; the second item holds one
        summary dict (id, name, external metabolite ids) per removed reaction.

    Raises:
        ValueError: If the model is neither of the two supported layouts.
    """
    rule = "=" * 60

    def clip(text, width):
        # Shorten to `width` characters, ending in '...' when truncated
        return text[:width - 3] + '...' if len(text) > width else text

    # 1. Load the model file
    print(rule)
    print("步骤1: 加载模型文件")
    print(rule)
    with open(model_path, 'r', encoding='utf-8') as src:
        model = json.load(src)

    # Locate the reaction list
    if 'reactions' in model:
        is_dict_format = True
        reactions = model['reactions']
    elif isinstance(model, list):
        is_dict_format = False
        reactions = model
    else:
        raise ValueError("无法识别的模型格式")

    print(f"模型包含 {len(reactions)} 个反应")

    # 2. Split the reactions into kept and removed
    print("\n" + rule)
    print("步骤2: 筛选转运反应")
    print(rule)

    kept_reactions = []
    removed_reactions = []

    for reaction in reactions:
        rxn_id = reaction['id']

        if not has_external_metabolite(reaction):
            kept_reactions.append(reaction)
            continue

        # Touches an external metabolite: record it instead of keeping it
        removed_reactions.append({
            'reaction_id': rxn_id,
            'reaction_name': reaction.get('name', ''),
            'external_metabolites': ', '.join(get_external_metabolites(reaction))
        })

    # 3. Summary statistics
    print("\n统计结果:")
    print(f"  - 原始反应数量: {len(reactions)}")
    print(f"  - 保留的反应数量: {len(kept_reactions)}")
    print(f"  - 去除的转运反应数量: {len(removed_reactions)}")

    # 4. Details of the removed reactions (first 50 only)
    if removed_reactions:
        total_removed = len(removed_reactions)
        print(f"\n去除的转运反应详情 (共 {total_removed} 个):")
        print("-" * 100)
        print(f"{'反应ID':<25} {'反应名称':<40} {'外部代谢物':<30}")
        print("-" * 100)
        for entry in removed_reactions[:50]:
            name = clip(entry['reaction_name'], 40)
            mets = clip(entry['external_metabolites'], 30)
            print(f"{entry['reaction_id']:<25} {name:<40} {mets:<30}")
        if total_removed > 50:
            print(f"... 还有 {total_removed - 50} 个未显示")

    # 5. Update the model and save it
    print("\n" + rule)
    print("步骤3: 保存处理后的模型")
    print(rule)

    if is_dict_format:
        model['reactions'] = kept_reactions
    else:
        model = kept_reactions

    with open(output_path, 'w', encoding='utf-8') as dst:
        json.dump(model, dst, indent=2, ensure_ascii=False)

    print(f"处理后的模型已保存到: {output_path}")

    return kept_reactions, removed_reactions


if __name__ == "__main__":
    # File paths for this step: reads the "corrected" model and writes the
    # filtered result to a separate "corrected2" file.
    MODEL_PATH = "5. GAPFILLING/processed_universal_modelseed_corrected.json"
    OUTPUT_PATH = "5. GAPFILLING/processed_universal_modelseed_corrected2.json"

    # Run the transport-reaction removal
    kept, removed = remove_transport_reactions(MODEL_PATH, OUTPUT_PATH)

    print("\n" + "=" * 60)
    print("完成!")
    print("=" * 60)

# 手动去除EGC<5个反应

In [None]:
# Make the project-local helper scripts importable.
import sys
sys.path.append(r'./script/')
from egc_detector import EGCDetectorAutoFix, load_model
# Load the user model and the universal model written by the previous step.
user_model = load_model('14067gem20_MM.xml')
universal_model = load_model('5. GAPFILLING/processed_universal_modelseed_corrected2.json')
# NOTE(review): parameter semantics are defined in egc_detector, which is not
# visible here; the inline note below is carried over from the original.
detector = EGCDetectorAutoFix(
    user_model=user_model,
    universal_model=universal_model,
    atp_reaction='rxn00062_c',
    glucose_exchange='EX_cpd00027_e',  # only this reaction is closed
    max_universal_reactions=5,
    max_iterations=5
)
# Detect and fix
egcs, fixes = detector.detect_and_fix_all()
# Save the fix records
detector.save_results('fixed_reactions.csv')
# Get the fixed universal model (fixed_universal is not used again in this cell)
fixed_universal = detector.get_fixed_universal_model()
detector.save_fixed_universal_model('5. GAPFILLING/processed_universal_modelseed_corrected3.json')

# 去除RXN和R{5}的反应

In [None]:
#!/usr/bin/env python3
"""
去除特定反应模式脚本

逻辑：
- 去除符合特定ID格式的反应：R{5位数字} 或 RXN开头
"""

import json
import re


def has_pattern_to_remove(rxn_id: str) -> bool:
    """
    Decide whether a reaction id belongs to one of the families to drop.

    Two families are recognised:
    1. ``R`` followed by exactly five digits and nothing else, e.g. ``R12345``
       (an id with a compartment suffix such as ``R12345_c`` does not match).
    2. Any id starting with ``RXN``, e.g. ``RXN-12345``.
    """
    is_r_five_digits = re.match(r'^R\d{5}$', rxn_id) is not None
    return is_r_five_digits or rxn_id.startswith('RXN')


def get_reaction_id_pattern(rxn_id: str) -> str:
    """
    Name the id family a reaction id falls into.

    Returns the label for "R + five digits", the label for "starts with
    RXN", or the "kept" label when neither applies. The five-digit check
    takes precedence.
    """
    if re.match(r'^R\d{5}$', rxn_id):
        return "R{5位数字}"
    if rxn_id.startswith('RXN'):
        return "RXN开头"
    return "保留类型"


def remove_specific_reactions(model_path: str, output_path: str):
    """
    Drop reactions whose id matches one of the unwanted id families.

    The model is read from ``model_path`` (a dict with a ``reactions`` list
    or a bare list of reactions), filtered with ``has_pattern_to_remove``,
    summarised on stdout and written to ``output_path`` as JSON.

    Returns:
        ``(kept_reactions, removed_reactions, pattern_counts)``; the last
        item maps each family label to the number of reactions removed.

    Raises:
        ValueError: If the model is neither of the two supported layouts.
    """
    rule = "=" * 60

    # 1. Load the model file
    print(rule)
    print("步骤1: 加载模型文件")
    print(rule)
    with open(model_path, 'r', encoding='utf-8') as src:
        model = json.load(src)

    # Locate the reaction list
    if 'reactions' in model:
        is_dict_format = True
        reactions = model['reactions']
    elif isinstance(model, list):
        is_dict_format = False
        reactions = model
    else:
        raise ValueError("无法识别的模型格式")

    print(f"模型包含 {len(reactions)} 个反应")

    # 2. Split the reactions into kept and removed
    print("\n" + rule)
    print("步骤2: 筛选特定模式反应")
    print(rule)

    kept_reactions = []
    removed_reactions = []
    pattern_counts = {
        "R{5位数字}": 0,
        "RXN开头": 0
    }

    for reaction in reactions:
        rxn_id = reaction['id']

        if not has_pattern_to_remove(rxn_id):
            kept_reactions.append(reaction)
            continue

        # Matches a family to delete: count it and record a summary
        pattern_type = get_reaction_id_pattern(rxn_id)
        if pattern_type in pattern_counts:
            pattern_counts[pattern_type] += 1

        removed_reactions.append({
            'reaction_id': rxn_id,
            'reaction_name': reaction.get('name', ''),
            'pattern_type': pattern_type
        })

    # 3. Summary statistics
    r_digit_total = pattern_counts["R{5位数字}"]
    rxn_prefix_total = pattern_counts["RXN开头"]
    print("\n统计结果:")
    print(f"  - 原始反应数量: {len(reactions)}")
    print(f"  - 保留的反应数量: {len(kept_reactions)}")
    print(f"  - 去除的反应数量: {len(removed_reactions)}")
    print(f"    ├─ R{{5位数字}}: {r_digit_total} 个")
    print(f"    └─ RXN开头: {rxn_prefix_total} 个")

    # 4. Details of the removed reactions (first 50 only)
    if removed_reactions:
        total_removed = len(removed_reactions)
        print(f"\n去除的反应详情 (共 {total_removed} 个):")
        print("-" * 90)
        print(f"{'反应ID':<25} {'反应名称':<40} {'模式类型':<20}")
        print("-" * 90)
        for entry in removed_reactions[:50]:
            # Names longer than 37 characters are cut and marked with '...'
            full_name = entry['reaction_name']
            name = full_name[:37] + '...' if len(full_name) > 37 else full_name
            print(f"{entry['reaction_id']:<25} {name:<40} {entry['pattern_type']:<20}")

        if total_removed > 50:
            print(f"... 还有 {total_removed - 50} 个未显示")

    # 5. Update the model and save it
    print("\n" + rule)
    print("步骤3: 保存处理后的模型")
    print(rule)

    if is_dict_format:
        model['reactions'] = kept_reactions
    else:
        model = kept_reactions

    with open(output_path, 'w', encoding='utf-8') as dst:
        json.dump(model, dst, indent=2, ensure_ascii=False)

    print(f"处理后的模型已保存到: {output_path}")

    return kept_reactions, removed_reactions, pattern_counts


if __name__ == "__main__":
    # File paths for this step: reads the "corrected3" model saved by the
    # EGC-fixing cell and writes the final universal model.
    INPUT_FILE = "5. GAPFILLING/processed_universal_modelseed_corrected3.json"
    OUTPUT_FILE = "5. GAPFILLING/processed_universal_modelseed_final.json"

    # Run the removal of reactions with the unwanted id patterns
    kept, removed, pattern_counts = remove_specific_reactions(INPUT_FILE, OUTPUT_FILE)

    print("\n" + "=" * 60)
    print("处理完成!")
    print("=" * 60)

# 运行检测

In [None]:
# Make the project-local helper scripts importable.
import sys
sys.path.append(r'./script/')
from gapp import LooplessGapFillerOptimized, load_model

# Load the user model and the final universal model produced by the previous cell.
user_model = load_model('14067gem20_MM.xml')
universal_model = load_model('5. GAPFILLING/processed_universal_modelseed_final.json')

# NOTE(review): parameter semantics are defined in gapp, which is not visible
# here — confirm against that module before changing any value.
gapfiller = LooplessGapFillerOptimized(
    user_model=user_model,
    universal_model=universal_model,
    objective_reaction='EX_biomass_c',
    substrate='cpd00116_c',
    max_reactions=3,
    expansion_layers=10,
    max_valid_solutions=50,
    n_jobs=16,
    batch_size=10,
)

# Run the gap filler and write its results under ./gap_output
results = gapfiller.run()
gapfiller.save_results('./gap_output')

Set parameter Username
Set parameter LicenseID to value 2723056
Academic license - for non-commercial use only - expires 2026-10-16


No objective coefficients in model. Unclear what should be optimized


Read LP format model from file C:\Users\Asus\AppData\Local\Temp\tmpird2dh2c.lp
Reading time = 0.01 seconds
: 1621 rows, 3454 columns, 14990 nonzeros
Read LP format model from file C:\Users\Asus\AppData\Local\Temp\tmplb8_p2ke.lp
Reading time = 0.05 seconds
: 11377 rows, 18734 columns, 90308 nonzeros
并行配置: 16 核心, 批量大小 10
全局频率阈值: 反应在所有方案中最多出现 3 次
  构建索引...
  构建代谢物名称映射...
    映射了 11450 个代谢物名称
预序列化模型用于并行处理...
Loopless Gap-Filling [全局频率统计修复版 + 方程式注释]
目标反应: EX_biomass_c
底物: cpd00116_c
最大反应数: 3
并行核心数: 16
批量大小: 10
全局频率阈值: 3 (反应在所有方案中最多出现此次数)
Biomass下降阈值: 50%


>>> 正向扩展: 从 cpd00116_c 出发
    第1层: 33 个反应
    第2层: 704 个反应
    第3层: 3581 个反应
    第4层: 2374 个反应
    第5层: 561 个反应
    第6层: 150 个反应
    第7层: 35 个反应
    第8层: 13 个反应
    第9层: 7 个反应
    第10层: 1 个反应
  正向扩展共找到 7459 个候选反应

>>> 收集候选反应
  第10层: 1 → 0 个反应
  第9层: 7 → 3 个反应
  第8层: 13 → 3 个反应
  第7层: 35 → 8 个反应
  第6层: 150 → 47 个反应
  第5层: 561 → 218 个反应
  第4层: 2374 → 1690 个反应
  第3层: 3581 → 3456 个反应
  第2层: 704 → 698 个反应
  第1层: 33 → 33 个反应
  最终候选: 6156 个反应

>