# xlsx格式转换xml

In [2]:
import pandas as pd
import cobra
from cobra import Model, Reaction, Metabolite, Gene
import re

def parse_reaction_equation(equation):
    """Parse a reaction equation into reactant and product stoichiometries.

    The equation is expected to look like
    ``(1) cpd00001_c0 + (2) cpd00002_c0 <=> (1) cpd00003_c0``: each compound
    is written as ``(coefficient) id_compartment`` and the two sides are
    separated by ``=>`` (irreversible) or ``<=>`` (reversible).

    Args:
        equation: The equation string.

    Returns:
        A ``(reactants, products, reversible)`` tuple. ``reactants`` and
        ``products`` map ``"id_compartment"`` strings to float coefficients
        exactly as written in the equation (no sign is applied here);
        ``reversible`` is True only when the arrow is ``<=>``.

    Raises:
        ValueError: If the equation has no arrow, or more than one arrow.
    """
    # '<=>' has to be tested first: it contains '=>' as a substring, so
    # testing '=>' first would classify every reversible equation as
    # irreversible.
    if '<=>' in equation:
        arrow, reversible = '<=>', True
    elif '=>' in equation:
        arrow, reversible = '=>', False
    else:
        raise ValueError(f"Invalid reaction equation format: {equation}")

    sides = equation.split(arrow)
    # Exactly one arrow is allowed; a leftover '=>' on either side means the
    # equation chained several arrows together.
    if len(sides) != 2 or any('=>' in side for side in sides):
        raise ValueError(f"Invalid reaction equation format: {equation}")
    left, right = sides

    def parse_compounds(side):
        """Return {"id_compartment": coefficient} for one side of the equation."""
        compounds = {}
        # Matches "(number) id_compartment", e.g. "(1) cpd00001_c0".
        matches = re.finditer(r'\(([0-9.]+)\)\s*([^\s_]+)_([^\s]+)', side)
        for match in matches:
            coef = float(match.group(1))
            met_id = match.group(2)
            compartment = match.group(3)
            # Re-join id and compartment so the key equals the full
            # metabolite id as it appears in the equation.
            compounds[f"{met_id}_{compartment}"] = coef
        return compounds

    reactants = parse_compounds(left)
    products = parse_compounds(right)

    return reactants, products, reversible

def parse_gene_rule(gene_rule):
    """Normalise the whitespace of a gene-reaction rule.

    The rule is split on ' and ' first and, inside each resulting piece, on
    ' or '. Every piece is stripped of surrounding whitespace and the pieces
    are joined back with the same operator, so the logical content of the
    rule is left as written.

    Args:
        gene_rule: Rule string such as ``"g1 and (g2 or g3)"``.

    Returns:
        The rule with each operand stripped of leading/trailing whitespace.
    """
    def _normalise(text):
        text = text.strip()
        # ' and ' is tried before ' or ', so an expression containing both
        # is split on ' and ' at this level.
        for operator in (' and ', ' or '):
            if operator in text:
                pieces = text.split(operator)
                return operator.join(_normalise(piece) for piece in pieces)
        # No operator left: this is a bare operand (gene id).
        return text

    return _normalise(gene_rule)

# Spreadsheet column -> annotation key, one table per sheet.
_METABOLITE_ANNOTATION_COLUMNS = {
    'seed.compound': 'seed.compound',
    'BiGG.compound': 'bigg.metabolite',
    'KEGG.compound': 'kegg.compound',
    'MetaNetX.compound': 'metanetx.chemical',
    'Biocyc': 'biocyc',
    'inchikey': 'inchikey',
    'sboTerm': 'sbo',
    'Reactome Compound': 'reactome',
    'CHEBI': 'chebi',
    'Human Metabolome Database': 'hmdb',
    # NOTE(review): a "Substance" column is stored under the
    # 'pubchem.compound' key - confirm which PubChem id type the sheet holds.
    'PubChem Substance': 'pubchem.compound',
}

_GENE_ANNOTATION_COLUMNS = {
    'EC_number': 'ec-code',
    'UniProt': 'uniprot',
    'NCBI_ProteinID': 'ncbigi',
    'sboTerm': 'sbo',
}

_REACTION_ANNOTATION_COLUMNS = {
    'seed.reaction': 'seed.reaction',
    'BiGG.reaction': 'bigg.reaction',
    'KEGG.reaction': 'kegg.reaction',
    'ec-code': 'ec-code',
    'rhea': 'rhea',
    'MetanetX.reaction': 'metanetx.reaction',
    'biocyc': 'biocyc',
    'sboTerm': 'sbo',
}


def _collect_annotations(row, column_map, first_of_many=()):
    """Build an annotation dict from the non-empty cells of one sheet row.

    Args:
        row: pandas Series holding one spreadsheet row.
        column_map: Mapping of spreadsheet column name -> annotation key.
            Columns missing from the sheet are skipped.
        first_of_many: Column names whose cell may hold several
            ';'-separated values; only the first value is kept.

    Returns:
        dict mapping annotation key to the cell content as a string.
    """
    annotations = {}
    for column, key in column_map.items():
        value = row.get(column)
        if pd.isna(value):
            continue
        text = str(value)
        if column in first_of_many:
            text = text.split(';')[0].strip()
        annotations[key] = text
    return annotations


def create_sbml_model_from_excel(excel_file, output_file="iCZ870_MM.xml"):
    """Build a COBRA model from an Excel workbook and write it as SBML.

    The workbook must contain three sheets:

    * ``metabolites`` with columns ``metabolites_id``, ``name``, ``formula``
      and ``charge``;
    * ``genes`` with columns ``GeneID`` and ``Gene_name``;
    * ``reactions`` with columns ``ID``, ``Name``, ``lb``, ``ub``,
      ``Equation`` and ``Genes``.

    Optional annotation columns are listed in the ``_*_ANNOTATION_COLUMNS``
    tables; empty or missing cells are ignored.

    A reaction whose row cannot be processed (unparsable equation, unknown
    metabolite id, invalid bounds, ...) is reported on stdout and left out of
    the model; the remaining rows are still processed.

    Args:
        excel_file: Path (or file-like object) accepted by ``pd.read_excel``.
        output_file: Path of the SBML file to write.

    Returns:
        The populated ``cobra.Model``.
    """
    model = Model('GEM_model')

    reactions_df = pd.read_excel(excel_file, sheet_name='reactions')
    metabolites_df = pd.read_excel(excel_file, sheet_name='metabolites')
    genes_df = pd.read_excel(excel_file, sheet_name='genes')

    # Declare the compartments, including the periplasm.
    model.compartments = {'c': 'cytosol', 'e': 'extracellular', 'p': 'periplasm'}

    # --- metabolites ------------------------------------------------------
    metabolites = {}
    for _, row in metabolites_df.iterrows():
        met_id = row['metabolites_id']

        # The compartment is the text after the LAST underscore. '\w' also
        # matches '_', so r'_(\w+)$' would start at the first underscore and
        # swallow the rest of a multi-underscore id; '[^\W_]' excludes it.
        comp_match = re.search(r'_([^\W_]+)$', met_id)
        compartment = comp_match.group(1) if comp_match else 'c'  # default: cytosol

        # Spreadsheet numbers arrive as pandas/numpy floats; store the charge
        # as a plain Python int.
        charge = int(float(row['charge'])) if pd.notna(row['charge']) else 0

        met = Metabolite(
            id=met_id,
            name=row['name'] if pd.notna(row['name']) else met_id,
            formula=row['formula'] if pd.notna(row['formula']) else None,
            charge=charge,
            compartment=compartment
        )
        # A KEGG cell may list several ids separated by ';'; keep the first.
        met.annotation = _collect_annotations(
            row, _METABOLITE_ANNOTATION_COLUMNS, first_of_many=('KEGG.compound',)
        )
        metabolites[met_id] = met
        model.add_metabolites([met])

    # --- genes ------------------------------------------------------------
    genes = {}
    for _, row in genes_df.iterrows():
        gene_id = row['GeneID']
        gene = Gene(
            id=gene_id,
            name=row['Gene_name'] if pd.notna(row['Gene_name']) else gene_id
        )
        gene.annotation = _collect_annotations(row, _GENE_ANNOTATION_COLUMNS)
        genes[gene_id] = gene

    # Genes are registered before any reaction is added.
    model.genes += list(genes.values())

    # --- reactions --------------------------------------------------------
    for _, row in reactions_df.iterrows():
        try:
            reaction = Reaction(row['ID'])
            reaction.name = row['Name'] if pd.notna(row['Name']) else row['ID']

            # Assign both bounds in one step so the pair is validated
            # together rather than each new bound against the other's
            # default value.
            reaction.bounds = (float(row['lb']), float(row['ub']))

            if pd.notna(row['Equation']):
                try:
                    reactants, products, reversible = parse_reaction_equation(row['Equation'])

                    # Keys are full metabolite ids (including compartment).
                    # Two separate calls are kept so a metabolite present on
                    # both sides contributes both coefficients.
                    reaction.add_metabolites(
                        {metabolites[met_id]: -coef for met_id, coef in reactants.items()}
                    )
                    reaction.add_metabolites(
                        {metabolites[met_id]: coef for met_id, coef in products.items()}
                    )

                    # NOTE(review): this appears to set a plain attribute
                    # only; the flux range is given by the bounds above -
                    # confirm nothing downstream relies on it.
                    reaction.reversible = reversible

                except Exception as e:
                    print(f"Error parsing reaction equation for {row['ID']}: {str(e)}")
                    continue

            # Gene association ('and' / 'or' logic). str() keeps a purely
            # numeric gene id, which pandas reads as a number, from failing
            # in parse_gene_rule.
            if pd.notna(row['Genes']):
                reaction.gene_reaction_rule = parse_gene_rule(str(row['Genes']))

            reaction.annotation = _collect_annotations(row, _REACTION_ANNOTATION_COLUMNS)
            model.add_reactions([reaction])

        except Exception as e:
            print(f"Error adding reaction {row['ID']}: {str(e)}")
            continue

    # Save the model as an SBML file.
    cobra.io.write_sbml_model(model, output_file)
    return model

# Run the conversion on the workbook below; the resulting model is kept in
# the module-level name `model`.
excel_file = "14067gem20_MM.xlsx"
model = create_sbml_model_from_excel(excel_file)

# print xml文件细节

In [None]:
import cobra

# Load the SBML file.
file_path = "core_output_core_model.xml"  # replace with the path to your SBML file
model = cobra.io.read_sbml_model(file_path)

# Print basic model information: name and the number of reactions,
# metabolites and genes.
print(f"模型名称: {model.name}")
print(f"反应数: {len(model.reactions)}")
print(f"代谢物数: {len(model.metabolites)}")
print(f"基因数: {len(model.genes)}")

# Print every reaction: id, name, equation, flux bounds and reversibility.
print("\n反应信息:")
for reaction in model.reactions:
    print(f"反应ID: {reaction.id}")
    print(f"反应名称: {reaction.name}")
    print(f"反应公式: {reaction.reaction}")  # reaction equation
    print(f"上下限: {reaction.lower_bound} ≤ flux ≤ {reaction.upper_bound}")  # flux range
    print(f"是否可逆: {'是' if reaction.reversibility else '否'}")
    print("-" * 40)

# Print every metabolite: id, name, formula and charge.
print("\n代谢物信息:")
for metabolite in model.metabolites:
    print(f"代谢物ID: {metabolite.id}")
    print(f"代谢物名称: {metabolite.name}")
    print(f"化学式: {metabolite.formula}")
    print(f"电荷: {metabolite.charge}")
    print("-" * 40)


In [None]:
from cobra.io import read_sbml_model

# Load the model.
model = read_sbml_model("14067gem10.xml")

# Print the GPR (gene-reaction rule) of every reaction.
for reaction in model.reactions:
    print(f"Reaction: {reaction.id}, GPR: {reaction.gene_reaction_rule}")

# Count the reactions that have a non-empty GPR rule.
reactions_with_gpr = [rxn for rxn in model.reactions if rxn.gene_reaction_rule]
print(f"Total reactions: {len(model.reactions)}")
print(f"Reactions with GPR: {len(reactions_with_gpr)}")

# Print the number of genes in the model.
print(f"Total genes: {len(model.genes)}")

# Count the genes that are associated with at least one reaction.
genes_with_reactions = [gene for gene in model.genes if gene.reactions]
print(f"Genes associated with reactions: {len(genes_with_reactions)}")
# Show the objective the model currently has.
print(f"Current objective: {model.objective.expression}")

# List every reaction id so the biomass reaction can be located by eye.
for reaction in model.reactions:
    print(reaction.id)

# Use the reaction "Bio_cgATCC14067_c" (taken to be the biomass reaction)
# as the objective function.
model.objective = "Bio_cgATCC14067_c"

# Verify that the objective was set.
print(f"New objective: {model.objective.expression}")

# Save the updated model; this overwrites the file that was loaded above.
from cobra.io import write_sbml_model
write_sbml_model(model, "14067gem10.xml")

# 进行FBA分析确认生长

In [None]:
from cobra.io import read_sbml_model

def validate_and_set_objective(model, default_reaction_id=None):
    """Report the model's objective and fall back to a default if it has none.

    When the model's objective (or its expression) is falsy, a warning is
    printed and, if ``default_reaction_id`` names a reaction in the model,
    that reaction becomes the objective. Every outcome is reported on stdout.
    Any exception raised while checking is printed, not propagated.

    Args:
        model (cobra.Model): Model to inspect; it may be modified in place.
        default_reaction_id (str): Reaction id to use as the objective when
            none is set. Optional.

    Returns:
        cobra.Model: The same model object that was passed in.
    """
    try:
        current = model.objective
        if current and current.expression:
            # An objective is already present: just report it.
            print(f"Objective function is set to: {current.expression}")
        else:
            print("Warning: Model does not have an objective function.")

            if not default_reaction_id:
                print("No default reaction provided. Objective function not set.")
            elif default_reaction_id in model.reactions:
                model.objective = model.reactions.get_by_id(default_reaction_id)
                print(f"Default objective function set to reaction: {default_reaction_id}")
            else:
                print(f"Error: Reaction ID '{default_reaction_id}' not found in the model.")

    except Exception as e:
        print(f"Error during objective function validation: {e}")

    return model

if __name__ == "__main__":
    # Load the model.
    file_path = "iCZ871_CGXII.xml"
    try:
        model = read_sbml_model(file_path)
        print(f"Model successfully loaded from {file_path}.")
    except Exception as e:
        print(f"Error loading model: {e}")
        model = None

    # Everything below needs a loaded model; without this guard a failed
    # load would be followed by an AttributeError on None.
    if model is not None:
        # Ids of reactions whose flux should be fixed to 1. The empty string
        # is only a placeholder and is skipped.
        reaction_ids_1 = {''}
        for reaction_id in reaction_ids_1:
            if not reaction_id:
                continue
            if reaction_id in model.reactions:
                reaction = model.reactions.get_by_id(reaction_id)
                print(f"反应 {reaction_id} 原始上下限: {reaction.lower_bound} 至 {reaction.upper_bound}")
                reaction.lower_bound = 1  # set the lower bound
                reaction.upper_bound = 1  # set the upper bound
                print(f"反应 {reaction_id} 修改后上下限: {reaction.lower_bound} 至 {reaction.upper_bound}")
            else:
                print(f"反应 {reaction_id} 不存在于模型中！")

        # Use the biomass reaction as the objective and run FBA.
        biomass_rxn_id = "Bio_cgATCC14067_c"
        biomass_rxn = model.reactions.get_by_id(biomass_rxn_id)
        model.objective = biomass_rxn
        fba_results = model.optimize()
        # The objective value is only meaningful for an optimal solution.
        if fba_results.status != "optimal":
            print(f"Warning: solver status is '{fba_results.status}'.")
        print(f"Objective Value (FBA): {fba_results.objective_value}")

        print("\nReaction fluxes (non-zero):")
        for reaction in model.reactions:
            try:
                flux = fba_results.fluxes[reaction.id]
                # Small threshold instead of comparing floats with zero.
                if abs(flux) > 1e-6:
                    print(f"{reaction.id}: {flux}")
            except KeyError:
                continue

# 识别NAD/NADP

In [None]:
import xml.etree.ElementTree as ET
import pandas as pd

# Cofactor table: SBML species id -> cofactor name. Only the ids listed
# here are treated as cofactors by the helper functions below.
COFACTORS = {
    'M_cpd00003_c': 'NAD',
    'M_cpd00004_c': 'NADH',
    'M_cpd00005_c': 'NADPH',
    'M_cpd00006_c': 'NADP'
}

# Counterpart relation between cofactors: each tuple holds two species ids
# that may stand in for each other in a reaction pair.
COFACTOR_PAIRS = [
    ('M_cpd00003_c', 'M_cpd00006_c'),  # NAD <-> NADP
    ('M_cpd00004_c', 'M_cpd00005_c')   # NADH <-> NADPH
]

def parse_reaction(reaction_element, namespace):
    """Extract id, name, equation and stoichiometry from an SBML reaction.

    Args:
        reaction_element: ``xml.etree.ElementTree.Element`` of one
            ``<reaction>``.
        namespace: Namespace prefix in ElementTree form (``"{uri}"``), or
            ``""`` when the document has no namespace.

    Returns:
        Tuple ``(reaction_id, reaction_name, equation, reactants, products)``.
        ``reaction_name`` falls back to the id when the element has no
        ``name`` attribute. ``reactants`` and ``products`` map species id to
        a float stoichiometry (1.0 when the attribute is absent).
        ``equation`` is rendered as ``"(coef) id + ... => (coef) id + ..."``,
        using ``<=>`` when the element's ``reversible`` attribute is true.
    """
    reaction_id = reaction_element.get('id')
    reaction_name = reaction_element.get('name', reaction_id)

    def read_side(list_tag):
        """Return {species id: stoichiometry} for one listOf... child."""
        path = f'.//{namespace}{list_tag}/{namespace}speciesReference'
        return {
            reference.get('species'): float(reference.get('stoichiometry', '1'))
            for reference in reaction_element.findall(path)
        }

    def render_side(compounds):
        """Format one side of the equation as "(coef) id + (coef) id"."""
        return " + ".join(f"({stoich}) {s_id}" for s_id, stoich in compounds.items())

    reactants = read_side('listOfReactants')
    products = read_side('listOfProducts')

    # XML booleans may be written as "true" or "1". Reversible reactions get
    # '<=>' so the rendered equation does not present them as one-way.
    is_reversible = reaction_element.get('reversible') in ('true', '1')
    arrow = "<=>" if is_reversible else "=>"
    equation = f"{render_side(reactants)} {arrow} {render_side(products)}"

    return reaction_id, reaction_name, equation, reactants, products

def get_non_cofactors(compounds):
    """Return a new dict holding the entries of ``compounds`` whose key is not in COFACTORS."""
    remaining = {}
    for species_id, coefficient in compounds.items():
        if species_id in COFACTORS:
            continue
        remaining[species_id] = coefficient
    return remaining

def has_cofactor(compounds):
    """Return True if at least one COFACTORS id is contained in ``compounds``."""
    for cofactor_id in COFACTORS:
        if cofactor_id in compounds:
            return True
    return False

def is_cofactor_pair(rxn1, rxn2):
    """Tell whether two reactions differ only by a NAD(H) <-> NADP(H) swap.

    Two reactions form a pair when

    * their non-cofactor reactants and products are identical (same species,
      same coefficients), and
    * replacing the cofactors of ``rxn1`` according to COFACTOR_PAIRS, in
      one direction or the other, yields exactly the cofactors of ``rxn2``
      on both sides, with at least one cofactor actually replaced.

    The check is symmetric in its arguments. Matching a single swapped
    cofactor is not sufficient: all cofactors on both sides must agree after
    the swap, so reactions that also differ in another cofactor are rejected.

    Args:
        rxn1: dict with 'reactants' and 'products' entries, each mapping
            species id to a coefficient.
        rxn2: dict of the same form.

    Returns:
        True if the reactions form a cofactor pair, otherwise False.
    """
    sides = ('reactants', 'products')

    # Everything that is not a tracked cofactor must match exactly.
    for side in sides:
        if get_non_cofactors(rxn1[side]) != get_non_cofactors(rxn2[side]):
            return False

    def cofactor_part(compounds):
        """Entries of ``compounds`` whose key is a tracked cofactor."""
        return {s_id: coef for s_id, coef in compounds.items() if s_id in COFACTORS}

    def swap(compounds, mapping):
        """Rename keys through ``mapping``; None if two keys would collide."""
        swapped = {}
        for s_id, coef in compounds.items():
            target = mapping.get(s_id, s_id)
            if target in swapped:
                return None
            swapped[target] = coef
        return swapped

    forward = dict(COFACTOR_PAIRS)
    backward = {second: first for first, second in COFACTOR_PAIRS}

    for mapping in (forward, backward):
        # Without a replaceable cofactor in rxn1 the swap is a no-op and
        # would report two identical reactions as a pair.
        if not any(s_id in rxn1[side] for side in sides for s_id in mapping):
            continue
        if all(
            swap(cofactor_part(rxn1[side]), mapping) == cofactor_part(rxn2[side])
            for side in sides
        ):
            return True

    return False

def find_nad_nadp_pairs(model_file):
    """Find reaction pairs in an SBML file that differ only in NAD/NADP cofactor.

    Every ``<reaction>`` element of the file is parsed, the reactions that
    involve a COFACTORS species are kept, and each of them is matched with
    the first not-yet-paired reaction accepted by ``is_cofactor_pair``. The
    pairs are written to 'nad_nadp_reaction_pairs.xlsx' (a header-only sheet
    when nothing is found) and progress is printed to stdout.

    Args:
        model_file: Path of the SBML/XML file to parse.

    Returns:
        pandas.DataFrame with one row per pair and the columns
        Reaction{1,2}_ID, Reaction{1,2}_Name and Reaction{1,2}_Equation.
    """
    root = ET.parse(model_file).getroot()

    # ElementTree spells a namespaced tag as "{uri}local"; keep the "{uri}"
    # part so it can be prefixed to every tag that is searched for.
    ns_prefix = ('{' + root.tag.split('}')[0][1:] + '}') if root.tag.startswith('{') else ''

    # One dict per reaction, keyed in the order parse_reaction returns them.
    field_names = ('id', 'name', 'equation', 'reactants', 'products')
    all_reactions = [
        dict(zip(field_names, parse_reaction(element, ns_prefix)))
        for element in root.findall(f'.//{ns_prefix}reaction')
    ]
    print(f"共解析到 {len(all_reactions)} 个反应")

    # Only reactions that touch a cofactor can be part of a pair.
    cofactor_reactions = [
        rxn for rxn in all_reactions
        if has_cofactor(rxn['reactants']) or has_cofactor(rxn['products'])
    ]
    print(f"其中包含辅酶的反应有 {len(cofactor_reactions)} 个")

    pairs = []
    processed = set()  # ids of reactions that already belong to a pair

    for idx, first in enumerate(cofactor_reactions):
        if first['id'] in processed:
            continue

        # First still-unpaired partner, scanning the whole list in order.
        partner = next(
            (
                second
                for jdx, second in enumerate(cofactor_reactions)
                if jdx != idx
                and second['id'] not in processed
                and is_cofactor_pair(first, second)
            ),
            None,
        )
        if partner is None:
            continue

        pairs.append({
            'Reaction1_ID': first['id'],
            'Reaction1_Name': first['name'],
            'Reaction1_Equation': first['equation'],
            'Reaction2_ID': partner['id'],
            'Reaction2_Name': partner['name'],
            'Reaction2_Equation': partner['equation']
        })
        processed.update((first['id'], partner['id']))

    # Save the result.
    output_file = 'nad_nadp_reaction_pairs.xlsx'
    df = pd.DataFrame(pairs)

    if df.empty:
        print("未找到符合条件的反应对")
        # Still write the header row so the output file has the same layout.
        df = pd.DataFrame(columns=['Reaction1_ID', 'Reaction1_Name', 'Reaction1_Equation',
                                   'Reaction2_ID', 'Reaction2_Name', 'Reaction2_Equation'])
        df.to_excel(output_file, index=False)
    else:
        df.to_excel(output_file, index=False)
        print(f"找到 {len(pairs)} 对只有NAD/NADP辅酶不同的反应")

    print(f"结果已保存到 {output_file}")
    return df

if __name__ == "__main__":
    # Search the model below for NAD/NADP reaction pairs; the resulting
    # DataFrame is kept in `pairs`.
    model_file = "iCZ871_CGXII.xml"  # replace with the path to your XML file
    pairs = find_nad_nadp_pairs(model_file)