# 从gene中提取信息

In [None]:
import re
import pandas as pd

# 读取FASTA文件并解析信息
def read_fasta(file_path):
    with open(file_path, 'r') as file:
        content = file.read()

    # 创建正则表达式：分别匹配gene名、locus_tag、EC_number、GO_function、product和protein_id
    gene_name_pattern = re.compile(r'/gene="([^"]+)"')  # 匹配gene名
    locus_tag_pattern = re.compile(r'/locus_tag="([^"]+)"')
    ec_number_pattern = re.compile(r'/EC_number="([^"]+)"')
    go_function_pattern = re.compile(r'/GO_function="([^"]+)"')
    product_pattern = re.compile(r'/product="([^"]+)"')
    protein_id_pattern = re.compile(r'/protein_id="([^"]+)"')

    genes_data = []

    # 按 "gene" 分割文件内容
    genbank_sections = content.split("gene            ")

    # 遍历每个分割后的基因信息
    for section in genbank_sections: 
        locus_tag_match = locus_tag_pattern.search(section)
        if locus_tag_match:
            gene_name = gene_name_pattern.search(section)
            locus_tag = locus_tag_pattern.search(section)
            ec_number = ec_number_pattern.search(section)
            go_function = go_function_pattern.findall(section)
            product = product_pattern.search(section)
            protein_id = protein_id_pattern.search(section)
                # 将提取的信息添加到字典中
            gene_data = {
                'Gene_name': gene_name.group(1) if gene_name else '',
                'Locus_tag': locus_tag.group(1) if locus_tag else '',
                'EC_number': ec_number.group(1) if ec_number else '',
                'GO_function': ', '.join(go_function) if go_function else '',
                'Product': product.group(1) if product else '',
                'Protein_id': protein_id.group(1) if protein_id else ''
            }
                
            genes_data.append(gene_data)

    return genes_data

# 读取FASTA文件路径
file_path = '1.Replace genes in cg14067_rxn/genomic.gbff'  # 替换为你的FASTA文件路径

# 提取基因数据
genes_data = read_fasta(file_path)

# 如果没有数据，打印错误提示
if not genes_data:
    print("未找到符合要求的基因信息，请检查文件格式或内容。")
else:
    # 将数据转换为DataFrame
    df = pd.DataFrame(genes_data)

    # 将结果保存到Excel文件
    output_file = '2.Expand GEM/14067gene.xlsx'
    df.to_excel(output_file, index=False)

    print(f"数据已成功提取并保存到 {output_file}")


# 从coryneRgeNet7 中爬取与13032的同源信息

In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.options import Options

# 初始化Edge WebDriver
options = Options()
options.add_argument("--headless")  # 如果你不希望打开浏览器窗口，取消注释此行
driver = webdriver.Edge(options=options)

# 读取xlsx文件，提取locusTag列
xlsx_file = '2.Expand GEM/14067gene.xlsx'  # 替换为你的文件路径
df = pd.read_excel(xlsx_file)

# 创建一个新列来存储爬取的内容
df['Predicted Genes'] = None

# 逐行遍历xlsx文件的locusTag进行爬取
for index, row in df.iterrows():
    if row['Predicted Genes'] is None:
        locusTag = row['locus_tag']  # 假设locusTag列的名字是'locusTag'
        
        # 打开目标网页
        url = f"https://exbio.wzw.tum.de/coryneregnet/geneInfo.htm?locusTag={locusTag}&type=predicted"
        driver.get(url)
        time.sleep(2)  # 等待页面加载
        
        # 点击指定的按钮
        try:
            button = driver.find_element(By.XPATH, '/html/body/div[2]/ul/li[3]/a')  # 点击目标按钮
            button.click()
            time.sleep(2)  # 等待新页面加载
        except Exception as e:
            print(f"Error clicking button for locusTag {locusTag}: {e}")
            continue  # 如果找不到按钮，跳过当前循环
    
        # 定位到输入框并输入13032进行搜索
        try:
            input_box = driver.find_element(By.XPATH, '//*[@id="homologous-table_filter"]/label/input')
            input_box.clear()
            input_box.send_keys('13032')
            input_box.send_keys(Keys.RETURN)  # 模拟回车
            time.sleep(2)  # 等待搜索结果加载
            
            # 获取搜索结果的链接
            result = driver.find_element(By.XPATH, '//*[@id="homologous-table"]/tbody/tr/td[1]/span/a')
            gene_link = result.get_attribute('href')
            print(gene_link)
            df.at[index, 'Predicted Genes'] = gene_link
        except Exception as e:
            df.at[index, 'Predicted Genes'] = None
            print(f"Error for locusTag {locusTag}: {e}")

# 关闭浏览器
driver.quit()

# 保存结果到新的xlsx文件
output_file = '2.Expand GEM/14067map13032.xlsx'  # 替换为你希望保存的文件路径
df.to_excel(output_file, index=False)

print(f"爬取完成，结果已保存到 {output_file}")


In [None]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.options import Options

# 初始化Edge WebDriver
options = Options()
options.add_argument("--headless")  # 如果你不希望打开浏览器窗口，取消注释此行
driver = webdriver.Edge(options=options)

# 读取xlsx文件，提取locusTag列
xlsx_file = '14067map13032_1.xlsx'  # 替换为你的文件路径
df = pd.read_excel(xlsx_file)

# 找到Predicted Genes列为空的行
empty_predicted_genes = df[df['Predicted Genes'].isna()]

# 逐行遍历Predicted Genes列为空的行
for index, row in empty_predicted_genes.iterrows():
    locusTag = row['locus_tag']  # 假设locusTag列的名字是'locusTag'
    
    # 打开目标网页
    url = f"https://exbio.wzw.tum.de/coryneregnet/geneInfo.htm?locusTag={locusTag}&type=predicted"
    driver.get(url)
    time.sleep(2)  # 等待页面加载
    
    # 点击指定的按钮
    try:
        button = driver.find_element(By.XPATH, '/html/body/div[2]/ul/li[3]/a')  # 点击目标按钮
        button.click()
        time.sleep(2)  # 等待新页面加载
    except Exception as e:
        print(f"Error clicking button for locusTag {locusTag}: {e}")
        continue  # 如果找不到按钮，跳过当前循环

    # 定位到输入框并输入13032进行搜索
    try:
        input_box = driver.find_element(By.XPATH, '//*[@id="homologous-table_filter"]/label/input')
        input_box.clear()
        input_box.send_keys('13032')
        input_box.send_keys(Keys.RETURN)  # 模拟回车
        time.sleep(2)  # 等待搜索结果加载
        
        # 获取搜索结果的链接
        result = driver.find_element(By.XPATH, '//*[@id="homologous-table"]/tbody/tr/td[1]/span/a')
        gene_link = result.get_attribute('href')
        print(gene_link)
        df.at[index, 'Predicted Genes'] = gene_link  # 更新 Predicted Genes 列
    except Exception as e:
        df.at[index, 'Predicted Genes'] = None
        print(f"Error for locusTag {locusTag}: {e}")

# 关闭浏览器
driver.quit()

# 保存结果到新的xlsx文件
output_file = '2.Expand GEM/14067map13032.xlsx'  # 替换为你希望保存的文件路径
df.to_excel(output_file, index=False)

print(f"爬取完成，结果已保存到 {output_file}")


手动处理不明确的EC号

# 从KEGG获取有关cg的ec信息

In [85]:
import pandas as pd
import time
# 设置请求的重试机制
def create_session():
    session = requests.Session()
    retry = Retry(
        total=3,  # 设置最多重试次数
        backoff_factor=1,  # 设置退避因子，重试间隔会指数增长
        status_forcelist=[500, 502, 503, 504]  # 针对这些HTTP错误进行重试
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    return session

# 获取基因详细信息
def get_gene_info(gene_id, session):
    # 构建KEGG API的URL，使用get操作获取基因详细信息
    url = f"https://rest.kegg.jp/get/cgb:{gene_id}"
    # 发送GET请求
    response = session.get(url)
    # 检查响应状态码
    if response.status_code == 200:
        # 获取响应文本
        text = response.text
        # 按行分割文本，并取前50行
        lines = text.splitlines()
        # 只返回前50行数据
        return "\n".join(lines[:50])
    else:
        return "Failed to retrieve gene information."

# 提取EC号
def getEC(gene_info):
    ec_pattern = re.compile(r'\[EC:([^\]]+)\]')
    matches = ec_pattern.findall(gene_info)
    return matches

# 读取Excel文件
df = pd.read_excel('14067map13032.xlsx')
# 创建一个会话对象
session = create_session()

# 遍历基因列，查询KEGG并添加EC号
for index, row in df.iterrows():
    gene = row['Predicted Genes']
    ec_number = row['EC_number']
    print(gene, ec_number)  # 获取现有的EC号
    if pd.notnull(gene) and pd.isnull(ec_number):
        gene_info = get_gene_info(gene, session)  # 修正为调用 get_gene_info
        if gene_info != "Failed to retrieve gene information.":
            time.sleep(2)  # 控制请求间隔为2秒，避免过于频繁
            new_ec_number = getEC(gene_info)
            if new_ec_number:
                df.at[index, 'EC_number'] = ', '.join(new_ec_number)
                print(f"替换: {gene} -> {new_ec_number}")
            else:
                print(f"{gene} 没有找到EC号")
        else:
            print(f"{gene} 查询失败或没有结果")

# 保存修改后的Excel文件
df.to_excel('14067map13032_NU.xlsx', index=False)

print('EC号已成功添加并保存到 14067map13032_NU.xlsx')


cg0001 nan
cg0001 没有找到EC号
cg0004 2.7.7.7
cg0005 nan
cg0005 没有找到EC号
cg0006 nan
cg0006 没有找到EC号
cg0007 5.6.2.2
cg0008 nan
cg0008 没有找到EC号
cg0009 nan
cg0009 没有找到EC号
cg0010 nan
cg0010 没有找到EC号
cg0012 nan
cg0012 没有找到EC号
cg0013 nan
cg0013 没有找到EC号
cg0014 nan
cg0014 没有找到EC号
cg0015 5.6.2.2
cg0016 nan
cg0016 没有找到EC号
nan nan
nan nan
cg0018 nan
cg0018 没有找到EC号
cg0019 nan
cg0019 没有找到EC号
cg0021 nan
cg0021 没有找到EC号
cg0025 nan
cg0025 没有找到EC号
cg0026 nan
cg0026 没有找到EC号
cg0027 nan
cg0027 没有找到EC号
cg0029 nan
cg0029 没有找到EC号
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
nan nan
cg0419 nan
cg0419 没有找到EC号
nan nan
nan nan
nan nan
nan nan
cg0038 nan
替换: cg0038 -> ['1.11.1.28']
cg0040 nan
cg0040 没有找到EC号
cg0041 nan
cg0041 没有找到EC号
cg0042 nan
cg0042 没有找到EC号
cg0043 nan
cg0043 没有找到EC号
cg0044 nan
cg0044 没有找到EC号
cg0045 nan
cg0045 没有找到EC号
cg0046 nan
替换: cg0046 -> ['7.5.2.-']
cg0047 nan
cg0047 没有找到EC号
cg0048 5.2.1.8
cg0049 nan
cg0049 没有找到EC号
cg00

# 进一步将CLEAN 和 ECRECer 预测相同的EC号补充进入表格

In [86]:
import pandas as pd

# 读取TSV文件，假设格式为protein_id号和EC号
tsv_file = '2.Expand GEM/利用CLEAN和ECRECer预测的EC号/SAME_EC.csv'  # 输入你的TSV文件路径
tsv_df = pd.read_csv(tsv_file, sep=',', header=None, names=['Protein_id', 'EC_number'])

# 读取Excel文件，假设有'protein_id'列需要匹配并填充'EC Number'列
excel_file = '2.Expand GEM/14067map13032_EC1.xlsx'  # 输入你的Excel文件路径
df = pd.read_excel(excel_file)

# 清理数据：去除多余的空格并统一大小写
tsv_df['Protein_id'] = tsv_df['Protein_id'].str.strip().str.upper()  # 统一去空格和大小写
df['Protein_id'] = df['Protein_id'].str.strip().str.upper()  # 同样清理Excel中的AST列

# 创建protein_id到EC号的映射字典
ast_to_ec = dict(zip(tsv_df['Protein_id'], tsv_df['EC_number']))
print(ast_to_ec)
# 遍历Excel文件中的'protein_id'列，查找并填充相应的EC号，只对EC号为空的行进行映射
def match_ec(row):
    if pd.isnull(row['EC_number']):  # 仅当EC Number为空时才进行映射
        ast_value = row['Protein_id']
        ec_number = ast_to_ec.get(ast_value, None)  # 如果找不到匹配的EC号，则返回None
        if ec_number:  # 如果找到了EC号，就进行替换
            print(f"替换 protein_id: {ast_value} -> 新的 EC_number: {ec_number}")
        return ec_number
    else:
        return row['EC_number']  # 如果已有EC号，则保持不变

# 将EC号添加到'EC Number'列
df['EC_number'] = df.apply(match_ec, axis=1)

# 保存修改后的Excel文件
output_file = '2.Expand GEM/14067map13032_EC2.xlsx'
df.to_excel(output_file, index=False)

print(f'EC号已成功添加并保存为 {output_file}')


{'PROTEIN_ID': 'EC_number', 'WP_004567953.1': '4.2.1.20', 'WP_003854900.1': '1.1.1.3', 'WP_003862609.1': '4.2.1.51', 'WP_003862250.1': '1.2.1.12', 'WP_003862252.1': '2.7.2.3', 'WP_170844394.1': '1.4.1.4', 'WP_003862033.1': '4.3.1.19', 'WP_003861135.1': '2.8.1.6', 'WP_003863349.1': '1.2.1.11', 'WP_003859378.1': '4.1.3.1', 'WP_003854117.1': '1.1.1.86', 'WP_003862874.1': '2.7.2.1', 'WP_003856413.1': '4.3.2.10', 'WP_003858678.1': '6.3.4.5', 'WP_003855910.1': '2.7.6.5', 'WP_011897259.1': '3.6.1.31', 'WP_003860435.1': '4.2.1.10', 'WP_003857183.1': '4.1.1.11', 'WP_003859897.1': '3.6.4.12', 'WP_003856418.1': '5.3.1.16', 'WP_003861983.1': '4.2.1.19', 'WP_006284182.1': '2.7.4.22', 'WP_003856756.1': '4.2.1.11', 'WP_003859294.1': '2.7.4.6', 'WP_003858849.1': '1.1.1.94', 'WP_003862850.1': '2.4.2.10', 'WP_003862229.1': '2.1.2.9', 'WP_003856127.1': '3.6.1.27', 'WP_003856281.1': '6.3.4.2', 'WP_004568094.1': '1.17.7.4', 'WP_003860361.1': '2.1.1.199', 'WP_003862210.1': '2.1.3.2', 'WP_003860060.1': '2.7.

# 根据EC号获取相应的R{5}反应名称

In [87]:
import pandas as pd
import requests
import re
import time
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


# 设置请求的重试机制
def create_session():
    session = requests.Session()
    retry = Retry(
        total=3,  # 设置最多重试次数
        backoff_factor=1,  # 设置退避因子，重试间隔会指数增长
        status_forcelist=[500, 502, 503, 504]  # 针对这些HTTP错误进行重试
    )
    adapter = HTTPAdapter(max_retries=retry)
    session.mount('https://', adapter)
    session.mount('http://', adapter)
    return session


def get_rxn_from_ec(ec_number, session):
    try:
        # 构造URL并发送请求
        url = f"https://rest.kegg.jp/get/{ec_number}"
        response = session.get(url)
        time.sleep(5)  # 控制请求频率，避免过于频繁请求被限制

        if response.status_code == 200:
            data = response.text.strip()
            start = data.find("ALL_REAC")
            end = data.find("SUBSTRATE")

            # 提取反应编号
            if start != -1 and end != -1:
                relevant_text = data[start:end].strip()
                rxn_numbers = re.findall(r'R\d+', relevant_text)
                print(f"EC: {ec_number} -> RXNs: {rxn_numbers}")
                return rxn_numbers
            else:
                print(f"No relevant text found for EC number: {ec_number}")
                return None
        else:
            print(f"Failed to retrieve data for EC number: {ec_number}, Status code: {response.status_code}")
            return None
    except requests.exceptions.RequestException as e:
        print(f"Request failed for EC number: {ec_number}, Error: {e}")
        return None

# 定义安全解析函数，确保只对非空字符串进行解析
def safe_eval(val):
    if isinstance(val, str) and val.strip():  # 确保是非空字符串
        try:
            return eval(val)  # 转换为列表
        except:
            return None  # 如果转换失败返回None
    return None  # 如果不是字符串或为空，返回None

# 读取Excel文件
file_path = '2.Expand GEM/14067map13032_EC2.xlsx'  # 替换为您的文件路径
df = pd.read_excel(file_path)

# 新增一列用于存储RXN号
df['KEGG Rxn number'] = None

# 创建会话对象
session = create_session()

# 使用循环处理每一行
for index, row in df.iterrows():
    ec_number = row['EC_number']
    if pd.notnull(ec_number):
        rxn_numbers = get_rxn_from_ec(ec_number, session)
        df.at[index, 'KEGG Rxn number'] = ', '.join(rxn_numbers) if rxn_numbers else None

# 保存结果到新的Excel文件
output_file_path = '2.Expand GEM/14067map13032_R1.xlsx'  # 输出文件的路径
df.to_excel(output_file_path, index=False)
print(f"Results saved to {output_file_path}")

EC: 2.7.7.7 -> RXNs: ['R00379', 'R00375', 'R00376', 'R00377', 'R00378', 'R11029']
No relevant text found for EC number: 5.6.2.2
No relevant text found for EC number: 5.6.2.2
EC: 1.11.1.28 -> RXNs: ['R12602', 'R12603']
Failed to retrieve data for EC number: 7.5.2.-, Status code: 404
EC: 5.2.1.8 -> RXNs: ['R04273']
No relevant text found for EC number: 3.4.21.105
No relevant text found for EC number: 7.2.2.17 7.2.2.-
EC: 2.7.11.1 -> RXNs: ['R00162', 'R03632']
EC: 2.7.11.1 -> RXNs: ['R00162', 'R03632']
EC: 3.1.3.16 -> RXNs: ['R00164']
EC: 1.2.1.16 1.2.1.79 1.2.1.20 -> RXNs: ['R00713', 'R00714']
EC: 3.1.2.6 -> RXNs: ['R04090', 'R01736']
No relevant text found for EC number: 2.7.13.3
EC: 2.8.1.6 -> RXNs: ['R01078']
EC: 3.5.4.1 3.5.4.21 -> RXNs: ['R00974', 'R01411']
EC: 2.3.1.286 -> RXNs: ['R10634', 'R10633']
EC: 3.1.1.3 -> RXNs: ['R01369', 'R02250', 'R02687', 'R02688', 'R05209']
EC: 3.1.1.3 -> RXNs: ['R01369', 'R02250', 'R02687', 'R02688', 'R05209']
EC: 3.5.1.5 -> RXNs: ['R00131', 'R06134']

针对多个酶号的进行手动管理获取R{5}，并分开R

In [89]:
import pandas as pd

# 定义安全解析函数，将逗号分隔的字符串转换为列表
def safe_eval(val):
    if isinstance(val, str) and val.strip():  # 如果是非空字符串
        try:
            # 将逗号分隔的字符串转换为列表，并去除空格
            return [x.strip() for x in val.split(',')]
        except:
            return None  # 如果转换失败返回None
    elif isinstance(val, (int, float)):  # 如果是数字类型，直接转为单元素列表
        return [val]
    return None  # 如果是空值，返回None

# 读取数据
df = pd.read_excel('14067map13032_R1.xlsx')

# 应用 safe_eval 并过滤掉返回 None 的行
df['KEGG Rxn number'] = df['KEGG Rxn number'].apply(safe_eval)
df = df[df['KEGG Rxn number'].notna()]  # 只保留有值的行

# 使用 explode() 将 "KEGG Rxn number" 列展开成多行
df_exploded = df.explode('KEGG Rxn number', ignore_index=True)

# 清除空值行
df_exploded = df_exploded.dropna(subset=['KEGG Rxn number'])

# 保存结果到新的Excel文件
output_exploded_file = '2.Expand GEM/14067map13032_R2.xlsx'
df_exploded.to_excel(output_exploded_file, index=False)

print(f"已完成处理并保存到 {output_exploded_file} 文件")


已完成处理并保存到 14067map13032_R2.xlsx 文件


# 根据R{5}匹配modelseed rxn

In [92]:
import pandas as pd

# 读取xlsx文件
xlsx_file = '2.Expand GEM/14067map13032_R2.xlsx'  # 请替换为您的xlsx文件路径
df_xlsx = pd.read_excel(xlsx_file)

# 读取tsv文件
tsv_file = '4. add linker/modelseed_reactions.tsv'  # 请替换为您的tsv文件路径
df_tsv = pd.read_csv(tsv_file, sep='\t')

# 筛选出status列为OK的记录
df_tsv_ok = df_tsv[df_tsv['status'] == 'OK']

# 根据abbreviation和KEGG Rxn number进行匹配
# 这里假设xlsx文件中的KEGG Rxn number列名为'KEGG_Rxn_number'
# 假设tsv文件中的abbreviation列名为'abbreviation'
merged_df = pd.merge(df_xlsx, df_tsv_ok, left_on='KEGG Rxn number', right_on='abbreviation', how='left')

# 处理deltaG数据，将deltag和deltagerr拼接成新的列
merged_df['deltaG'] = merged_df['deltag'].astype(str) + '±' + merged_df['deltagerr'].astype(str) + ' (kcal/mol)'

# 选择需要的列，并重新排列
final_columns = df_xlsx.columns.tolist() + ['id', 'name', 'equation', 'definition', 'pathways', 'deltaG']
merged_df = merged_df[final_columns]
# 输出为新的xlsx文件
output_file = '14067map13032_rxn.xlsx'  # 请替换为您希望保存的输出文件路径
merged_df.to_excel(output_file, index=False)

print(f"合并完成，结果已保存到 {output_file}")


合并完成，结果已保存到 14067map13032_rxn.xlsx


# 将相同的反应合并起来

In [93]:
import pandas as pd

# 读取xlsx文件
xlsx_file = '2.Expand GEM/14067map13032_rxn1.xlsx'  # 请替换为您的xlsx文件路径
df = pd.read_excel(xlsx_file)

# 删除id列为空的行
df_cleaned = df.dropna(subset=['id'])

# 按id列分组，将同一个id的行合并
def merge_values(series):
    # 对于不同的值，用'//'连接
    return '//'.join(series.astype(str).unique())

# 对所有列进行处理，将同一id下不同的值合并
df_merged = df_cleaned.groupby('id', as_index=False).agg(merge_values)

# 输出合并后的文件
output_file = '2.Expand GEM/14067map13032_rxn2.xlsx'  # 请替换为您希望保存的输出文件路径
df_merged.to_excel(output_file, index=False)

print(f"数据处理完成，结果已保存到 {output_file}")


数据处理完成，结果已保存到 14067map13032_rxn2.xlsx


手动处理列表中的GPR

# 将检索到的rxn添加到模型中，扩展GEM

In [94]:
import pandas as pd

# 读取原始xlsx文件
original_file = '2.Expand GEM/14067map13032_rxn2.xlsx'  # 替换为你的文件路径
df_original = pd.read_excel(original_file)

# 提取相关列，并修改ID格式
df_original = df_original[['id', 'locus_tag', 'name', 'equation', 'definition']]
df_original['id'] = df_original['id'].apply(lambda x: f"{x}_c0" if isinstance(x, str) and x.startswith('rxn') else x)

# 读取14067_rxn_1.xlsx文件
rxn_file = '2.Expand GEM/14067_rxn_1.xlsx'  # 替换为你的文件路径
df_rxn = pd.read_excel(rxn_file)

# 将ID列中的数据转换为字符串并检查哪些ID在原文件中不存在
missing_rxns = df_original[~df_original['id'].isin(df_rxn['ID'].astype(str))]

# 将缺失的rxn添加到14067_rxn_1.xlsx中
if not missing_rxns.empty:
    # 将没有匹配到的rxn的相关信息添加到14067_rxn_1文件
    df_rxn_updated = pd.concat([df_rxn, missing_rxns[['id', 'locus_tag', 'name', 'equation', 'definition']].rename(columns={
        'id': 'ID', 
        'locus_tag': 'Genes', 
        'name': 'Name', 
        'equation': 'Equation', 
        'definition': 'Definition'
    })], ignore_index=True)

    # 输出为新的14067_rxn_2.xlsx文件
    df_rxn_updated.to_excel('14067_rxn_2.xlsx', index=False)

    print("Missing rxn data added and saved as 14067_rxn_2.xlsx.")
else:
    print("No missing rxn data to add.")


Missing rxn data added and saved as 14067_rxn_2.xlsx.
