In [None]:
#Finds all the gi numbers to be uploaded on UniProt mapping and put them in a text file
import re

# Read the SBML file as plain text; the identifiers are pulled out with a
# regular expression rather than an XML parser.
with open("/path", "r") as f:
    sbml_data = f.read()

# The dots are escaped so that only the literal host "identifiers.org" matches
# (an unescaped "." would match any character).
gi_numbers = re.findall(r"http://identifiers\.org/ncbigi/(\d+)", sbml_data)

# One GI number per line, in order of appearance (duplicates are kept).
with open("gi_numbers.txt", "w") as f:
    f.writelines(gi_number + "\n" for gi_number in gi_numbers)
print(len(gi_numbers))

In [None]:
#unique sequences
def read_fasta_file(file_path):
    """Parse a FASTA file into a ``{header: sequence}`` dictionary.

    Args:
        file_path: Path of the FASTA file to read.

    Returns:
        dict mapping each header (the stripped header line without its
        leading ``>``) to the concatenation of its stripped sequence lines.
        Records with an empty sequence are skipped, and if a header occurs
        more than once the last record wins.

    Lines that appear before the first ``>`` header do not belong to any
    record and are ignored (previously they were stored under the key
    ``None``).
    """
    sequences = {}
    header = None
    chunks = []  # sequence lines of the current record, joined once at the end
    with open(file_path, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A new header closes the previous record.
                if header is not None and chunks:
                    sequences[header] = ''.join(chunks)
                header = line[1:]
                chunks = []
            elif header is not None:
                chunks.append(line)
    # The last record has no following header to close it.
    if header is not None and chunks:
        sequences[header] = ''.join(chunks)
    return sequences


def _ask_which_header(sequence, headers):
    """Ask the user on stdin which of ``headers`` to keep for ``sequence``."""
    print(f"Multiple sequences found for:\n{sequence}\n")
    for i, h in enumerate(headers):
        print(f"{i+1}. {h}")
    while True:
        choice = input("Which sequence do you want to keep? (enter the number): ").strip()
        # isdecimal() only accepts characters that int() can convert.
        if choice.isdecimal() and 1 <= int(choice) <= len(headers):
            return headers[int(choice) - 1]
        print("Invalid choice. Please enter a valid number.")


def find_unique_sequences(sequences):
    """Keep exactly one header for every distinct sequence.

    Args:
        sequences: dict mapping header -> sequence.

    Returns:
        dict mapping the retained header -> sequence, ordered by the first
        appearance of each sequence in ``sequences``.

    When several headers share a sequence, the first header containing
    "clostridium ljungdahlii" (case-insensitive) is kept; if there is none,
    the user is asked interactively which header to keep.
    """
    # Group headers by sequence in a single pass instead of rescanning all
    # records for every distinct sequence.
    headers_by_sequence = {}
    for header, sequence in sequences.items():
        headers_by_sequence.setdefault(sequence, []).append(header)

    unique_seqs = {}
    for sequence, headers in headers_by_sequence.items():
        if len(headers) == 1:
            chosen = headers[0]
        else:
            ljungdahlii_headers = [h for h in headers if 'clostridium ljungdahlii' in h.lower()]
            if ljungdahlii_headers:
                chosen = ljungdahlii_headers[0]
            else:
                chosen = _ask_which_header(sequence, headers)
        unique_seqs[chosen] = sequence
    return unique_seqs


def save_sequences_to_file(sequences, file_path):
    """Write ``sequences`` (header -> sequence) to ``file_path`` in FASTA form.

    Every record becomes two lines: ``>`` followed by the header, then the
    whole sequence on a single line. An existing file is overwritten.
    """
    with open(file_path, 'w') as out:
        for name in sequences:
            out.write('>' + name + '\n')
            out.write(sequences[name] + '\n')



# Input FASTA file and destination for the de-duplicated records.
fasta_file = '/path'
output_file = 'unique_sequences.fasta'

# Parse the records, keep one header per distinct sequence, write the result.
sequences = read_fasta_file(fasta_file)
unique_seqs = find_unique_sequences(sequences)
save_sequences_to_file(unique_seqs, output_file)

In [None]:
#create updated model with addition of uniprot id 

import json
import libsbml

def _collect_is_bags(sbml_object):
    """Return the rdf:Bag nodes found at annotation/RDF/Description/is.

    ``XMLNode.getChild(name)`` hands back an empty node, not ``None``, when
    the child is missing, so presence is tested with ``hasChild`` first.
    An object without any annotation yields an empty list.
    """
    bags = []
    annotation = sbml_object.getAnnotation()
    if annotation is None:
        return bags
    for i in range(annotation.getNumChildren()):
        rdf_node = annotation.getChild(i)
        if rdf_node.getName() != 'RDF':
            continue
        for j in range(rdf_node.getNumChildren()):
            description = rdf_node.getChild(j)
            if description.getName() != 'Description' or not description.hasChild('is'):
                continue
            is_node = description.getChild('is')
            if is_node.hasChild('Bag'):
                bags.append(is_node.getChild('Bag'))
    return bags


def _bag_resources(bag):
    """Return the ``resource`` attribute of every ``li`` child of ``bag``."""
    resources = []
    for k in range(bag.getNumChildren()):
        li_node = bag.getChild(k)
        if li_node.getName() == 'li':
            resources.append(li_node.getAttributes().getValue('resource'))
    return resources


with open('/path', 'r') as f:
    mapping_data = json.load(f)

# Index the mapping once: GI number (the entry's 'to') -> UniProt ID (its
# 'from'); the first entry for a GI number wins, as in a linear search.
# Only string targets are indexed because the GI parsed from a resource URI is
# always a string.
# NOTE(review): this from/to orientation is taken over unchanged — confirm it
# matches the direction in which the ID mapping was actually run.
gi_to_uniprot = {}
for mapping in mapping_data['results']:
    if isinstance(mapping['to'], str):
        gi_to_uniprot.setdefault(mapping['to'], mapping['from'])

sbml_doc = libsbml.readSBML('/path')
model = sbml_doc.getModel()
if model is None:
    raise RuntimeError("No model could be read from the SBML file")
fbc_plugin = model.getPlugin('fbc')

for gp in fbc_plugin.getListOfGeneProducts():
    for bag in _collect_is_bags(gp):
        resources = _bag_resources(bag)
        # GI number = last path segment of the first ncbigi resource that has one.
        gi = next(
            (r.split('/')[-1] for r in resources if 'ncbigi' in r and r.split('/')[-1]),
            None,
        )
        if not gi:
            continue
        uniprot_id = gi_to_uniprot.get(gi)
        if uniprot_id:
            uniprot_uri = "http://identifiers.org/uniprot/{}".format(uniprot_id)
            # Skip the insert when the bag already lists this resource.
            if uniprot_uri not in resources:
                li_child = libsbml.XMLNode(libsbml.XMLTriple("li", "", "rdf"), libsbml.XMLAttributes())
                li_child.getAttributes().add(libsbml.XMLTriple("resource", "", "rdf"), uniprot_uri)
                bag.addChild(li_child)
        # Only the first bag carrying a GI number is handled per gene product.
        break

libsbml.writeSBMLToFile(sbml_doc, 'updated_iHN637.xml')


                                

In [None]:
# Add the EC number to the reaction annotation
import libsbml
import pandas as pd


# Manual-curation spreadsheet; the loop reads its 'BIGG ID' and 'EC number' columns.
df = pd.read_excel('/path to file of manual curation', sheet_name='Foglio2')


document = libsbml.readSBML('updated_iHN637.xml')
model = document.getModel()
if model is None:
    raise RuntimeError("No model could be read from 'updated_iHN637.xml'")


# Not used below; kept so this notebook-level name stays defined as before.
fbc_plugin = model.getPlugin("fbc")


for _, row in df.iterrows():
    bigg_id = row['BIGG ID']
    ec_number = row['EC number']
    # Blank spreadsheet cells are read as NaN; such rows cannot be annotated
    # and would otherwise crash the concatenation or write an "ec-code/nan" entry.
    if pd.isna(bigg_id) or pd.isna(ec_number):
        continue
    bigg_id = "R_" + str(bigg_id)

    fbc_obj = model.getReaction(bigg_id)
    if fbc_obj is None:
        continue
    annotation = fbc_obj.getAnnotation()
    if annotation is None:
        continue

    for child_index in range(annotation.getNumChildren()):
        child = annotation.getChild(child_index)
        for j in range(child.getNumChildren()):
            rdf_child = child.getChild(j)
            # getChild(name) returns an empty node rather than None when the
            # child is missing, so presence is checked with hasChild().
            if rdf_child.getName() != 'Description' or not rdf_child.hasChild('is'):
                continue
            bqbiol_is_child = rdf_child.getChild('is')
            if not bqbiol_is_child.hasChild('Bag'):
                continue
            bag_child = bqbiol_is_child.getChild('Bag')
            print(f"Bag content before update: {bag_child.toXMLString()}")
            li_child = libsbml.XMLNode(libsbml.XMLTriple("li", "", "rdf"), libsbml.XMLAttributes())
            li_child.getAttributes().add(libsbml.XMLTriple("resource", "", "rdf"), "http://identifiers.org/ec-code/{}".format(ec_number))
            bag_child.addChild(li_child)
            print(f"Bag content after update: {bag_child.toXMLString()}")
            # One insertion per top-level annotation child is enough.
            break


output_file = "updated_iHN637_EC.xml"
libsbml.writeSBML(document, output_file)
print("Updated SBML file saved as", output_file)

In [None]:
import json
import libsbml

# Must be the file written by the EC-number cell ("updated_iHN637_EC.xml");
# the previous name "updated_iHN637_with_EC.xml" is not produced anywhere in
# this notebook.
sbml_file = "updated_iHN637_EC.xml"
output_file = "name_to_uniprot.json"

document = libsbml.readSBMLFromFile(sbml_file)
model = document.getModel()
if model is None:
    raise RuntimeError(f"No model could be read from {sbml_file!r}")
fbc_plugin = model.getPlugin("fbc")


def _find_uniprot_id(gene_product):
    """Return the UniProt ID annotated on ``gene_product``, or ``None``.

    Walks annotation/RDF/Description/is/Bag and returns the last path segment
    of the first ``li`` resource containing "uniprot". ``getChild(name)``
    returns an empty node rather than ``None`` for a missing child, hence the
    explicit ``hasChild`` checks.
    """
    annotation = gene_product.getAnnotation()
    if annotation is None:
        return None
    for i in range(annotation.getNumChildren()):
        child = annotation.getChild(i)
        if child.getName() != 'RDF':
            continue
        for j in range(child.getNumChildren()):
            rdf_child = child.getChild(j)
            if rdf_child.getName() != 'Description' or not rdf_child.hasChild('is'):
                continue
            bqbiol_is_child = rdf_child.getChild('is')
            if not bqbiol_is_child.hasChild('Bag'):
                continue
            bag_child = bqbiol_is_child.getChild('Bag')
            for k in range(bag_child.getNumChildren()):
                li_child = bag_child.getChild(k)
                if li_child.getName() != 'li':
                    continue
                resource = li_child.getAttributes().getValue('resource')
                if 'uniprot' in resource:
                    candidate = resource.split('/')[-1]
                    if candidate:
                        return candidate
    return None


# Gene product name -> UniProt ID, for every gene product that has one.
name_to_id = {}
for gp in fbc_plugin.getListOfGeneProducts():
    uniprot_id = _find_uniprot_id(gp)
    if uniprot_id:
        name_to_id[gp.getName()] = uniprot_id


with open(output_file, "w") as f:
    json.dump(name_to_id, f, indent=2)


In [None]:
import json

# Load the JSON files
# Two separate `with` statements: a multi-line `with a, b:` needs a line
# continuation, and without one the cell is a SyntaxError.
with open('name_to_uniprot.json', 'r') as f1:
    name_to_uniprot = json.load(f1)
with open('protein_mw.json', 'r') as f2:
    uniprot_to_mw = json.load(f2)

# Create a new dictionary with name:mw pairs; names whose UniProt ID has no
# entry in protein_mw.json are left out.
name_to_mw = {
    name: uniprot_to_mw[uniprot]
    for name, uniprot in name_to_uniprot.items()
    if uniprot in uniprot_to_mw
}

# Save the new dictionary as JSON
with open('name_to_mw.json', 'w') as f:
    json.dump(name_to_mw, f, indent=None)

In [None]:
import json

# Load your original JSON file
with open('name_to_mw.json', 'r') as f:
    original_data = json.load(f)

# Serialise with ",\n" between items so that each "key: value" pair ends up on
# its own line, then write the text out in one go.
# NOTE(review): the output name has no ".json" extension — confirm that this
# is the file name the downstream AutoPACMEN step expects.
formatted_text = json.dumps(original_data, separators=(",\n", ": "))
with open('clj_protein_id_mass_mapping', 'w') as f:
    f.write(formatted_text)

In [None]:
import cobra

In [None]:
from autopacmen.submodules.parse_bigg_metabolites_file import parse_bigg_metabolites_file
from autopacmen.submodules.create_combined_kcat_database import create_combined_kcat_database
from autopacmen.submodules.create_smoment_model_reaction_wise import create_smoment_model_reaction_wise_with_sbml
from autopacmen.submodules.get_initial_spreadsheets import get_initial_spreadsheets_with_sbml
from autopacmen.submodules.get_protein_mass_mapping import get_protein_mass_mapping_with_sbml
from autopacmen.submodules.get_reactions_kcat_mapping import get_reactions_kcat_mapping
from autopacmen.submodules.parse_bigg_metabolites_file import parse_bigg_metabolites_file
from autopacmen.submodules.parse_brenda_textfile import parse_brenda_textfile
from autopacmen.submodules.parse_brenda_json_for_model import parse_brenda_json_for_model
from autopacmen.submodules.parse_sabio_rk_for_model import parse_sabio_rk_for_model_with_sbml
from autopacmen.submodules.create_gecko_model_reaction_wise import create_gecko_model_reaction_wise_with_sbml

In [None]:
# Arguments for the BiGG metabolites parsing step.
# NOTE(review): "" and "/" look like placeholders — set real paths before running.
bigg_metabolites_file_path = ""
json_output_folder = "/"
# parse_bigg_metabolites_file is imported from
# autopacmen.submodules.parse_bigg_metabolites_file; presumably it writes JSON
# derived from the metabolites file into json_output_folder — confirm in the
# AutoPACMEN documentation.
parse_bigg_metabolites_file(bigg_metabolites_file_path, json_output_folder)

In [None]:
# Arguments for the BRENDA text file parsing step.
# NOTE(review): all three paths are empty strings — fill them in before running.
brenda_textfile_path = ""
bigg_metabolites_json_folder = ""
json_output_path = ""
# parse_brenda_textfile is imported from autopacmen.submodules.parse_brenda_textfile.
# Presumably bigg_metabolites_json_folder is the output folder of the BiGG
# metabolites parsing step — confirm.
parse_brenda_textfile(brenda_textfile_path,
                      bigg_metabolites_json_folder, json_output_path)

In [None]:
# Arguments for the model-specific BRENDA step.
# NOTE(review): all three paths are empty strings — fill them in before running.
sbml_path = ""
brenda_json_path = ""
output_json_path = ""
# parse_brenda_json_for_model is imported from
# autopacmen.submodules.parse_brenda_json_for_model; presumably brenda_json_path
# is the JSON produced by the BRENDA text file parsing step — confirm.
parse_brenda_json_for_model(sbml_path, brenda_json_path, output_json_path)

In [None]:
# Arguments for the SABIO-RK step.
# NOTE(review): all three paths are empty strings — fill them in before running.
sbml_path = ""
json_output_path = ""
bigg_id_name_mapping_path: str = ""
# parse_sabio_rk_for_model_with_sbml is imported from
# autopacmen.submodules.parse_sabio_rk_for_model.
# NOTE(review): presumably this step queries the SABIO-RK web service and so
# needs network access — confirm in the AutoPACMEN documentation.
parse_sabio_rk_for_model_with_sbml(
    sbml_path, json_output_path, bigg_id_name_mapping_path)

In [None]:
# Arguments for combining the two kcat databases.
# NOTE(review): all three paths are empty strings — fill them in before running.
sabio_rk_kcat_database_path = ""
brenda_kcat_database_path = ""
output_path = ""
# create_combined_kcat_database is imported from
# autopacmen.submodules.create_combined_kcat_database; the two inputs are
# presumably the outputs of the SABIO-RK and BRENDA steps — confirm.
create_combined_kcat_database(
    sabio_rk_kcat_database_path, brenda_kcat_database_path, output_path)

In [None]:
# Arguments for generating the initial project spreadsheets.
# NOTE(review): input_sbml and project_folder are empty strings — fill them in
# before running.
input_sbml = ""
project_folder = ""
project_name = "clj"
# get_initial_spreadsheets_with_sbml is imported from
# autopacmen.submodules.get_initial_spreadsheets; presumably project_name is
# used as a prefix for the files it creates in project_folder — confirm.
get_initial_spreadsheets_with_sbml(input_sbml, project_folder, project_name)

In [None]:
# Arguments for the reaction -> kcat mapping step.
# NOTE(review): "" and "/" look like placeholders — set real paths before running.
sbml_path = ""
project_folder = ""
project_name = "clj"
organism = "Clostridium ljungdahlii"
kcat_database_path = "/"
protein_kcat_database_path = "/"
# The call was truncated (undefined name `sbml`, missing comma), which made the
# cell a SyntaxError; it now passes the six variables defined above, in order.
# NOTE(review): some AutoPACMEN releases take an additional
# `type_of_kcat_selection` argument — confirm against the installed version.
get_reactions_kcat_mapping(sbml_path, project_folder, project_name,
                           organism, kcat_database_path, protein_kcat_database_path)


In [None]:
# Arguments for building the sMOMENT model.
# NOTE(review): the three paths are empty strings — fill them in before running.
input_sbml = ""
output_sbml = ""
project_folder = ""
project_name = "clj"
# NOTE(review): [""] is a list holding one empty string, not an empty list —
# confirm that this is what the callee expects when nothing is excluded.
excluded_reactions = [""]
# create_smoment_model_reaction_wise_with_sbml is imported from
# autopacmen.submodules.create_smoment_model_reaction_wise.
create_smoment_model_reaction_wise_with_sbml(
    input_sbml, output_sbml, project_folder, project_name, excluded_reactions)

In [None]:
# Load both models; the empty strings are placeholders for the SBML paths.
model_original = cobra.io.read_sbml_model("")
model_smoment = cobra.io.read_sbml_model("")

# Print the same two size figures for each model, under its own heading.
for heading, loaded_model in (
    ("Original model:", model_original),
    ("sMOMENT model:", model_smoment),
):
    print(heading)
    print(f"Number of reactions is {len(loaded_model.reactions)}")
    print(f"Number of metabolites is {len(loaded_model.metabolites)}")
