In [1]:
# import libraries 
import mineapy
from cobra.io import load_matlab_model,load_json_model
import pandas as pd
from mineapy.core.taskEnrich import TaskEnrichment
from mineapy.core.thermo_model import ThermoModel_WithoutInfo
from mineapy.core.rxnExp import ReactionExp
import sys
import os
import pickle
# Make the parent (tutorials) directory importable so the local
# data_utility module below can be found.
sys.path.append(os.path.abspath(os.pardir))

from data_utility import getLiverStage

Set parameter Username
Academic license - for non-commercial use only - expires 2025-08-20


## Get gene expression data at the exo-erythrocytic stage for Plasmodium berghei
To understand the enriched minimal network at the exo-erythrocytic stage, data is sourced from this [publication](https://malariajournal.biomedcentral.com/articles/10.1186/s12936-019-2968-7).
- Caldelari R, Dogga S, Schmid MW, Franke-Fayard B, Janse CJ, Soldati-Favre D, Heussler V. Transcriptome analysis of Plasmodium berghei during exo-erythrocytic development. Malar J. 2019 Sep 24;18(1):330. doi: 10.1186/s12936-019-2968-7. PMID: 31551073; PMCID: PMC6760107.


In [2]:
# getLiverStage() loads the gene expression data; show the first five rows as a preview.
df = getLiverStage()
print(df.head(n=5))

          Gene ID                            Product Description  EEF_54h_A  \
0  PBANKA_0100061                                  fam-c protein         39   
1  PBANKA_0100200  Plasmodium exported protein, unknown function        189   
2  PBANKA_0100700  Plasmodium exported protein, unknown function        201   
3  PBANKA_0100800                  zinc finger protein, putative        242   
4  PBANKA_0100900           ATP-dependent RNA helicase, putative        770   

   EEF_54h_B  RPKM_EEF_54h_A  RPKM_EEF_54h_B     Mean_RPKM  
0         16    4.187801e+07    4.129695e+07  4.158748e+07  
1         79    8.129760e+07    8.168072e+07  8.148916e+07  
2        156    9.182950e+07    1.713118e+08  1.315707e+08  
3        110    2.574158e+07    2.812478e+07  2.693318e+07  
4        321    8.706738e+07    8.724616e+07  8.715677e+07  


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


## Get the metabolic model for Plasmodium berghei
The MATLAB version of the metabolic model for Plasmodium berghei can be found in this [publication](https://pubmed.ncbi.nlm.nih.gov/31730853/).
- Stanway RR, Bushell E, Chiappino-Pepe A, Roques M, Sanderson T, Franke-Fayard B, Caldelari R, Golomingi M, Nyonda M, Pandey V, Schwach F, Chevalley S, Ramesar J, Metcalf T, Herd C, Burda PC, Rayner JC, Soldati-Favre D, Janse CJ, Hatzimanikatis V, Billker O, Heussler VT. Genome-Scale Identification of Essential Metabolic Processes for Targeting the Plasmodium Liver Stage. Cell. 2019 Nov 14;179(5):1112-1128.e26. doi: 10.1016/j.cell.2019.10.030. PMID: 31730853; PMCID: PMC6904910.

In [3]:
# Load the metabolic model; the Python (pickled) version is saved in the models folder.
# NOTE(review): pickle.load can execute arbitrary code — only load model files
# that come from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('../models/ipbeblood_py.pickle', 'rb') as model_file:
    ipbe_blood = pickle.load(model_file)

# Re-assign the public `annotation` attribute from the private `_annotation`
# one on the model and on every metabolite, reaction and gene
# (presumably needed because the pickle was written with a different cobra
# version — TODO confirm).
ipbe_blood.annotation = ipbe_blood._annotation
for collection in (ipbe_blood.metabolites, ipbe_blood.reactions, ipbe_blood.genes):
    for item in collection:
        item.annotation = item._annotation


## get biomass reaction
biomass = ipbe_blood.reactions.get_by_id('biomass')
# Remove any minimal-growth requirement before optimizing.
biomass.lower_bound = 0
tfa_solution = ipbe_blood.optimize()
tfa_value = tfa_solution.objective_value
print('Solution found : {0:.5g}'.format(tfa_value))


# print genes
gene_ids = [g.id for g in ipbe_blood.genes]
print(gene_ids)

Read LP format model from file /var/folders/ty/spqw6k0n4dn1yvpgn_swvjm80000gn/T/tmpiw8c0b1q.lp
Reading time = 0.03 seconds
: 8651 rows, 9012 columns, 28182 nonzeros
Read LP format model from file /var/folders/ty/spqw6k0n4dn1yvpgn_swvjm80000gn/T/tmpu9ib1ywp.lp
Reading time = 0.01 seconds
: 1409 rows, 3136 columns, 11056 nonzeros
Solution found : 0.14107
['PBANKA_1014100', 'PBANKA_1024100', 'PBANKA_0416500', 'PBANKA_0942700', 'PBANKA_1446300', 'PBANKA_1140200', 'PBANKA_0609800', 'PBANKA_1344100', 'PBANKA_0908500', 'PBANKA_0710800', 'PBANKA_0818600', 'PBANKA_1014800', 'PBANKA_1444500', 'PBANKA_0312700', 'PBANKA_0514100', 'PBANKA_1009500', 'PBANKA_0824700', 'PBANKA_1023400', 'PBANKA_1143400', 'PBANKA_1317200', 'PBANKA_1207200', 'PBANKA_0202800', 'PBANKA_1307600', 'PBANKA_1406700', 'PBANKA_1142400', 'PBANKA_0104400', 'PBANKA_0202300', 'PBANKA_1213400', 'PBANKA_0823100', 'PBANKA_1344400', 'PBANKA_1210800', 'PBANKA_1419800', 'PBANKA_1333700', 'PBANKA_1419900', 'PBANKA_0607900', 'PBANKA_123560

## Get high- and low-expressed reactions from the gene expression data at the exo-erythrocytic stage.

In [4]:
# Gene-level expression input: gene ids paired with their mean RPKM values.
# high_cutoff=0.15 selects the top 15% of highly expressed genes, and
# low_cutoff=0.15 selects the bottom 15% of lowly expressed genes.
gene_exp = {
    'gene_id': df['Gene ID'].to_list(),
    'exp_val': df['Mean_RPKM'].to_list(),
    'high_cutoff': 0.15,
    'low_cutoff': 0.15,
}

exp_analysis = ReactionExp(ipbe_blood, gene_exp=gene_exp)

# Collect the high- and low-expressed reaction sets reported by the analysis.
params_rxns = {
    'high_rxns': exp_analysis.high_rxns,
    'low_rxns': exp_analysis.low_rxns,
}



2024-12-10 09:43:10,024 - expression_logger - INFO - start analysis of context (tissue-specific).....


 gene value is not found or nan for the reaction = R00243_c
why or_vals are empty
 gene value is not found or nan for the reaction = R01057_c
 gene value is not found or nan for the reaction = R08639_c
 gene value is not found or nan for the reaction = R00756_c
 gene value is not found or nan for the reaction = R01195_m
 gene value is not found or nan for the reaction = R10159_m
why or_vals are empty
 gene value is not found or nan for the reaction = R02163_m
why or_vals are empty
 gene value is not found or nan for the reaction = R02161_m
why or_vals are empty
 gene value is not found or nan for the reaction = R00086_m
why or_vals are empty
 gene value is not found or nan for the reaction = R02971_c
 gene value is not found or nan for the reaction = R03018_c
 gene value is not found or nan for the reaction = R04391_c
 gene value is not found or nan for the reaction = R02749_c
 gene value is not found or nan for the reaction = Tc2eC00831_c
why or_vals are empty
 gene value is not found

In [5]:
# Optimize the model again and report the solution together with the
# solver interface class currently attached to the model.
sol = ipbe_blood.optimize()
print(sol, type(ipbe_blood.solver))


<Solution 0.141 at 0x7f7ff99c1910> <class 'optlang.gurobi_interface.Model'>


## Load the MiNEA parameters and apply enrichment

In [None]:
# Location of the YAML file holding the MiNEA parameters.
path_to_params = '../input/Minea_parameter_ipbe.yaml'

# Build the enrichment task from the model, the parameter file and the
# high/low reaction sets, then run it.
task_enrich = TaskEnrichment(ipbe_blood, path_to_params, params_rxns)
task_enrich.run()

Opened parameters file
Read LP format model from file /var/folders/ty/spqw6k0n4dn1yvpgn_swvjm80000gn/T/tmpffoezoek.lp
Reading time = 0.01 seconds
: 1409 rows, 3136 columns, 11056 nonzeros


2024-12-10 09:43:16,564 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model initialized with units kcal/mol and temperature 298.15 K

importing sympy.core.numbers with 'from sympy import *' has been
deprecated since SymPy 1.6. Use import sympy.core.numbers instead. See
https://github.com/sympy/sympy/issues/18245 for more info.

  deprecated_since_version="1.6").warn()


Read LP format model from file /var/folders/ty/spqw6k0n4dn1yvpgn_swvjm80000gn/T/tmpk8qg865b.lp
Reading time = 0.01 seconds
: 1409 rows, 3136 columns, 11056 nonzeros


2024-12-10 09:43:28,684 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model initialized with units kcal/mol and temperature 298.15 K
2024-12-10 09:43:34,332 - thermomodel_RelaxedModel tutorial_basics - INFO - Setting minimal growth rate to 95% of the TFA solution
2024-12-10 09:43:34,714 - thermomodel_RelaxedModel tutorial_basics - INFO - Setting minimal growth rate to 0.1410658307210131
2024-12-10 09:43:34,715 - thermomodel_RelaxedModel tutorial_basics - INFO - Enumerating minmal networks ...


Timeout limit is 1000s


2024-12-10 09:43:42,214 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model preparation starting...


Preparing metabolic tasks...


2024-12-10 09:43:44,619 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model preparation done.
2024-12-10 09:43:44,619 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model conversion starting...
2024-12-10 09:43:53,407 - thermomodel_RelaxedModel tutorial_basics - INFO - # Model conversion done.
2024-12-10 09:43:53,407 - thermomodel_RelaxedModel tutorial_basics - INFO - # Updating cobra_model variables...
2024-12-10 09:43:53,443 - thermomodel_RelaxedModel tutorial_basics - INFO - # cobra_model variables are up-to-date
met=2Fe2S_a:   0%|          | 0/86 [00:00<?, ?it/s]

Min network method detected: min+1
Produced 20.0 with 1086 reactions deactivated
Produced 20.0 with 1086 reactions deactivated
Produced 20.0 with 1086 reactions deactivated


met=2Fe2S_m:   1%|          | 1/86 [00:02<04:13,  2.98s/it]

Produced 20.0 with 882 reactions deactivated
Produced 20.0 with 882 reactions deactivated
Produced 20.0 with 882 reactions deactivated


met=4Fe4S_a:   2%|▏         | 2/86 [00:05<03:52,  2.77s/it]

Produced 10.0 with 760 reactions deactivated
Produced 10.0 with 760 reactions deactivated
Produced 10.0 with 760 reactions deactivated


met=4Fe4S_m:   3%|▎         | 3/86 [00:08<03:58,  2.88s/it]

Produced 16.666666666666668 with 1260 reactions deactivated
Produced 16.666666666666668 with 1260 reactions deactivated
Produced 16.666666666666668 with 1260 reactions deactivated


met=C00001_c:   5%|▍         | 4/86 [00:11<03:49,  2.80s/it]

Produced 470.3145509859155 with 4097 reactions deactivated
Produced 470.3145509859155 with 4097 reactions deactivated
Produced 470.3145509859155 with 4097 reactions deactivated


met=C00002_c:   6%|▌         | 5/86 [00:15<04:39,  3.45s/it]

Produced 0.007200000000000273 with 0 reactions deactivated
Produced 0.007200000000000273 with 0 reactions deactivated
Produced 0.007200000000000273 with 0 reactions deactivated


met=C00003_c:   7%|▋         | 6/86 [00:18<04:25,  3.32s/it]

Produced 0.007199999999999835 with 0 reactions deactivated
Produced 0.007199999999999835 with 0 reactions deactivated
Produced 0.007199999999999835 with 0 reactions deactivated


met=C00004_c:   8%|▊         | 7/86 [00:22<04:29,  3.41s/it]

Produced 0.0072000000000000015 with 0 reactions deactivated
Produced 0.0072000000000000015 with 0 reactions deactivated
Produced 0.0072000000000000015 with 0 reactions deactivated


met=C00005_c:   9%|▉         | 8/86 [00:25<04:25,  3.40s/it]

Produced 0.007200000000000007 with 0 reactions deactivated
Produced 0.007200000000000007 with 0 reactions deactivated
Produced 0.007200000000000007 with 0 reactions deactivated


met=C00006_c:  10%|█         | 9/86 [00:29<04:32,  3.54s/it]

Produced 0.007200000000000007 with 0 reactions deactivated
Produced 0.007200000000000007 with 0 reactions deactivated
Produced 0.007200000000000007 with 0 reactions deactivated


met=C00010_c:  12%|█▏        | 10/86 [00:32<04:19,  3.42s/it]

Produced 0.00720000000000005 with 0 reactions deactivated
Produced 0.00720000000000005 with 0 reactions deactivated
Produced 0.00720000000000005 with 0 reactions deactivated


met=C00013_c:  13%|█▎        | 11/86 [00:35<04:07,  3.30s/it]

Produced 60.0 with 538 reactions deactivated
Produced 60.0 with 538 reactions deactivated
Produced 60.0 with 538 reactions deactivated


met=C00014_c:  14%|█▍        | 12/86 [00:39<04:00,  3.26s/it]