In [1]:
import pandas as pd

# Load the preprocessed mRNA matrix; the first CSV column is used as the row
# index (the displayed row label below is a sample ID such as C3L-00017).
mrna_data = pd.read_csv(
    './PDAC_data/mRNA_preprocessed.csv',
    index_col=0,
)
# Quick sanity check on the dimensions, then preview the first row.
print(mrna_data.shape)
mrna_data.head(1)

(137, 1500)


Unnamed: 0,XIST,PNLIP,AMY2A,RNA18SN4,RNA18SN1,RNA18SN3,RNA18SN2,RNA18SN5,CELA2A,CPA1,...,RNF5P1,GIP,RIMBP2,PSG1,TRPM5,LOC105376382,DNASE2B,PIR-FIGF,LINC02490,FAM30A
C3L-00017,-0.796118,-1.774577,-1.69026,0.153086,0.153086,0.153086,0.153086,0.153086,-1.281799,-1.897223,...,-0.851949,-0.648965,0.832518,0.082587,1.624735,-2.602721,0.952308,-1.738352,-1.199231,-0.013946


In [2]:
# Load the preprocessed proteome matrix; the first CSV column is used as the
# row index, mirroring how the mRNA matrix is read.
proteome_data = pd.read_csv(
    './PDAC_data/proteome_preprocessed.csv',
    index_col=0,
)
# Quick sanity check on the dimensions, then preview the first row.
print(proteome_data.shape)
proteome_data.head(1)

(137, 9644)


Unnamed: 0,A1BG,A1CF,A2M,A2ML1,AAAS,AACS,AADAC,AAGAB,AAK1,AAMDC,...,ZRANB2,ZRSR2,ZSCAN18,ZSWIM8,ZW10,ZWILCH,ZYG11B,ZYX,ZZEF1,ZZZ3
C3L-00017,0.185445,-0.592399,0.964998,-0.543458,-0.727576,1.512089,-0.475572,-0.361871,-0.994617,1.43181,...,0.322185,-0.670025,0.716651,0.246046,0.881191,-1.408678,0.26554,-1.48282,-0.977116,0.431648


In [3]:
# Load the preprocessed SCNA matrix; the first CSV column is used as the row
# index, mirroring how the other two views are read.
scna_data = pd.read_csv(
    './PDAC_data/SCNA_preprocessed.csv',
    index_col=0,
)
# Quick sanity check on the dimensions, then preview the first row.
print(scna_data.shape)
scna_data.head(1)

(137, 3000)


Unnamed: 0,IL3RA,VCY,UTY,USP9Y,DDX3Y,TMSB4Y,VCY1B,HSFY1,HSFY2,CDY2A,...,CCDC167,CCDC171,RAB40B,WDR45B,FOXK2,SH3GL2,ADAMTSL1,SAXO1,RRAGA,HAUS6
C3L-00017,-0.102157,2.132233,2.435571,2.435571,2.435571,2.435571,2.440122,2.441097,2.441097,2.441097,...,0.077383,-1.276814,-0.082938,-0.082938,-0.082938,-1.291565,-1.296635,-1.296729,-1.296729,-1.296729


In [4]:
# The three views were read from separate CSV files, and the rows of all of
# them are later labelled with a single sample list taken from
# mrna_data.index. Verify that every view carries exactly the same sample
# index (same labels, same order) before stacking, so that a mismatch fails
# loudly here instead of silently pairing measurements from different samples.
reference_samples = mrna_data.index
for view_label, view_frame in (("Proteome", proteome_data), ("SCNA", scna_data)):
    if not view_frame.index.equals(reference_samples):
        raise ValueError(
            f"Sample index of the {view_label} view does not match the mRNA view "
            "(labels and order must be identical); align the rows, e.g. with "
            ".loc[mrna_data.index], before building `data`."
        )

# Nested list handed to MOFA: one inner list per view (mRNA, proteome, SCNA),
# each holding a single matrix because there is only one sample group.
data = [
    [mrna_data],
    [proteome_data],
    [scna_data],
]

# Report the shape of each view; the sample count printed last is now backed
# by the index check above rather than by view 0 alone.
print("Data structure prepared:")
print(f"  View 0 (mRNA): {data[0][0].shape}")
print(f"  View 1 (Proteome): {data[1][0].shape}")
print(f"  View 2 (SCNA): {data[2][0].shape}")
print(f"  All views have {data[0][0].shape[0]} samples")

Data structure prepared:
  View 0 (mRNA): (137, 1500)
  View 1 (Proteome): (137, 9644)
  View 2 (SCNA): (137, 3000)
  All views have 137 samples


In [5]:
from mofapy2.run.entry_point import entry_point

# Create the MOFA entry point; all options and the data are attached to it.
ent = entry_point()

# Data is already normalized, so per-view scaling is switched off.
ent.set_data_options(scale_views=False)

# Labels for the three views and the single sample group.
views_names = ["mRNA", "Proteome", "SCNA"]
groups_names = ["PDAC"]

# One list of sample labels per group; the mRNA index is used for every view.
samples_names = [mrna_data.index.tolist()]

# Prefix each column name with its view so that feature names stay distinct
# between views (presumably because the same gene symbol can occur in more
# than one omics layer -- TODO confirm).
feature_prefixes = ("mrna_", "proteome_", "scna_")
view_frames = (mrna_data, proteome_data, scna_data)
features_names = [
    [prefix + str(col) for col in frame.columns.tolist()]
    for prefix, frame in zip(feature_prefixes, view_frames)
]

# Hand the nested views-by-groups list and all labels to MOFA.
ent.set_data_matrix(
    data,
    views_names=views_names,
    groups_names=groups_names,
    samples_names=samples_names,
    features_names=features_names,
)
print("Data matrix set successfully!")


        #########################################################
        ###           __  __  ____  ______                    ### 
        ###          |  \/  |/ __ \|  ____/\    _             ### 
        ###          | \  / | |  | | |__ /  \ _| |_           ### 
        ###          | |\/| | |  | |  __/ /\ \_   _|          ###
        ###          | |  | | |__| | | / ____ \|_|            ###
        ###          |_|  |_|\____/|_|/_/    \_\              ###
        ###                                                   ### 
        ######################################################### 
       
 
        
Successfully loaded view='mRNA' group='PDAC' with N=137 samples and D=1500 features...
Successfully loaded view='Proteome' group='PDAC' with N=137 samples and D=9644 features...
Successfully loaded view='SCNA' group='PDAC' with N=137 samples and D=3000 features...


Data matrix set successfully!


In [6]:
# Model structure: collected in a dict so the settings can be read at a glance.
model_options = dict(
    factors=10,              # Number of latent factors to start from
    spikeslab_weights=True,  # Sparse feature weights
    ard_weights=True,        # Automatic relevance determination on the weights
)
ent.set_model_options(**model_options)

print("Model options set!")

# Training schedule.
# NOTE(review): the inline comment on dropR2 reads the threshold as a fraction
# (0.001 -> 0.1%), but the library's own log echoes it as "0.001% variance
# explained". Confirm the expected units against the mofapy2 documentation.
train_options = dict(
    iter=1000,                  # Maximum iterations
    convergence_mode="medium",  # Convergence speed
    dropR2=0.001,               # Drop factors explaining < 0.1% variance
    gpu_mode=False,             # Set True if GPU available
    seed=42,                    # Reproducibility
    verbose=True,               # Show training progress
)
ent.set_train_options(**train_options)

print("Training options set!")

Model options:
- Automatic Relevance Determination prior on the factors: False
- Automatic Relevance Determination prior on the weights: True
- Spike-and-slab prior on the factors: False
- Spike-and-slab prior on the weights: True
Likelihoods:
- View 0 (mRNA): gaussian
- View 1 (Proteome): gaussian
- View 2 (SCNA): gaussian


Model options set!

Dropping factors with minimum threshold of 0.001% variance explained

Convergence mode: medium

Training options set!


In [7]:
# Build the MOFA model on the entry point configured in the previous cells,
# announcing the step before and after so the cell output shows it finished.
build_start_message = "Building MOFA model..."
print(build_start_message)
ent.build()
build_done_message = "Model built!"
print(build_done_message)

Building MOFA model...
Model built!


In [8]:
# Train the model built above. The library prints its own per-iteration log
# (verbose=True was requested in the training options), so this cell only
# brackets the run with a start and an end message.
train_start_message = "\nTraining MOFA model... This may take a few minutes."
print(train_start_message)
ent.run()
train_done_message = "Training complete!"
print(train_done_message)


Training MOFA model... This may take a few minutes.


######################################
## Training the model with seed 42 ##
######################################


ELBO before training:
Z=-88.63  W=-182260.69  Tau=-75507.01  Y=-14251047.52  AlphaW=-919.77  ThetaW=0.00  
Total: -14509823.63

Iteration 1: time=0.47, ELBO=-2950084.79, deltaELBO=11559738.834 (79.66836215%), Factors=9
- ELBO decomposition:  Z=-2406.62  W=-119067.38  Tau=-107595.54  Y=-2720082.33  AlphaW=-932.93  ThetaW=0.00  
- Time spent in ELBO computation: 2.0%
- Variance explained:  View 0: 6.45%   View 1: 6.25%   View 2: 4.86%
- Fraction of zero weights:  View 0: 25%   View 1: 23%   View 2: 20%
- Maximum correlation between factors: 0.18
- Factor norms:  5.05 3.74 2.46 2.11 2.07 1.05 0.79 0.69 0.40
- Tau per view (average):  View 0: 1.05   View 1: 1.04   View 2: 1.03


Iteration 2: time=0.44, ELBO=-2747393.35, deltaELBO=202691.446 (1.39692564%), Factors=8
- ELBO decomposition:  Z=-3557.86  W=-170492.43  Tau=-1

In [10]:
# Destination of the trained model, next to the input CSVs.
outfile = "./PDAC_data/mofa_model.hdf5"

# Write the model to disk. save_data=True is passed explicitly -- presumably
# so the input matrices are stored alongside the fit; confirm against the
# mofapy2 documentation.
ent.save(
    outfile,
    save_data=True,
)
print(f"Model saved to {outfile}")

Saving model in ./PDAC_data/mofa_model.hdf5...
Model saved to ./PDAC_data/mofa_model.hdf5
