#### Install the required libraries (pinned versions)

In [1]:
!pip install scikit-learn==1.5.2
!pip install scipy==1.14.1
!pip install joblib==1.4.2
!pip install numpy==2.0.2
!pip install pandas==2.2.3

Collecting scikit-learn==1.5.2
  Using cached scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Collecting numpy>=1.19.5 (from scikit-learn==1.5.2)
  Using cached numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (62 kB)
Collecting scipy>=1.6.0 (from scikit-learn==1.5.2)
  Using cached scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (60 kB)
Collecting joblib>=1.2.0 (from scikit-learn==1.5.2)
  Using cached joblib-1.4.2-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=3.1.0 (from scikit-learn==1.5.2)
  Using cached threadpoolctl-3.5.0-py3-none-any.whl.metadata (13 kB)
Using cached scikit_learn-1.5.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.3 MB)
Using cached joblib-1.4.2-py3-none-any.whl (301 kB)
Using cached numpy-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.4 MB)
Using cached scipy-1.14.1-cp311-cp311-manylinux_2_17_x86_64

### In this tutorial, we use tumor samples from the TCGA colon adenocarcinoma (COAD) cohort as an example of predicting elastic modulus with MechanoGEPred.

#### Load MechanoGEPred, its scaler, and the feature (gene) list

In [2]:
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import StandardScaler
import scipy
import joblib

# Directories holding the trained artefacts and the example data. Edit these
# two values to run the notebook on another machine.
MODEL_DIR = '/mnt/Storage/home/zhouxiaoyan/model'
EXAMPLE_DATA_DIR = '/mnt/Storage/home/zhouxiaoyan/example_data'

# load model and scaler
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load artefacts obtained from a trusted source.
model = joblib.load(f'{MODEL_DIR}/MechanoGEPred.joblib')
scaler = joblib.load(f'{MODEL_DIR}/scaler.joblib')

# read the file containing feature names and their order
mechanosensitive_gene_df = pd.read_csv(f'{EXAMPLE_DATA_DIR}/mechanosensitive_name.csv', header=0)
mechanosensitive_gene_list = mechanosensitive_gene_df['mechanosensitive_gene'].tolist()

# Fail early, with a readable message, if the feature list cannot be the one
# the scaler was fitted on; otherwise the mismatch only surfaces later, when
# the scaler is applied. The check is skipped if the loaded object does not
# expose a feature count.
expected_n_features = getattr(scaler, 'n_features_in_', None)
if expected_n_features is not None and len(mechanosensitive_gene_list) != expected_n_features:
    raise ValueError(
        f'Feature list has {len(mechanosensitive_gene_list)} genes but the scaler '
        f'expects {expected_n_features} features.'
    )

In [3]:
# Display the feature (gene) names in the order they were read from the CSV.
mechanosensitive_gene_list

['ENSG00000187608|ISG15',
 'ENSG00000142798|HSPG2',
 'ENSG00000070831|CDC42',
 'ENSG00000117632|STMN1',
 'ENSG00000253368|TRNP1',
 'ENSG00000090020|SLC9A1',
 'ENSG00000130766|SESN2',
 'ENSG00000175130|MARCKSL1',
 'ENSG00000162522|KIAA1522',
 'ENSG00000160094|ZNF362',
 'ENSG00000127603|MACF1',
 'ENSG00000157216|SSBP3',
 'ENSG00000177606|JUN',
 'ENSG00000142871|CCN1',
 'ENSG00000162692|VCAM1',
 'ENSG00000085491|SLC25A24',
 'ENSG00000168765|GSTM4',
 'ENSG00000134250|NOTCH2',
 'ENSG00000288825|H2AC18',
 'ENSG00000160789|LMNA',
 'ENSG00000117143|UAP1',
 'ENSG00000073756|PTGS2',
 'ENSG00000198625|MDM4',
 'ENSG00000196352|CD55',
 'ENSG00000082482|KCNK2',
 'ENSG00000092969|TGFB2',
 'ENSG00000162909|CAPN2',
 'ENSG00000196187|TMEM63A',
 'ENSG00000163050|COQ8A',
 'ENSG00000143632|ACTA1',
 'ENSG00000134318|ROCK2',
 'ENSG00000163029|SMC6',
 'ENSG00000143878|RHOB',
 'ENSG00000115129|TP53I3',
 'ENSG00000197329|PELI1',
 'ENSG00000138071|ACTR2',
 'ENSG00000163017|ACTG2',
 'ENSG00000115318|LOXL3',
 'ENS

#### Predict the elastic modulus of TCGA COAD tumor samples

In [4]:
# Example expression table: one row per sample, one column per gene.
TCGA_COAD_EXPRESSION_CSV = '/mnt/Storage/home/zhouxiaoyan/example_data/example_TCGA_COAD.csv'

tcga_gene_expression = pd.read_csv(TCGA_COAD_EXPRESSION_CSV, sep=',', header=0)

# Model genes that are absent from the input are zero-filled by the reindex
# below. Report them first, so a naming mismatch does not silently turn into
# all-zero feature columns.
available_genes = set(tcga_gene_expression.columns)
missing_genes = [gene for gene in mechanosensitive_gene_list if gene not in available_genes]
if missing_genes:
    print(
        f'{len(missing_genes)} of {len(mechanosensitive_gene_list)} model genes are absent '
        f'from the input and will be set to 0 (first few: {missing_genes[:10]})'
    )

# Reindex the bulk data to the model's genes, in the model's order; gene
# columns missing from the input are created and filled with 0. Columns that
# are not model genes are dropped.
tcga_gene_expression_selected = tcga_gene_expression.reindex(columns=mechanosensitive_gene_list, fill_value=0)

# Apply the loaded scaler, then predict from the scaled matrix.
tcga_gene_expression_scaled = scaler.transform(tcga_gene_expression_selected)

y_pred = model.predict(tcga_gene_expression_scaled)


In [5]:
# Inspect the expression table after it was reindexed to the model's gene columns.
tcga_gene_expression_selected

Unnamed: 0,ENSG00000187608|ISG15,ENSG00000142798|HSPG2,ENSG00000070831|CDC42,ENSG00000117632|STMN1,ENSG00000253368|TRNP1,ENSG00000090020|SLC9A1,ENSG00000130766|SESN2,ENSG00000175130|MARCKSL1,ENSG00000162522|KIAA1522,ENSG00000160094|ZNF362,...,ENSG00000196576|PLXNB2,ENSG00000147010|SH3KBP1,ENSG00000102265|TIMP1,ENSG00000015285|WAS,ENSG00000102024|PLS3,ENSG00000102081|FMR1,ENSG00000196924|FLNA,ENSG00000102119|EMD,ENSG00000130827|PLXNA3,ENSG00000164920|OSR2
0,111.323139,35.067831,182.143800,43.125441,9.558425,55.809260,36.077335,371.035381,128.444835,8.184816,...,114.595915,40.654756,515.577295,11.312534,40.841742,16.969035,133.745523,103.462237,7.458063,3.853723
1,31.168886,21.599403,187.540042,78.161852,1.959078,24.255049,20.800606,653.748515,96.916973,11.675186,...,111.873625,30.334387,436.099609,4.677432,48.636173,38.559231,39.992800,142.668965,23.695238,1.585449
2,161.663390,21.242979,159.293754,59.909107,3.891378,26.213669,18.846879,536.474948,106.396285,11.204307,...,139.554290,47.665101,362.433731,9.452102,57.779385,28.428212,203.261362,151.243344,28.700381,12.737729
3,63.993286,14.541301,201.014112,79.667995,7.709810,18.708414,11.866719,458.697944,57.090612,11.118523,...,77.094804,36.127833,274.170707,4.026578,32.349967,39.695870,104.607027,71.777832,10.363053,1.389526
4,407.670686,181.865723,206.279080,40.301146,25.909146,48.717153,20.182419,348.531560,63.144973,32.856656,...,149.042834,40.455372,1113.949923,42.433713,85.111095,18.453423,614.522515,72.930922,9.917370,6.258258
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
448,48.514347,14.821024,222.963506,127.733620,2.284226,53.845236,15.391835,455.104540,98.942980,14.397277,...,66.929078,22.834965,250.334199,1.074223,35.453873,13.060571,38.267433,100.271878,13.076425,0.770244
449,18.965223,40.809133,164.363133,75.615533,0.692932,31.574865,24.018662,378.046501,103.710448,16.364017,...,108.858018,22.786720,284.966130,1.999041,34.291797,29.664625,78.870226,143.766310,41.882704,0.801112
450,59.704721,20.856706,150.510273,106.996603,24.053707,17.370601,19.627010,714.992729,63.315798,24.115039,...,94.253579,36.328391,420.920756,3.389817,26.859028,47.576323,218.271110,118.092784,27.209085,6.427540
451,106.059445,8.997200,266.842890,117.563425,3.830358,14.823371,24.111400,645.953041,65.473503,10.679466,...,88.286879,17.350595,406.189969,3.178827,43.367601,22.722655,50.196400,79.247455,5.493996,4.262287


In [6]:
# Display the array returned by model.predict for the scaled TCGA COAD expression data.
y_pred

array([1.31407064, 2.16586685, 2.01923911, 1.19248622, 1.61291145,
       1.343618  , 1.27730129, 1.12255639, 1.22810708, 1.05453439,
       1.63393211, 1.54943165, 1.15290251, 1.42419977, 2.39943933,
       2.38191276, 1.56971146, 2.2963401 , 2.41535884, 1.204438  ,
       1.11916767, 2.35958438, 1.19042196, 1.30661018, 1.00288586,
       1.3728508 , 1.7519927 , 1.16140659, 1.2618107 , 1.4054436 ,
       1.28387987, 1.21656556, 1.20661147, 1.12602859, 1.10473862,
       1.06283656, 0.86336717, 1.12988753, 1.32499443, 1.37455847,
       2.64899587, 1.43324622, 1.5651828 , 1.2877839 , 1.29013381,
       1.33634177, 1.16958869, 1.34387936, 2.49297579, 0.82568332,
       1.21925983, 1.39727401, 1.8449982 , 1.18690525, 1.65556698,
       1.69042093, 1.22423023, 1.44580061, 1.40641196, 1.19446976,
       1.1429018 , 1.20974927, 1.29091302, 1.26755971, 0.99935082,
       2.20629125, 1.26060017, 1.78825066, 1.35084136, 1.37846169,
       1.1283408 , 1.30711272, 1.16839691, 1.22508538, 1.42168