# MechanoGEPred Model Training

This notebook demonstrates the complete training process for the MechanoGEPred model.

## Overview

The model uses a Gradient Boosting Regressor to predict mechanical properties from gene expression profiles.

In [None]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler
import joblib
import os
from pathlib import Path

print("Libraries imported successfully!")

# Change working directory to the project folder so the relative paths used in
# later cells ('data/...', 'model/...') resolve.
# The original relative chdir was not idempotent: running this cell a second
# time in the same kernel tried to enter MechanoGEPred/MechanoGEPred. Remember
# the directory the kernel started in (only on the first run) and always chdir
# to an absolute target derived from it, so re-running the cell is harmless.
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = Path.cwd()
os.chdir(_NOTEBOOK_START_DIR / "MechanoGEPred")

Libraries imported successfully!


## Load Training Data

In [2]:
# Load training data
# The CSV is read with its first column as the row index and its first row as
# the header. If the file is missing we print the same guidance as before but
# then fail loudly: swallowing the error would leave `data` undefined (or stale
# from an earlier run), and the following cells would break or silently train
# on the wrong data.
file_path = 'data/train_test_data.csv'

if not os.path.exists(file_path):
    print("Training data not found. Please ensure train_test_data.csv is available.")
    print("\nTo reproduce the training process, you need the training data.")
    raise FileNotFoundError(f"Training data not found: {os.path.abspath(file_path)}")

print(f"Loading data from: {file_path}")
data = pd.read_csv(file_path, index_col=0, header=0)
print(f"Data loaded successfully! Shape: {data.shape}")
    

Loading data from: data/train_test_data.csv
Data loaded successfully! Shape: (104, 345)


## Prepare Data

In [3]:
# Separate the table into the feature matrix (every column except the target)
# and the target column itself.
X = data.drop(columns='MechanicalProperty')
y = data.loc[:, 'MechanicalProperty']

# Report the dimensions of the feature matrix and the spread of the target.
print(f"Features: {len(X.columns)} genes, Samples: {len(X)} cell lines")
print(f"Target range: {y.min():.3f} - {y.max():.3f}")

Features: 344 genes, Samples: 104 cell lines
Target range: 0.060 - 8.500


## Train Model

In [4]:
# Hold out 20% of the samples for testing. The same seed drives both the split
# and the regressor so the run is repeatable.
seed = 1021
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=seed,
)

# Learn the scaling statistics from the training split only, then apply that
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit a gradient boosting regressor on the scaled training data; apart from the
# seed, no hyperparameters are overridden.
model = GradientBoostingRegressor(random_state=seed).fit(X_train_scaled, y_train)

print("Model training completed!")

Model training completed!


## Evaluate Model

In [5]:
# Predict on both splits with the fitted model.
y_test_pred = model.predict(X_test_scaled)
y_train_pred = model.predict(X_train_scaled)

# Score the held-out split: mean squared error and coefficient of determination.
mse_test = mean_squared_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)

print(f"Test R²: {r2_test:.2f}\nTest MSE: {mse_test:.2f}")

Test R²: 0.37
Test MSE: 1.76


## Save Model

In [6]:
# Save model and scaler
# joblib.dump writes to the given path but does not create missing parent
# directories, so make sure the output folder exists first (this is a no-op
# when the folder is already there).
os.makedirs('model', exist_ok=True)
joblib.dump(model, 'model/model.joblib')
joblib.dump(scaler, 'model/scaler.joblib')

print("Model and scaler saved successfully!")

Model and scaler saved successfully!
