In [0]:
import os,sys

# Cap the OpenMP / MKL / OpenBLAS thread pools at one thread each.
# setdefault() leaves any value that is already present in the environment
# untouched, so an externally exported limit still wins.
for thread_var in ("OMP_NUM_THREADS", "MKL_NUM_THREADS", "OPENBLAS_NUM_THREADS"):
    os.environ.setdefault(thread_var, "1")

print("Thread limits set.")

In [0]:
%pip install sqlmodel


print("Thread limits set.")

In [0]:
# Run configuration for this notebook. The trailing comment on each line names
# the command-line flag the value appears to correspond to; the values assigned
# here are the ones that take effect.
raise_on_error = True                      # --raise-on-error
disable_dm = True                          # --disable-dm
rf_n_jobs = 1                              # --rf-n-jobs
cv_n_jobs = 1                              # --cv-n-jobs
only_node = "Haematological malignancy"    # --only-node
prefilter_topk = 200                       # --prefilter-topk
prefilter_scan_max = 5000                  # --prefilter-scan-max
prefilter_chunk_size = 1000                # --prefilter-chunk-size


def set_or_clear_env(name, value):
    """Export ``value`` as environment variable ``name``, or remove the variable.

    Args:
        name: Environment variable name.
        value: Value to export (converted with ``str``). ``None``, ``False``
            and the empty string mean "option is off" and remove the variable.

    Removing the variable when the option is off keeps the cell idempotent:
    re-running it after switching an option off does not leave the previous
    value behind in the kernel's environment.
    """
    if value is None or value is False or value == "":
        os.environ.pop(name, None)
    else:
        os.environ[name] = str(value)


# Optional switches: present only while enabled.
set_or_clear_env("MCH_DISABLE_DM", "1" if disable_dm else None)
set_or_clear_env("MCH_ONLY_NODE", only_node)

# Numeric settings: always exported as strings.
set_or_clear_env("RF_N_JOBS", rf_n_jobs)
set_or_clear_env("CV_N_JOBS", cv_n_jobs)
set_or_clear_env("MCH_PREFILTER_TOPK", prefilter_topk)
set_or_clear_env("MCH_PREFILTER_SCAN_MAX", prefilter_scan_max)
set_or_clear_env("MCH_PREFILTER_CHUNK_SIZE", prefilter_chunk_size)

print("Environment variables set.")

In [0]:
import polars as pl

# Best-effort smoke test: try to read one pivot CSV and report its shape.
# A failure is only reported, never raised, so the notebook keeps running.
sample_csv_path = "/Volumes/cb_prod/comp9300-9900-f18a-cake/9900-f18a-cake/data/mvalue_outputs_masked_subset_leukaemia_subsampled/MValue_polaris_pivot_0.csv"

try:
    test = pl.read_csv(sample_csv_path)
    shape_message = f"Test CSV read. Shape: {test.height} rows, {test.width} columns"
    print(shape_message)
    print(test.head(2))
except Exception as read_error:
    failure_message = f"Initial data check failed (this is expected if you are using the old path): {read_error}"
    print(failure_message)

In [0]:
import sys
import importlib.util
import os
from pathlib import Path

# --- Workaround: drop a possibly conflicting table-definition module before loading ---


def evict_cached_module(module_name):
    """Remove ``module_name`` from ``sys.modules`` if it is cached.

    Args:
        module_name: Fully qualified module name to evict.

    Returns:
        True if an entry was removed, False if the module was not cached.
    """
    not_cached = object()  # sentinel: a cached entry may legitimately be None
    return sys.modules.pop(module_name, not_cached) is not not_cached


# The conflicting table definition is assumed to come from
# mch.db.database_tables, so forget the cached module and let the dynamic load
# below import it afresh.
if evict_cached_module('mch.db.database_tables'):
    print("💡 Module 'mch.db.database_tables' has been removed from the cache.")

# NOTE(review): evicting the module does not touch table objects that were
# already registered on a MetaData instance. If the 'zcc_analysis_set' table
# conflict persists, the table presumably has to be removed from that MetaData
# as well (for SQLAlchemy-style metadata: metadata.remove(table)) — confirm
# against the mch.db code before adding that here.

# --- Dynamic module loading ---
SOURCE_PATH = r"/Workspace/9900-f18a-cake/mt-method2/src/mch/models/train_logreg.py"
MODULE_NAME = "train_logreg_module"


def load_module_from_path(module_name, source_path):
    """Import the Python file at ``source_path`` under the name ``module_name``.

    The module is registered in ``sys.modules`` before it is executed and the
    registration is rolled back if execution fails, so a failed load never
    leaves a half-initialised module behind for later cells to pick up.

    Args:
        module_name: Name to register the module under in ``sys.modules``.
        source_path: Path of the ``.py`` file to execute.

    Returns:
        The loaded module object.

    Raises:
        FileNotFoundError: ``source_path`` is not an existing file.
        ImportError: no import spec/loader could be created for the path.
        Exception: whatever the module's own top-level code raises.
    """
    if not os.path.isfile(source_path):
        raise FileNotFoundError(f"Source file not found at: {source_path}")

    spec = importlib.util.spec_from_file_location(module_name, source_path)
    # spec_from_file_location returns None when it cannot pick a loader.
    if spec is None or spec.loader is None:
        raise ImportError(f"Cannot create an import spec for: {source_path}")

    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    try:
        spec.loader.exec_module(module)
    except BaseException:
        # Roll back the registration, then let the original error propagate.
        sys.modules.pop(module_name, None)
        raise
    return module


try:
    custom_module = load_module_from_path(MODULE_NAME, SOURCE_PATH)
    BatchModelTrainer = custom_module.BatchModelTrainer
    print(f"✅ Successfully loaded module from absolute path: {SOURCE_PATH}")

except Exception as e:
    # Report the failure in the cell output, then stop the cell with the error.
    print(f"❌ Failed to dynamically load module: {e}")
    raise
# ----------------------------------------------


# Train with the dynamically loaded trainer; raise_on_error comes from the
# configuration cell.
print("Starting training with Logistic Regression...")
trainer = BatchModelTrainer()
stats = trainer.train_all_models(raise_on_error=raise_on_error)

print("Training finished!")
display(stats)

# --- Verify if the model is loaded correctly ---
# Check the node selected in the configuration cell; fall back to the node this
# notebook was written for when no single node is selected.
node_to_check = only_node or 'Haematological malignancy'
trained_model = trainer.models.get(node_to_check)
pipeline_steps = getattr(trained_model, 'named_steps', None)

if trained_model is None:
    print(f"No trained model found for node: {node_to_check!r}")
elif pipeline_steps is None:
    print(f"Model for {node_to_check!r} is not a pipeline: {type(trained_model)}")
else:
    try:
        model_type = pipeline_steps['modelGeneration']
    except KeyError:
        print(f"Model for {node_to_check!r} has no 'modelGeneration' step.")
    else:
        print(f"Model Type Actually Trained: {type(model_type)}")
# ------------------------------