In [41]:
from itertools import product
import os
import glob
import random

import numpy as np
import pandas as pd
from pathlib import Path


# function

In [47]:
def grid_args_generator(lr_range_ls,wd_range_ls,steps=False,steps_lr=10,steps_wd=10,mode:str='linspace'):
    '''
    description: build the grid-search list of (lr, wd) combinations.

    args:
        lr_range_ls: list of tuples, each of the form (lr_start, lr_end).
        wd_range_ls: list of tuples of the form (wd_start, wd_end), or a single
            non-list value that is used as the one fixed weight decay.
        steps: number of points per range, given as an int or a numeric string.
            When truthy it overrides both steps_lr and steps_wd.
        steps_lr: number of points generated for every lr range.
        steps_wd: number of points generated for every wd range.
        mode: 'linspace' or 'logspace'; selects np.linspace or np.logspace.
            With 'logspace' the range bounds are passed to np.logspace as-is,
            i.e. they are exponents, not the values themselves.

    returns:
        list of (lr, wd) tuples: the Cartesian product of all generated
        learning rates with all generated weight decays. The end of every
        range is excluded (endpoint=False).

    raises:
        AssertionError: if mode is not 'linspace' or 'logspace'.
        ValueError: if steps is a string that is not an integer literal.
    '''
    if steps:
        # int() instead of eval(): accepts both 10 and '10' without executing
        # arbitrary code (eval also failed outright on a plain int).
        steps_lr = steps_wd = int(steps)
    assert mode in ['linspace','logspace'], 'wrong space searching strategy,should be choosen from [linspace,logspace]'
    space_fn = np.linspace if mode == 'linspace' else np.logspace

    lr_ls = []
    for lr_range in lr_range_ls:
        lr_ls += space_fn(lr_range[0], lr_range[1], steps_lr, endpoint=False).tolist()

    if not isinstance(wd_range_ls, list):
        # A fixed weight decay was requested: wd_range_ls is a single value.
        wd_ls = [wd_range_ls]
    else:
        wd_ls = []
        for wd_range in wd_range_ls:
            wd_ls += space_fn(wd_range[0], wd_range[1], steps_wd, endpoint=False).tolist()

    return list(product(lr_ls, wd_ls))

def random_args_generator(lr_range_ls, wd_range_ls, num_lr, num_wd, **kwargs):
    """
    description: random search function.

    The global ``random`` generator is re-seeded with 42 on every call, so the
    same arguments always yield the same samples.

    Args:
        lr_range_ls (list): Learning rate ranges, each element is a tuple (min, max)
        wd_range_ls (list): Weight decay ranges, each element is a tuple (min, max)
        num_lr (int): Number of uniform samples drawn from every learning rate range
        num_wd (int): Number of uniform samples drawn from every weight decay range
        **kwargs: When any keyword argument is given, both ``dropout_patch``
            and ``dropout_node`` must be present as iterables of values;
            a missing key raises KeyError.

    Returns:
        list: Cartesian product of the sampled values, as (lr, wd) tuples when
        no keyword arguments are given, otherwise as
        (lr, wd, dropout_patch, dropout_node) tuples.
    """
    random.seed(42)

    # All learning rates are drawn before any weight decay, range by range.
    sampled_lrs = [
        random.uniform(low, high)
        for low, high in lr_range_ls
        for _ in range(num_lr)
    ]
    sampled_wds = [
        random.uniform(low, high)
        for low, high in wd_range_ls
        for _ in range(num_wd)
    ]

    if not kwargs:
        print('kwargs not passed')
        return list(product(sampled_lrs, sampled_wds))

    print('kwargs passed')
    return list(
        product(sampled_lrs, sampled_wds, kwargs['dropout_patch'], kwargs['dropout_node'])
    )
    
def bash_file_generator(hyperparameter_combs,description,k=5,extractor='pretrained_resnet18',scale=10,feats_size=512,average=True,num_classes = 2,num_epoch=75,total_gpu=4,gpu_count=4,record='best',reg=False,reg_coef=0,warmup=False):
    '''
    description: write one shell script per GPU block, each holding the
    ``nohup python ../../train.py ...`` commands for its share of the
    hyperparameter combinations.

    Directories (created when missing, relative to the current working dir):
        ./train/training_outfiles/{description}   -- created here; the commands
            redirect their output to ../../training_outfiles/{description}/
        ./train/training_details/{description}    -- receives the scripts
            {description}_1.sh ... {description}_{total_gpu}.sh
    NOTE(review): the relative ../../ paths inside the commands presumably
    assume the scripts are launched from their own directory -- confirm.

    args:
        hyperparameter_combs: sequence of (lr, wd) tuples or
            (lr, wd, dropout_patch, dropout_node) tuples, i.e. the orders
            produced by random_args_generator. For 2-tuples both dropout
            values are written as 0.
        description[str]: brief description of the trial; used as the
            sub-folder name and as the script file prefix.
        k, extractor, scale, feats_size, average, num_classes, record, reg,
        reg_coef, warmup: forwarded verbatim as --kfold, --extractor, --scale,
            --feats_size, --average, --num_classes, --record, --reg,
            --reg_coef and --warmup.
        num_epoch: written to both --Tmax and --num_epochs.
        total_gpu: number of script files (blocks) to generate.
        gpu_count: modulus for --gpu_index; the jobs of block b get
            gpu index b % gpu_count.

    raises:
        ValueError: if total_gpu or gpu_count is smaller than 1, or if a
            combination does not have 2 or 4 elements.
    '''
    if total_gpu < 1 or gpu_count < 1:
        raise ValueError('total_gpu and gpu_count must both be at least 1')

    N = len(hyperparameter_combs)
    print(f'{N} hypermarameter combinations in total,run with {extractor} extracted features')

    # Make sure both target directories exist.
    out_saving_dir = f'./train/training_outfiles/{description}'
    os.makedirs(out_saving_dir, exist_ok=True)
    sh_saving_dir = f'./train/training_details/{description}'
    os.makedirs(sh_saving_dir, exist_ok=True)

    # Jobs per script, rounded up. ceil(N / total_gpu) keeps every block busy
    # when N is a multiple of total_gpu (N // total_gpu + 1 left the last
    # block empty in that case).
    jobs_per_block = max(1, -(-N // total_gpu))
    blocks = [[] for _ in range(total_gpu)]

    for job_id, comb in enumerate(hyperparameter_combs):
        if len(comb) == 2:
            lr, wd = comb
            dropout_patch = dropout_node = 0
        elif len(comb) == 4:
            # Tuple order matches random_args_generator's product() call.
            lr, wd, dropout_patch, dropout_node = comb
        else:
            raise ValueError(
                f'hyperparameter combination {job_id} has {len(comb)} elements, expected 2 or 4'
            )

        block_no = job_id // jobs_per_block + 1  # 1-based, like the file names
        # The space before '>' matters: a value such as "5>" would be parsed
        # by the shell as a redirection of file descriptor 5.
        blocks[block_no - 1].append(
            f'nohup python ../../train.py --description {description} --lr {lr} --weight_decay {wd} '
            f'--Tmax {num_epoch} --extractor {extractor} --kfold {k} --scale {scale} '
            f'--feats_size {feats_size} --average {average} --dropout_node {dropout_node} '
            f'--dropout_patch {dropout_patch} --num_epochs {num_epoch} --num_classes {num_classes} '
            f'--gpu_index {block_no%gpu_count} --record {record} --reg {reg} --reg_coef {reg_coef} '
            f'--warmup {warmup} > ../../training_outfiles/{description}/train_tcga_{job_id}.out \n'
        )

    for block_no, commands in enumerate(blocks, start=1):
        with open(f'{sh_saving_dir}/{description}_{block_no}.sh','w') as f:
            f.write(''.join(commands))
    print('file generated')

# 2024-1-25 training run: newly extracted patches (threshold = 100), random search

## 10X, fold5
### pretrained resnet18, pretrained resnet50, ctranspath, retccl

In [51]:
def batch1():
    """
    Generate the random-search shell scripts for every feature extractor at
    both magnifications (10X and 20X), 5-fold, 75 epochs, 4 GPU blocks.

    Three lr ranges and three wd ranges are sampled 3 times each, giving
    9 x 9 = 81 (lr, wd) combinations that are shared by all runs.
    """
    lr_range_ls = [(1e-4, 1e-3), (1e-5, 1e-4), (1e-6, 1e-5)]
    wd_range_ls = [(1e-5, 1e-4), (1e-4, 1e-3), (1e-6, 1e-5)]
    hyperparameter_combs = random_args_generator(lr_range_ls, wd_range_ls, num_lr=3, num_wd=3)

    # (description suffix, extractor name, feats_size); None means feats_size
    # is not passed, so bash_file_generator's own default applies.
    extractor_specs = [
        ('retccl', 'retccl_resnet50_2048', 2048),
        ('pretrained_resnet18', 'pretrained_resnet18', None),
        ('pretrained_resnet50', 'pretrained_resnet50', None),
        ('cTransPath', 'cTransPath', 768),
        ('TCGA_high', 'TCGA_high', 256),
        ('TCGA_low', 'TCGA_low', 256),
        ('c16_high', 'c16_high', 256),
    ]

    for suffix, extractor_name, feature_dim in extractor_specs:
        size_kwargs = {} if feature_dim is None else {'feats_size': feature_dim}
        # 10X first, then 20X, for each extractor in turn.
        for magnification in (10, 20):
            bash_file_generator(
                hyperparameter_combs,
                description=f'{magnification}X_5fold_{suffix}',
                k=5,
                scale=magnification,
                extractor=extractor_name,
                num_epoch=75,
                record='best',
                total_gpu=4,
                gpu_count=4,
                num_classes=1,
                **size_kwargs,
            )
batch1()

kwargs not passed
81 hypermarameter combinations in total,run with retccl_resnet50_2048 extracted features
file generated
81 hypermarameter combinations in total,run with retccl_resnet50_2048 extracted features
file generated
81 hypermarameter combinations in total,run with pretrained_resnet18 extracted features
file generated
81 hypermarameter combinations in total,run with pretrained_resnet18 extracted features
file generated
81 hypermarameter combinations in total,run with pretrained_resnet50 extracted features
file generated
81 hypermarameter combinations in total,run with pretrained_resnet50 extracted features
file generated
81 hypermarameter combinations in total,run with cTransPath extracted features
file generated
81 hypermarameter combinations in total,run with cTransPath extracted features
file generated
81 hypermarameter combinations in total,run with TCGA_high extracted features
file generated
81 hypermarameter combinations in total,run with TCGA_high extracted features
fil