In [1]:
import numpy as np
import pandas as pd
from itertools import product

# function

In [96]:
def args_generator(lr_range_ls,wd_range_ls,steps:int=False,steps_lr=10,steps_wd=10,mode:str='linspace'):
    '''
    Build the grid of (learning rate, weight decay) pairs to search.

    Every range is sampled with the upper bound excluded (endpoint=False), the samples
    of all ranges are concatenated, and the cartesian product lr x wd is returned.

    args:
        lr_range_ls: list of tuples, every tuple is constructed in form of (lr_start,lr_end)
        wd_range_ls: list of tuples, in form that is similar to lr_range_ls
        steps: number of samples per range; if given (truthy), it overrides both steps_lr and steps_wd
        steps_lr: number of samples per learning-rate range (used when steps is not given)
        steps_wd: number of samples per weight-decay range (used when steps is not given)
        mode: 'linspace' or 'logspace'. With 'logspace' the range bounds are passed to
            np.logspace, i.e. they are base-10 exponents rather than the values themselves.
    returns:
        list of (lr, wd) tuples, lr varying slowest
    raises:
        AssertionError: if mode is not one of the two supported strategies
    '''
    assert mode in ['linspace','logspace'], 'wrong space searching strategy,should be choose from [linspace,logspace]'
    if steps:
        steps_lr = steps
        steps_wd = steps

    # Both numpy samplers share the (start, stop, num, endpoint) signature, so one helper serves both modes.
    sampler = np.linspace if mode == 'linspace' else np.logspace

    def _sample_ranges(range_ls, n_samples):
        # Concatenate the samples of every (start, end) range in the order given.
        values = []
        for bounds in range_ls:
            values += sampler(bounds[0], bounds[1], n_samples, endpoint=False).tolist()
        return values

    lr_ls = _sample_ranges(lr_range_ls, steps_lr)
    wd_ls = _sample_ranges(wd_range_ls, steps_wd)
    return list(product(lr_ls, wd_ls))

# TODO: bring this function in line with bash_file_generator_PtRes, or merge the two into one function with an argument to switch between them
# only used to generate the scripts for trials 1-3
def bash_file_generator(hyperparameter_combs,run,total_gpu=4,gpu_count=4):
    '''
    Write total_gpu bash scripts, each holding a contiguous share of the train_tcga.py commands.

    The commands are split as evenly as possible: block sizes differ by at most one, and when
    the number of combinations is a multiple of total_gpu every block gets exactly
    N/total_gpu consecutive commands.

    args:
        hyperparameter_combs: sized sequence of (lr, weight_decay) pairs (e.g. output of args_generator)
        run: batch identifier; used in directory/file names and passed to the script as --run
        total_gpu: total gpu number you own (= number of scripts generated), [8]
        gpu_count: gpu number on single server, [4]; --gpu_index is the block number modulo gpu_count
    command saving path:
        ./mil classifier/hyperparam_select_batch_{run}/hyperparam_select_batch_{run}_{i+1}.sh
    raises:
        ValueError: if total_gpu or gpu_count is smaller than 1
    '''
    if total_gpu < 1 or gpu_count < 1:
        raise ValueError('total_gpu and gpu_count must both be at least 1')

    N = len(hyperparameter_combs)
    print(f'{N} hypermarameter combinations in total')

    #path operation (paths are relative to the current working directory)
    out_saving_dir = f'../out/5_classifier/hyperparam_select_batch{run}'
    os.makedirs(out_saving_dir, exist_ok=True)
    sh_saving_dir = f'./mil classifier/hyperparam_select_batch_{run}'
    os.makedirs(sh_saving_dir, exist_ok=True)
    # NOTE(review): this third directory is resolved against the current working directory, whereas
    # the redirect in the generated command is resolved wherever the script is later executed from -
    # confirm whether it is still needed (bash_file_generator_PtRes does not create it).
    # It is created once here, and only when there is at least one command, instead of on every iteration.
    if N:
        os.makedirs(f'../../out/5_classifier/hyperparam_select_batch{run}', exist_ok=True)

    blocks = [[] for _ in range(total_gpu)]
    for idx, comb in enumerate(hyperparameter_combs):
        # Integer arithmetic keeps the split balanced even when N is not a multiple of total_gpu;
        # the previous float modulo test could leave every command in the first block.
        block_no = idx * total_gpu // N + 1
        blocks[block_no - 1].append(
            f'nohup python train_tcga.py --lr {comb[0]} --weight_decay {comb[1]} --gpu_index {block_no%gpu_count} --run {run} > ../../out/5_classifier/hyperparam_select_batch{run}/train_tcga_{idx}.out \n'
        )

    for block_no, commands in enumerate(blocks, start=1):
        with open(f'./mil classifier/hyperparam_select_batch_{run}/hyperparam_select_batch_{run}_{block_no}.sh','w') as f:
            f.write(''.join(commands))
    print('file generated')
    
import os
def bash_file_generator_PtRes(hyperparameter_combs,run,total_gpu=4,gpu_count=4):
    '''
    Write total_gpu bash scripts, each holding a contiguous share of the train_PtRes.py commands.

    Same layout as bash_file_generator, but the commands call ../train_PtRes.py (per the original
    note: the trainer that reads features extracted by a pretrained Resnet18). The commands are
    split as evenly as possible: block sizes differ by at most one, and when the number of
    combinations is a multiple of total_gpu every block gets exactly N/total_gpu consecutive commands.

    args:
        hyperparameter_combs: sized sequence of (lr, weight_decay) pairs (e.g. output of args_generator)
        run: batch identifier; used in directory/file names and passed to the script as --run
        total_gpu: number of scripts generated
        gpu_count: --gpu_index is the block number modulo gpu_count
    directories (created if missing, relative to the current working directory):
        out files: ../out/5_classifier/hyperparam_select_batch{run}
        sh files:  ./mil classifier/hyperparam_select_batch_{run}
    raises:
        ValueError: if total_gpu or gpu_count is smaller than 1
    '''
    if total_gpu < 1 or gpu_count < 1:
        raise ValueError('total_gpu and gpu_count must both be at least 1')

    N = len(hyperparameter_combs)
    print(f'{N} hypermarameter combinations in total,run with pretrained Resnet18 extracted features')

    #path detection
    out_saving_dir = f'../out/5_classifier/hyperparam_select_batch{run}'
    os.makedirs(out_saving_dir, exist_ok=True)
    sh_saving_dir = f'./mil classifier/hyperparam_select_batch_{run}'
    os.makedirs(sh_saving_dir, exist_ok=True)

    blocks = [[] for _ in range(total_gpu)]
    for idx, comb in enumerate(hyperparameter_combs):
        # Integer arithmetic keeps the split balanced even when N is not a multiple of total_gpu;
        # the previous float modulo test could leave every command in the first block.
        block_no = idx * total_gpu // N + 1
        blocks[block_no - 1].append(
            f'nohup python ../train_PtRes.py --lr {comb[0]} --weight_decay {comb[1]} --gpu_index {block_no%gpu_count} --run {run} > ../../../out/5_classifier/hyperparam_select_batch{run}/train_tcga_{idx}.out \n'
        )

    for block_no, commands in enumerate(blocks, start=1):
        with open(f'./mil classifier/hyperparam_select_batch_{run}/hyperparam_select_batch_{run}_{block_no}.sh','w') as f:
            f.write(''.join(commands))
    print('file generated')

# args generation for 1st trial

In [2]:
# Trial-1 search grids: every range is sampled at 10 evenly spaced points, upper bound excluded.
ls_lr = [
    value
    for start, stop in ((1e-4, 1e-3), (1e-5, 1e-4))
    for value in np.linspace(start, stop, 10, endpoint=False).tolist()
]
ls_weight_decay = [
    value
    for start, stop in ((1e-5, 1e-4), (1e-7, 1e-6), (1e-6, 1e-5))
    for value in np.linspace(start, stop, 10, endpoint=False).tolist()
]
ls_Tmax = [50, 100, 200]

In [3]:
# Cartesian product of the three grids: one (lr, weight_decay, Tmax) tuple per combination.
hp_comb = list(product(ls_lr, ls_weight_decay, ls_Tmax))
len(hp_comb)

1800

In [7]:
# Build the trial-1 command text, grouped into 12 blocks keyed 'block1' .. 'block12'.
string = {}
# 1-based number of the block currently being filled.
count = 1
for i in range(12):
    string[f'block{i+1}'] = ''
for id,i in enumerate(hp_comb):
    # One command per (lr, weight_decay, Tmax) combination; the gpu index is the block number modulo 4
    # and the output is redirected to a file numbered by the combination's position in hp_comb.
    string[f'block{count}'] += f'nohup python train_tcga.py --lr {i[0]} --weight_decay {i[1]} --Tmax {i[2]} --gpu_index {count%4} > ../../out/5_classifier/hyperparam_select/train_tcga_{id}.out \n'
    # Move on to the next block after every 150 commands (12 blocks x 150 = the 1800 combinations).
    if (id+1)%150 == 0:
        count +=1

In [9]:
# Write each of the 12 command blocks to its own script, hyperparam_select_1.sh .. hyperparam_select_12.sh.
for i in range(12):
    with open(f'./mil classifier/hyperparam_select_{i+1}.sh','w') as f:
        f.write(string[f'block{i+1}'])

In [None]:
# with open('./mil classifier/hyperparam_select.sh','w') as f:
#     f.write(str)

In [None]:
# Load the pickled object stored in the .npy file; .item() extracts it from the array np.load returns.
# NOTE(review): allow_pickle=True can execute arbitrary code when loading - only use on trusted files.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
grouping = np.load('/home/wangyh/uro_biomarker/patho_AI/config/data_segmentation_csv/10X_grouping.npy',allow_pickle=True).item()

In [33]:
# Display the 'train_list' entry of the loaded grouping object.
grouping['train_list']

Unnamed: 0,dir_uuid,TMB_H/L,10x,path,img_list
1,bff29d20-3a8f-4a5d-a2de-0e142390551d,L,1067,/mnt/wangyh/CN_patches/L/bff29d20-3a8f-4a5d-a2...,[/mnt/wangyh/CN_patches/L/bff29d20-3a8f-4a5d-a...
2,edcf4ae6-c985-40ad-aff4-a0ce31b46aeb,L,951,/mnt/wangyh/CN_patches/L/edcf4ae6-c985-40ad-af...,[/mnt/wangyh/CN_patches/L/edcf4ae6-c985-40ad-a...
3,e20c2176-2b57-4cc9-a68a-eb6933bf60b1,L,6,/mnt/wangyh/CN_patches/L/e20c2176-2b57-4cc9-a6...,[/mnt/wangyh/CN_patches/L/e20c2176-2b57-4cc9-a...
5,70e383bf-91cb-4a65-b12a-950f365c4d62,L,389,/mnt/wangyh/CN_patches/L/70e383bf-91cb-4a65-b1...,[/mnt/wangyh/CN_patches/L/70e383bf-91cb-4a65-b...
7,741d4359-8615-4783-98ab-0f04bb10c52c,L,319,/mnt/wangyh/CN_patches/L/741d4359-8615-4783-98...,[/mnt/wangyh/CN_patches/L/741d4359-8615-4783-9...
...,...,...,...,...,...
365,350cb3f7-a92b-412f-b285-22bf818c5156,L,459,/mnt/wangyh/CN_patches/L/350cb3f7-a92b-412f-b2...,[/mnt/wangyh/CN_patches/L/350cb3f7-a92b-412f-b...
367,5dd71f48-ba3e-46e4-b1d6-164722a3e06a,L,150,/mnt/wangyh/CN_patches/L/5dd71f48-ba3e-46e4-b1...,[/mnt/wangyh/CN_patches/L/5dd71f48-ba3e-46e4-b...
372,613f9ee2-07d2-4ec7-98b5-335e3eb164cd,H,141,/mnt/wangyh/CN_patches/H/613f9ee2-07d2-4ec7-98...,[/mnt/wangyh/CN_patches/H/613f9ee2-07d2-4ec7-9...
373,4d2d1928-abac-4604-becc-d73dac15bfed,H,88,/mnt/wangyh/CN_patches/H/4d2d1928-abac-4604-be...,[/mnt/wangyh/CN_patches/H/4d2d1928-abac-4604-b...


In [35]:
# Load the pickled object stored in 10X_tv_grouping.npy and display its 'tv_list' entry.
# NOTE(review): allow_pickle=True can execute arbitrary code when loading - only use on trusted files.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
train = np.load('/home/wangyh/uro_biomarker/patho_AI/config/data_segmentation_csv/10X_tv_grouping.npy',allow_pickle=True).item()
train['tv_list']

Unnamed: 0,dir_uuid,TMB_H/L,10x,path,img_list
1,bff29d20-3a8f-4a5d-a2de-0e142390551d,L,1067,/mnt/wangyh/CN_patches/L/bff29d20-3a8f-4a5d-a2...,[/mnt/wangyh/CN_patches/L/bff29d20-3a8f-4a5d-a...
2,edcf4ae6-c985-40ad-aff4-a0ce31b46aeb,L,951,/mnt/wangyh/CN_patches/L/edcf4ae6-c985-40ad-af...,[/mnt/wangyh/CN_patches/L/edcf4ae6-c985-40ad-a...
3,e20c2176-2b57-4cc9-a68a-eb6933bf60b1,L,6,/mnt/wangyh/CN_patches/L/e20c2176-2b57-4cc9-a6...,[/mnt/wangyh/CN_patches/L/e20c2176-2b57-4cc9-a...
5,70e383bf-91cb-4a65-b12a-950f365c4d62,L,389,/mnt/wangyh/CN_patches/L/70e383bf-91cb-4a65-b1...,[/mnt/wangyh/CN_patches/L/70e383bf-91cb-4a65-b...
6,790d96d7-1191-4cf7-9c78-3e879723afd8,L,1193,/mnt/wangyh/CN_patches/L/790d96d7-1191-4cf7-9c...,[/mnt/wangyh/CN_patches/L/790d96d7-1191-4cf7-9...
...,...,...,...,...,...
376,6d94e519-6dea-42d0-a505-f36ab27f2f3f,L,583,/mnt/wangyh/CN_patches/L/6d94e519-6dea-42d0-a5...,[/mnt/wangyh/CN_patches/L/6d94e519-6dea-42d0-a...
377,16c68e40-fc90-495a-b856-fe1f11f82143,L,169,/mnt/wangyh/CN_patches/L/16c68e40-fc90-495a-b8...,[/mnt/wangyh/CN_patches/L/16c68e40-fc90-495a-b...
378,ff7d6bdf-b564-41e0-b3e5-2a649423796b,L,49,/mnt/wangyh/CN_patches/L/ff7d6bdf-b564-41e0-b3...,[/mnt/wangyh/CN_patches/L/ff7d6bdf-b564-41e0-b...
379,f7db97bf-bcb4-4383-afe2-36f549a0bdf0,L,504,/mnt/wangyh/CN_patches/L/f7db97bf-bcb4-4383-af...,[/mnt/wangyh/CN_patches/L/f7db97bf-bcb4-4383-a...


In [36]:
# Load the pickled object stored in 10X_full.npy and display it in full.
# NOTE(review): allow_pickle=True can execute arbitrary code when loading - only use on trusted files.
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
full = np.load('/home/wangyh/uro_biomarker/patho_AI/config/data_segmentation_csv/10X_full.npy',allow_pickle=True).item()
full

{'full_list':      Unnamed: 0                              dir_uuid  Tumor_Sample_ID  \
 0             0  d2e43ec6-5027-4f2c-932b-28a681da7cd9  TCGA-2F-A9KO-01   
 1             1  bff29d20-3a8f-4a5d-a2de-0e142390551d  TCGA-2F-A9KP-01   
 2             2  edcf4ae6-c985-40ad-aff4-a0ce31b46aeb  TCGA-2F-A9KP-01   
 3             3  e20c2176-2b57-4cc9-a68a-eb6933bf60b1  TCGA-2F-A9KQ-01   
 4             4  a085fe18-1709-417a-a779-fe69f5964766  TCGA-2F-A9KR-01   
 ..          ...                                   ...              ...   
 376         376  6d94e519-6dea-42d0-a505-f36ab27f2f3f  TCGA-ZF-AA54-01   
 377         377  16c68e40-fc90-495a-b856-fe1f11f82143  TCGA-ZF-AA58-01   
 378         378  ff7d6bdf-b564-41e0-b3e5-2a649423796b  TCGA-ZF-AA5H-01   
 379         379  f7db97bf-bcb4-4383-afe2-36f549a0bdf0  TCGA-ZF-AA5N-01   
 380         380  fa95d06a-b753-4bfd-b890-82bd9ddfd974  TCGA-ZF-AA5P-01   
 
     TMB_H/L   5x   10x   20x    40x  \
 0         H   10    53   270   1506   
 1   

In [3]:
# with open('./mil classifier/hyperparam_select.sh') as f:
#     lines = f.readlines()
#     print(len(lines))

1800


In [10]:
# for i in range(12):
#     with open(f'./mil classifier/hyperparam_select_{i+1}.sh') as f:
#         print(f.readline())

nohup python train_tcga.py --lr 0.0001 --weight_decay 1e-05 --Tmax 50 --gpu_index 1 > ../../out/5_classifier/hyperparam_select/train_tcga_0.out 

nohup python train_tcga.py --lr 0.00019 --weight_decay 1e-06 --Tmax 50 --gpu_index 2 > ../../out/5_classifier/hyperparam_select/train_tcga_150.out 

nohup python train_tcga.py --lr 0.00036999999999999994 --weight_decay 1e-07 --Tmax 50 --gpu_index 3 > ../../out/5_classifier/hyperparam_select/train_tcga_300.out 

nohup python train_tcga.py --lr 0.00055 --weight_decay 1e-05 --Tmax 50 --gpu_index 0 > ../../out/5_classifier/hyperparam_select/train_tcga_450.out 

nohup python train_tcga.py --lr 0.0006399999999999999 --weight_decay 1e-06 --Tmax 50 --gpu_index 1 > ../../out/5_classifier/hyperparam_select/train_tcga_600.out 

nohup python train_tcga.py --lr 0.00082 --weight_decay 1e-07 --Tmax 50 --gpu_index 2 > ../../out/5_classifier/hyperparam_select/train_tcga_750.out 

nohup python train_tcga.py --lr 1e-05 --weight_decay 1e-05 --Tmax 50 --gpu_index

# args for 2nd trial

In [25]:
def batch2():
    '''
    Generate the bash scripts for the 2nd trial (run 2).

    Uses the default sampling of args_generator and the default GPU layout of bash_file_generator.
    '''
    learning_rate_ranges = [
        (6.4e-5, 8.2e-5),
        (1e-4, 2.8e-4),
        (4.5e-4, 6.3e-4),
        (1e-3, 1e-2),
    ]
    weight_decay_ranges = [
        (1e-7, 4.6e-7),
        (2.8e-6, 6.4e-6),
        (8.2e-6, 1e-5),
    ]
    bash_file_generator(args_generator(learning_rate_ranges, weight_decay_ranges), 2)

In [26]:
# batch2()

1200 hypermarameter combinations in total
file generated


# 3rd trial

In [37]:
def batch3():
    '''
    Generate the bash scripts for the 3rd trial (run 3).

    Each lr / weight-decay range is sampled at 20 points and the resulting
    commands are split over 8 scripts (total_gpu=8).
    '''
    # The upper bound of the third lr range was written as `6.1-4` (a subtraction, i.e. 2.1);
    # the intended value is 6.1e-4.
    lr_range_ls = [(4e-4,5.2e-4),(8.2e-3,0.015),(5.5e-4,6.1e-4),(1.9e-3,3.7e-3)]
    wd_range_ls = [(5.32e-6,8.38e-6),(9.3e-6,9.9e-6),(8.74e-6,9.1e-6),(1.36e-7,2.08e-7)]
    hyperparameter_combs = args_generator(lr_range_ls,wd_range_ls,steps=20)
    bash_file_generator(hyperparameter_combs,3,total_gpu=8)

In [38]:
# batch3()

6400 hypermarameter combinations in total
file generated


# 1st trial --- pretrained Resnet18 extracted feature(4th run)

In [100]:
def batch4():
    '''
    Generate the bash scripts for run 4 (train_PtRes commands) and print the searched ranges.

    Every range is sampled at 10 points (steps=10).
    '''
    wd_range_ls = [(1e-7, 1e-6), (1e-6, 1e-5)]
    lr_range_ls = [(1e-4, 1e-3), (1e-5, 1e-4), (1e-3, 1e-2)]

    grid = args_generator(lr_range_ls, wd_range_ls, steps=10)
    bash_file_generator_PtRes(grid, 4)

    # Echo the ranges so the cell output records what was searched.
    print(f'selected lr range:{lr_range_ls}')
    print(f'selected wd range:{wd_range_ls}')

In [95]:
# batch4()

[0.0001, 0.00019, 0.00028, 0.00036999999999999994, 0.00045999999999999996, 0.00055, 0.0006399999999999999, 0.00073, 0.00082, 0.00091, 1e-05, 1.9e-05, 2.8000000000000003e-05, 3.7e-05, 4.6e-05, 5.5e-05, 6.4e-05, 7.3e-05, 8.2e-05, 9.1e-05, 0.001, 0.0019000000000000002, 0.0028000000000000004, 0.0037, 0.0046, 0.0055000000000000005, 0.0064, 0.007300000000000001, 0.0082, 0.0091]
600 hypermarameter combinations in total,run with pretrained Resnet18 extracted features
file generated
selected lr range:[(0.0001, 0.001), (1e-05, 0.0001), (0.001, 0.01)]
selected wd range:[(1e-07, 1e-06), (1e-06, 1e-05)]


# 2nd trial --- pretrained Resnet18 extracted feature,10X(5th run)

In [98]:
def batch5():
    '''
    Generate the bash scripts for run 5 (train_PtRes commands) and print the searched ranges.

    The single lr range is sampled linearly at 30 points; the weight-decay
    ranges use args_generator's default number of steps.
    '''
    wd_range_ls = [(1e-7, 1e-6), (1e-6, 1e-5)]
    lr_range_ls = [(4e-4, 1e-3)]

    grid = args_generator(lr_range_ls, wd_range_ls, steps_lr=30, mode='linspace')
    # run need to be correctly assigned!
    bash_file_generator_PtRes(grid, run=5)

    # Echo the ranges so the cell output records what was searched.
    print(f'selected lr range:{lr_range_ls}')
    print(f'selected wd range:{wd_range_ls}')

In [99]:
# Run the run-5 generation defined above (writes the .sh files and prints the searched ranges).
batch5()

600 hypermarameter combinations in total,run with pretrained Resnet18 extracted features
file generated
selected lr range:[(0.0004, 0.001)]
selected wd range:[(1e-07, 1e-06), (1e-06, 1e-05)]


# 3rd trial --- pretrained Resnet18 extracted feature,10X(6th run)

In [102]:
def batch6():
    '''
    Generate the bash scripts for run 6 (train_PtRes commands) and print the searched ranges.

    Both lr ranges are sampled linearly at 20 points each; the weight-decay
    ranges use args_generator's default number of steps.
    '''
    wd_range_ls = [(1e-7, 1e-6), (1e-6, 1e-5)]
    lr_range_ls = [(4.2e-4, 4.6e-4), (9e-4, 9.6e-4)]

    grid = args_generator(lr_range_ls, wd_range_ls, steps_lr=20, mode='linspace')
    # run need to be correctly assigned!
    bash_file_generator_PtRes(grid, run=6)

    # Echo the ranges so the cell output records what was searched.
    print(f'selected lr range:{lr_range_ls}')
    print(f'selected wd range:{wd_range_ls}')

In [104]:
# Run the run-6 generation defined above (writes the .sh files and prints the searched ranges).
batch6()

800 hypermarameter combinations in total,run with pretrained Resnet18 extracted features
file generated
selected lr range:[(0.00042, 0.00046), (0.0009, 0.00096)]
selected wd range:[(1e-07, 1e-06), (1e-06, 1e-05)]
