Skip to content

Commit

Permalink
made hpc signal handling optional
Browse files: browse the repository at this point in the history
  • Loading branch information
williamFalcon committed Sep 5, 2019
1 parent fa49681 commit a5dcf31
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions test_tube/hpc.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -120,12 +120,14 @@ def optimize_parallel_cluster_gpu(
train_function,
nb_trials,
job_name,
enable_auto_resubmit=False,
job_display_name=None
):
if job_display_name is None:
job_display_name = job_name

self.__optimize_parallel_cluster_internal(train_function, nb_trials, job_name, job_display_name, on_gpu=True)
self.__optimize_parallel_cluster_internal(train_function, nb_trials, job_name, job_display_name,
enable_auto_resubmit, on_gpu=True)

def optimize_parallel_cluster_cpu(
self,
Expand All @@ -145,6 +147,7 @@ def __optimize_parallel_cluster_internal(
nb_trials,
job_name,
job_display_name,
enable_auto_resubmit,
on_gpu
):
"""
Expand All @@ -157,6 +160,7 @@ def __optimize_parallel_cluster_internal(
self.job_name = job_name
self.job_display_name = job_display_name
self.on_gpu = on_gpu
self.enable_auto_resubmit = enable_auto_resubmit

# layout logging structure
self.__layout_logging_dir()
Expand Down Expand Up @@ -256,9 +260,10 @@ def term_handler(self, signum, frame):
print("bypassing sigterm")

def __run_experiment(self, train_function):
print('setting signal')
signal.signal(signal.SIGUSR1, self.sig_handler)
signal.signal(signal.SIGTERM, self.term_handler)
if self.enable_auto_resubmit:
print('setting signal')
signal.signal(signal.SIGUSR1, self.sig_handler)
signal.signal(signal.SIGTERM, self.term_handler)

try:
# run training
Expand Down

0 comments on commit a5dcf31

Please sign in to comment.