From d2721521cd67973195a9d2242dcd70f44700dccc Mon Sep 17 00:00:00 2001 From: Bo Peng Date: Tue, 9 Oct 2018 17:55:41 -0400 Subject: [PATCH] Better handling error from job submission script. vatlab/sos#1078 --- src/sos_pbs/tasks.py | 28 +++++++++++----------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/src/sos_pbs/tasks.py b/src/sos_pbs/tasks.py index f39c0c4..a7cd767 100755 --- a/src/sos_pbs/tasks.py +++ b/src/sos_pbs/tasks.py @@ -114,6 +114,9 @@ def _prepare_script(self, task_id): except Exception as e: raise RuntimeError(f'Failed to submit task {task_id}: {e}') + if not cmd_output: + raise RuntimeError(f'Failed to submit task {task_id} with command {cmd}. No output returned.') + if 'submit_cmd_output' not in self.config: submit_cmd_output = '{job_id}' else: @@ -127,23 +130,14 @@ def _prepare_script(self, task_id): # let us write an job_id file so that we can check status of tasks more easily job_id_file = os.path.join(os.path.expanduser('~'), '.sos', 'tasks', task_id + '.job_id') with open(job_id_file, 'w') as job: - try: - res = extract_pattern(submit_cmd_output, [cmd_output.strip()]) - if 'job_id' not in res or len(res['job_id']) != 1: - env.logger.warning( - f'Failed to extract job_id from "{cmd_output.strip()}" using pattern "{submit_cmd_output}"') - job_id = '000000' - job.write(f'job_id: {job_id}\n') - else: - job_id = res['job_id'][0] - # other variables - for k,v in res.items(): - job.write(f'{k}: {v[0]}\n') - except Exception as e: - env.logger.warning( - f'Failed to extract job_id from "{cmd_output.strip()}" using pattern "{submit_cmd_output}"') - job_id = '000000' - job.write(f'job_id: {job_id}\n') + res = extract_pattern(submit_cmd_output, [cmd_output.strip()]) + if 'job_id' not in res or len(res['job_id']) != 1 or res['job_id'][0] is None: + raise RuntimeError(f'Failed to extract job_id from "{cmd_output.strip()}" using pattern "{submit_cmd_output}"') + else: + job_id = res['job_id'][0] + # other variables + for k,v in res.items(): + job.write(f'{k}: {v[0]}\n') # Send job id files to remote host so that # 1. the job could be properly killed (with job_id) on remote host (not remotely) # 2. the job status could be perperly probed in case the job was not properly submitted (#911)