diff --git a/tensorflowonspark/TFSparkNode.py b/tensorflowonspark/TFSparkNode.py index d9d6f084..3669968d 100644 --- a/tensorflowonspark/TFSparkNode.py +++ b/tensorflowonspark/TFSparkNode.py @@ -275,7 +275,7 @@ def _mapfn(iter): cluster_spec[njob] = hosts # update TF_CONFIG if cluster spec has a 'master' node (i.e. tf.estimator) - if 'master' in cluster_spec: + if 'master' in cluster_spec or 'chief' in cluster_spec: tf_config = json.dumps({ 'cluster': cluster_spec, 'task': {'type': job_name, 'index': task_index}, diff --git a/tensorflowonspark/gpu_info.py b/tensorflowonspark/gpu_info.py index ffc7ae7a..9e776ab6 100644 --- a/tensorflowonspark/gpu_info.py +++ b/tensorflowonspark/gpu_info.py @@ -96,9 +96,9 @@ def parse_gpu(gpu_str): proposed_gpus = free_gpus[:num_gpu] else: # ordered by worker index - if worker_index + num_gpu > num_available: - worker_index = worker_index % num_available - proposed_gpus = free_gpus[worker_index:(worker_index + num_gpu)] + if worker_index * num_gpu + num_gpu > num_available: + worker_index = worker_index * num_gpu % num_available + proposed_gpus = free_gpus[worker_index * num_gpu:(worker_index * num_gpu + num_gpu)] logging.info("Proposed GPUs: {}".format(proposed_gpus)) return ','.join(str(x) for x in proposed_gpus)