Skip to content

Commit

Permalink
[SPARK-5641] [EC2] implement --deploy-root-dir
Browse files Browse the repository at this point in the history
  • Loading branch information
florianverhein committed Feb 13, 2015
1 parent 1d0596a commit 87d922c
Showing 1 changed file with 40 additions and 0 deletions.
40 changes: 40 additions & 0 deletions ec2/spark_ec2.py
Expand Up @@ -159,6 +159,11 @@ def parse_args():
"--spark-ec2-git-branch",
default=DEFAULT_SPARK_EC2_BRANCH,
help="Github repo branch of spark-ec2 to use (default: %default)")
parser.add_option(
"--deploy-root-dir",
default=None,
help="A directory to deploy onto / on the first master. " +
"Must be absolute. (default: %default)")
parser.add_option(
"--hadoop-major-version", default="1",
help="Major version of Hadoop (default: %default)")
Expand Down Expand Up @@ -694,6 +699,17 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key):
modules=modules
)

if opts.deploy_root_dir is not None:
print "Deploying {s} to master...".format(s=opts.deploy_root_dir)
deploy_user_files(
conn=conn,
root_dir=opts.deploy_root_dir,
opts=opts,
master_nodes=master_nodes,
slave_nodes=slave_nodes,
modules=modules
)

print "Running setup on master..."
setup_spark_cluster(master, opts)
print "Done!"
Expand Down Expand Up @@ -931,6 +947,22 @@ def deploy_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
shutil.rmtree(tmp_dir)


# Deploy files in a given local directory to a cluster, WITHOUT parameter substitution.
# Note that unlike deploy_files, this works for binary files.
# Files are only deployed to the first master instance in the cluster.
#
# root_dir should be an absolute path to the directory with the files we want to deploy.
def deploy_user_files(conn, root_dir, opts, master_nodes, slave_nodes, modules):
active_master = master_nodes[0].public_dns_name
command = [
'rsync', '-rv',
'-e', stringify_command(ssh_command(opts)),
"%s/" % root_dir,
"%s@%s:/" % (opts.user, active_master)
]
subprocess.check_call(command)


def stringify_command(parts):
if isinstance(parts, str):
return parts
Expand Down Expand Up @@ -1099,6 +1131,14 @@ def real_main():
"Furthermore, we currently only support forks named spark-ec2."
sys.exit(1)

if not (opts.deploy_root_dir is None or
(os.path.isabs(opts.deploy_root_dir) and
os.path.isdir(opts.deploy_root_dir) and
os.path.exists(opts.deploy_root_dir))):
print >> stderr, "--deploy-root-dir must be an absolute path to a directory that exists " \
"on the local file system"
sys.exit(1)

try:
conn = ec2.connect_to_region(opts.region)
except Exception as e:
Expand Down

0 comments on commit 87d922c

Please sign in to comment.