
Commit 28a374e
Modified java_gateway.py to recognize arguments

Modified spark-shell and pyspark so that arguments containing whitespace can be passed through to spark-submit.

Modified utils.sh, spark-shell, and pyspark so that each calling script must assign its usage function to the variable "SUBMIT_USAGE_FUNCTION" before invoking gatherSparkSubmitOpts in utils.sh.
sarutak committed Aug 9, 2014
1 parent 5afc584 commit 28a374e
Showing 4 changed files with 38 additions and 23 deletions.
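
For context, this is the class of invocation the change enables; the option values below are illustrative examples, not commands taken from the commit:

    ./bin/spark-shell --driver-java-options "-Da=1 -Db=2"
    ./bin/pyspark --name "my app"

Previously, the unquoted $@ and ${SUBMISSION_OPTS[@]} expansions re-split such values on whitespace, so a single option value like "-Da=1 -Db=2" reached spark-submit as two separate arguments.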
bin/pyspark: 16 changes (10 additions, 6 deletions)
@@ -27,10 +27,14 @@ source $FWDIR/bin/utils.sh
 
 SCALA_VERSION=2.10
 
-if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+function usage() {
   echo "Usage: ./bin/pyspark [options]" 1>&2
   $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
   exit 0
+}
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+  usage
 fi
 
 # Exit if the user hasn't compiled Spark
@@ -68,11 +72,11 @@ fi
 # Build up arguments list manually to preserve quotes and backslashes.
 # We export Spark submit arguments as an environment variable because shell.py must run as a
 # PYTHONSTARTUP script, which does not take in arguments. This is required for IPython notebooks.
-
-gatherSparkSubmitOpts $@
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 PYSPARK_SUBMIT_ARGS=""
 whitespace="[[:space:]]"
-for i in ${SUBMISSION_OPTS[@]}; do
+for i in "${SUBMISSION_OPTS[@]}"; do
   if [[ $i =~ \" ]]; then i=$(echo $i | sed 's/\"/\\\"/g'); fi
   if [[ $i =~ $whitespace ]]; then i=\"$i\"; fi
   PYSPARK_SUBMIT_ARGS="$PYSPARK_SUBMIT_ARGS $i"
@@ -95,8 +99,8 @@ if [[ "$1" =~ \.py$ ]]; then
   echo -e "Use ./bin/spark-submit <python file>\n" 1>&2
   primary=$1
   shift
-  gatherSparkSubmitOpts $@
-  exec $FWDIR/bin/spark-submit ${SUBMISSION_OPTS[@]} $primary ${APPLICATION_OPTS[@]}
+  gatherSparkSubmitOpts "$@"
+  exec $FWDIR/bin/spark-submit "${SUBMISSION_OPTS[@]}" $primary "${APPLICATION_OPTS[@]}"
 else
   # Only use ipython if no command line arguments were provided [SPARK-1134]
   if [[ "$IPYTHON" = "1" ]]; then
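
The quoting changes above ("$@", "${SUBMISSION_OPTS[@]}", "${APPLICATION_OPTS[@]}") are the core of the fix. A minimal standalone sketch of why the quotes matter (not part of the commit):

    #!/usr/bin/env bash
    # An array element containing a space...
    opts=("--name" "my app")
    # ...is re-split when the expansion is unquoted:
    printf '<%s> ' ${opts[@]};   echo    # <--name> <my> <app>
    # ...but preserved when the expansion is quoted:
    printf '<%s> ' "${opts[@]}"; echo    # <--name> <my app>

The same rule applies to "$@" versus $@, which is why gatherSparkSubmitOpts is now called with the quoted form.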
bin/spark-shell: 17 changes (11 additions, 6 deletions)
@@ -31,14 +31,19 @@ set -o posix
 ## Global script variables
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
+function usage() {
+  echo "Usage: ./bin/spark-shell [options]"
+  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
+  exit 0
+}
+
 if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
-  echo "Usage: ./bin/spark-shell [options]"
-  $FWDIR/bin/spark-submit --help 2>&1 | grep -v Usage 1>&2
-  exit 0
+  usage
 fi
 
 source $FWDIR/bin/utils.sh
-gatherSparkSubmitOpts $@
+SUBMIT_USAGE_FUNCTION=usage
+gatherSparkSubmitOpts "$@"
 
 function main() {
   if $cygwin; then
@@ -49,11 +54,11 @@ function main() {
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
     export SPARK_SUBMIT_OPTS="$SPARK_SUBMIT_OPTS -Djline.terminal=unix"
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main ${SUBMISSION_OPTS[@]} spark-shell ${APPLICATION_OPTS[@]}
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
     stty icanon echo > /dev/null 2>&1
   else
     export SPARK_SUBMIT_OPTS
-    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main ${SUBMISSION_OPTS[@]} spark-shell ${APPLICATION_OPTS[@]}
+    $FWDIR/bin/spark-submit --class org.apache.spark.repl.Main "${SUBMISSION_OPTS[@]}" spark-shell "${APPLICATION_OPTS[@]}"
   fi
 }
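
To make the final command line concrete, here is a hedged trace of how main() assembles it; the arguments are invented for illustration:

    # Hypothetical input:
    #   ./bin/spark-shell --master local[2] --name "my shell"
    # gatherSparkSubmitOpts would then set, roughly:
    #   SUBMISSION_OPTS=(--master "local[2]" --name "my shell")
    #   APPLICATION_OPTS=()
    # so main() effectively runs:
    #   ./bin/spark-submit --class org.apache.spark.repl.Main \
    #     --master local[2] --name "my shell" spark-shell

Anything gatherSparkSubmitOpts does not recognize as a spark-submit option lands in APPLICATION_OPTS and is appended after the literal spark-shell argument.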
bin/utils.sh: 22 changes (12 additions, 10 deletions)
@@ -19,22 +19,24 @@
 
 # Gather all all spark-submit options into SUBMISSION_OPTS
 function gatherSparkSubmitOpts() {
+
+  if [ -z "$SUBMIT_USAGE_FUNCTION" ]; then
+
+    echo "Function for printing usage of $0 is not set." 1>&2
+    echo "Please set usage function to shell variable 'SUBMIT_USAGE_FUNCTION' in $0" 1>&2
+    exit 1
+  fi
+
   SUBMISSION_OPTS=()
   APPLICATION_OPTS=()
   while (($#)); do
     case $1 in
-      --master | --deploy-mode | --class | --name | --jars | --py-files | --files)
-        ;&
-
-      --conf | --properties-file | --driver-memory | --driver-java-options)
-        ;&
-
-      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores)
-        ;&
-
+      --master | --deploy-mode | --class | --name | --jars | --py-files | --files | \
+      --conf | --properties-file | --driver-memory | --driver-java-options | \
+      --driver-library-path | --driver-class-path | --executor-memory | --driver-cores | \
       --total-executor-cores | --executor-cores | --queue | --num-executors | --archives)
         if [[ $# -lt 2 ]]; then
-          usage
+          "$SUBMIT_USAGE_FUNCTION"
           exit 1;
         fi
         SUBMISSION_OPTS+=($1); shift
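
Two details of this hunk are worth noting. First, the three ";&" fall-throughs are collapsed into one pattern list joined by "|" with line continuations; ";&" requires bash 4.0 or newer, while the merged form also works in older shells. Second, "$SUBMIT_USAGE_FUNCTION" invokes a function by the name stored in the variable, since bash resolves the expanded word as a command name. A minimal standalone sketch of that indirection (not from the commit):

    #!/usr/bin/env bash
    function usage() {
      echo "Usage: ./bin/example [options]" 1>&2
    }
    # Store the function name, then invoke it through the variable:
    SUBMIT_USAGE_FUNCTION=usage
    "$SUBMIT_USAGE_FUNCTION"    # runs usage()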
python/pyspark/java_gateway.py: 6 changes (5 additions, 1 deletion)
@@ -39,7 +39,11 @@ def launch_gateway():
         submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS")
         submit_args = submit_args if submit_args is not None else ""
         submit_args = shlex.split(submit_args)
-        command = [os.path.join(SPARK_HOME, script)] + submit_args + ["pyspark-shell"]
+        application_opts = os.environ.get("APPLICATION_OPTS")
+        application_opts = application_opts if application_opts is not None else ""
+        application_opts = shlex.split(application_opts)
+        command = [os.path.join(SPARK_HOME, script)] + submit_args + \
+            ["pyspark-shell"] + application_opts
         if not on_windows:
             # Don't send ctrl-c / SIGINT to the Java gateway:
             def preexec_func():
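
On the Python side, shlex.split is what undoes the quoting the shell scripts apply before exporting these environment variables. A standalone sketch of the round trip (values invented):

    import shlex

    # What bin/pyspark might export after its quoting loop:
    env_value = '--master local[2] --name "my app"'
    print(shlex.split(env_value))
    # ['--master', 'local[2]', '--name', 'my app']  (the embedded space survives)

APPLICATION_OPTS is read the same way, so application arguments containing whitespace also reach the pyspark-shell command intact.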
