Removed EMAIL, LICENSE, SPARK_MEM and elastic references from zingg.sh #253

Merged (7 commits) on May 26, 2022
4 changes: 2 additions & 2 deletions client/src/main/java/zingg/client/Client.java
@@ -160,7 +160,7 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) {
LOG.warn("Zingg processing has completed");
}
catch(ZinggClientException e) {
if (options != null) {
if (options != null && options.get(ClientOptions.EMAIL) != null) {
Email.email(options.get(ClientOptions.EMAIL).value, new EmailBody("Error running Zingg job",
"Zingg Error ",
e.getMessage()));
@@ -186,7 +186,7 @@ else if (options.get(ClientOptions.CONF).value.endsWith("env")) {
}
}
catch(ZinggClientException e) {
if (options != null) {
if (options != null && options.get(ClientOptions.EMAIL) != null) {
Email.email(options.get(ClientOptions.EMAIL).value, new EmailBody("Error running Zingg job",
"Zingg Error ",
e.getMessage()));
26 changes: 26 additions & 0 deletions config/zingg.conf
@@ -0,0 +1,26 @@
# file config/zingg.conf
# This file defines default Spark properties. These properties are passed to 'spark-submit' as Spark configurations (--conf).
# It is useful for setting default environment settings.
# Entries in this file can be:
# A. Blank lines
# B. Comment lines (starting with #)
# C. Properties in key=value format
#
# Leading or trailing spaces are fine.
# Please note that any key or value containing spaces or double quotes must be enclosed in single quotes ('').


### General properties
spark.serializer=org.apache.spark.serializer.KryoSerializer
spark.default.parallelism=8
spark.debug.maxToStringFields=200
spark.driver.memory=8g
spark.executor.memory=8g

# Additional Jars could be passed to spark through below configuration. Jars list should be comma(,) separated.
#spark.jars=
#spark.executor.extraClassPath=
#spark.driver.extraClassPath=

### Below property must be set for BigQuery
#spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem
6 changes: 5 additions & 1 deletion docs/dataSourcesAndSinks/bigquery.md
@@ -6,8 +6,12 @@ The two driver jars namely **spark-bigquery-with-dependencies_2.12-0.24.2.jar**

```bash
export ZINGG_EXTRA_JARS=./spark-bigquery-with-dependencies_2.12-0.24.2.jar,./gcs-connector-hadoop2-latest.jar
export ZINGG_EXTRA_SPARK_CONF="--conf spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem"
```
Set the following property in Zingg's configuration file, i.e. **config/zingg.conf**:
```bash
spark.hadoop.fs.gs.impl=com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem
```
Similarly, instead of setting the env variable **ZINGG_EXTRA_JARS** as above, the equivalent property **spark.jars** can be set in the zingg.conf file.

If Zingg is run from outside Google Cloud, it requires authentication. Please set the following env variable to the location of the file containing the service account key. A service account key can be created and downloaded in JSON format from the [Google Cloud console](https://cloud.google.com/docs/authentication/getting-started).

Expand Down
2 changes: 2 additions & 0 deletions docs/dataSourcesAndSinks/jdbc.md
@@ -47,3 +47,5 @@ $ export ZINGG_EXTRA_JARS=path to postgresql-xx.jar
```
$ export ZINGG_EXTRA_JARS=path to mysql-connector-java-xx.jar
```

Please note that instead of setting the env variable **ZINGG_EXTRA_JARS** as above, the equivalent property **spark.jars** can be set in Zingg's configuration file (config/zingg.conf).
1 change: 1 addition & 0 deletions docs/dataSourcesAndSinks/snowflake.md
@@ -30,3 +30,4 @@ One must include Snowflake JDBC driver and Spark dependency on the spark classpath
```
export ZINGG_EXTRA_JARS=snowflake-jdbc-3.13.18.jar,spark-snowflake_2.12-2.10.0-spark_3.1.jar
```
Optionally, instead of setting the env variable **ZINGG_EXTRA_JARS** as above, the equivalent property **spark.jars** can be set in Zingg's configuration file (config/zingg.conf).
12 changes: 12 additions & 0 deletions scripts/load-zingg-env.sh
@@ -0,0 +1,12 @@
#!/usr/bin/env bash

ZINGG_ENV_SH="zingg-env.sh"
export ZINGG_CONF_DIR="$(dirname "$0")"/../config

ZINGG_ENV_SH="${ZINGG_CONF_DIR}/${ZINGG_ENV_SH}"
if [[ -f "${ZINGG_ENV_SH}" ]]; then
# Promote all variable declarations to environment (exported) variables
set -a
Member:
do we need to set them as env variables?

Contributor Author:
What is the alternative? As they are going to be supplied in another command (spark-submit), they must be set. Whether they have to be exported or not could be thought over!

Member:
If we source them, we can read them here, no? Why export?

Contributor Author:
I'll check whether it's needed or not. Either all are exported or none.

. ${ZINGG_ENV_SH}
set +a
fi
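The `set -a` / `set +a` pattern discussed in this thread can be checked with a minimal standalone sketch (the temp file and `SPARK_MEM` value here are illustrative, not part of the PR):

```shell
#!/usr/bin/env bash
# Sketch of the allexport pattern used in load-zingg-env.sh: assignments
# sourced while 'set -a' is active become exported environment variables,
# visible to child processes such as spark-submit.

env_file=$(mktemp)
echo 'SPARK_MEM=10g' > "$env_file"

set -a            # every subsequent assignment is exported
. "$env_file"     # source plain key=value assignments
set +a            # stop auto-exporting

# A child process can now see the variable because it was exported:
bash -c 'echo "child sees SPARK_MEM=${SPARK_MEM}"'
rm -f "$env_file"
```

Without `set -a`, the sourced assignment would be visible only in the current shell, which is why the script exports everything or nothing.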
31 changes: 23 additions & 8 deletions scripts/zingg.sh
@@ -3,19 +3,34 @@
ZINGG_JARS=$ZINGG_HOME/zingg-0.3.3-SNAPSHOT.jar
EMAIL=xxx@yyy.com
LICENSE="test"
Member:
move these too to the defaults?

Contributor Author:
moved to environment file.

##for local
export SPARK_MEM=10g
Member:
We can't remove SPARK_MEM; we should document setting it outside. Docker should also have a way to set this.

Contributor Author:
Added in zingg-env.sh

Contributor Author:

Spark has multiple conf files (six), among them:

  1. Environment variables sourced by a shell script (spark-env.sh), needed before starting a Spark program
  2. A property file (spark-defaults.conf), which is handled by a Java/Scala class
  3. log4j.properties
  4. workers.template

We should keep things simple unless they are a must. We are replicating files 1) and 2), and only in scripts, through env vars and "--conf" params, respectively.

"--jars" has a corresponding conf param as well, e.g.
"spark.driver.extraClassPath=/path/myjarfile1.jar:/path/myjarfile2.jar"

Therefore, I think it's not required to do processing based on "pattern" matching.
If you think otherwise, please let me know.

  • zingg-env.sh - have an option to specify any property that starts with spark. (can be spark.executor.memory, spark.driver.memory, spark.hadoop.fs.impl, etc.), with sensible defaults where applicable and commented out for the rest (e.g. the BigQuery Hadoop fs). Any property that starts with spark. is put into conf: --conf spark.executor.memory=22g
    zingg.sh should not have any knowledge of which property; it just knows that stuff has to be passed in as jars and conf.


if [[ -z "${ZINGG_EXTRA_JARS}" ]]; then
OPTION_JARS=""
else
OPTION_JARS="--jars ${ZINGG_EXTRA_JARS}"
fi

if [[ -z "${ZINGG_EXTRA_SPARK_CONF}" ]]; then
OPTION_SPARK_CONF=""
else
OPTION_SPARK_CONF="${ZINGG_EXTRA_SPARK_CONF}"
fi
function read_zingg_conf() {
local CONF_PROPS=""

ZINGG_CONF_DIR="$(cd "`dirname "$0"`"/../config; pwd)"

file="${ZINGG_CONF_DIR}/zingg.conf"
# Strip leading blanks; drop comment lines and blank lines
PROPERTIES=$(sed 's/^[[:blank:]]*//;s/#.*//;/^[[:space:]]*$/d' $file)

while IFS='=' read -r key value; do
# Trim leading and trailing spaces
key=$(echo $key | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//;')
value=$(echo $value | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//;')
# Append to conf variable
CONF_PROPS+=" --conf ${key}=${value}"
done <<< "$(echo -e "$PROPERTIES")"

echo $CONF_PROPS
}

$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER $OPTION_JARS $OPTION_SPARK_CONF --conf spark.serializer=org.apache.spark.serializer.KryoSerializer --conf spark.es.nodes="127.0.0.1" --conf spark.es.port="9200" --conf spark.es.resource="cluster/cluster1" --conf spark.default.parallelism="8" --conf spark.executor.extraJavaOptions="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -Xloggc:/tmp/memLog.txt -XX:+UseCompressedOops" --conf spark.executor.memory=10g --conf spark.debug.maxToStringFields=200 --driver-class-path $ZINGG_JARS --class zingg.client.Client $ZINGG_JARS $@ --email $EMAIL --license $LICENSE
OPTION_SPARK_CONF+=$(read_zingg_conf)
# All the additional options must be added here
ALL_OPTIONS=" ${OPTION_JARS} ${OPTION_SPARK_CONF} "
$SPARK_HOME/bin/spark-submit --master $SPARK_MASTER ${ALL_OPTIONS} --conf spark.executor.extraJavaOptions="-verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+HeapDumpOnOutOfMemoryError -Xloggc:/tmp/memLog.txt -XX:+UseCompressedOops" --driver-class-path $ZINGG_JARS --class zingg.client.Client $ZINGG_JARS $@ --email $EMAIL --license $LICENSE
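The property-to-`--conf` expansion performed by `read_zingg_conf` can be exercised standalone; this sketch reuses the same sed filter and trim logic, but the temp file and the sample properties are illustrative only:

```shell
#!/usr/bin/env bash
# Standalone sketch of the key=value to "--conf key=value" expansion
# from scripts/zingg.sh; sample file and properties are made up.

conf_file=$(mktemp)
cat > "$conf_file" <<'EOF'
# a comment line

spark.driver.memory=8g
   spark.default.parallelism = 8
EOF

CONF_PROPS=""
# Strip leading blanks, comments, and blank lines (same sed as zingg.sh)
PROPERTIES=$(sed 's/^[[:blank:]]*//;s/#.*//;/^[[:space:]]*$/d' "$conf_file")

while IFS='=' read -r key value; do
  # Trim leading and trailing spaces around key and value
  key=$(echo "$key" | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//')
  value=$(echo "$value" | sed 's/^[[:blank:]]*//;s/[[:blank:]]*$//')
  CONF_PROPS+=" --conf ${key}=${value}"
done <<< "$PROPERTIES"

echo "$CONF_PROPS"
rm -f "$conf_file"
```

For this input the loop yields `--conf spark.driver.memory=8g --conf spark.default.parallelism=8`, which is then appended to the options passed to spark-submit.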