In [1]:
# findspark is initialised here, before the first pyspark import in the next cell.
# NOTE(review): presumably init() locates the local Spark installation and puts
# pyspark on sys.path — confirm against the findspark README.
import findspark
findspark.init()

In [2]:
from pyspark.sql import SparkSession

## Different Spark Sessions but the SAME Spark Context
* The `newSession()` method creates a new Spark session with its own isolated SQL configurations and temporary tables, while still sharing the underlying SparkContext with the original session.

### Spark Context
* A SparkContext represents the connection to a Spark cluster.
* It is used to create RDDs, accumulators and broadcast variables on that cluster. 
* ***Note: Only one SparkContext should be active per JVM. You must stop() the active SparkContext before creating a new one.***

In [3]:
# Build the notebook's primary session (or reuse one that already exists),
# running locally with master "local[*]".
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("SparkSQL")
    .getOrCreate()
)
# Show the session object and the SparkContext behind it, one per line.
print(spark, spark.sparkContext, sep="\n")

<pyspark.sql.session.SparkSession object at 0x000001ABA30F51F0>
<SparkContext master=local[*] appName=SparkSQL>


In [4]:
# Session-level settings applied at runtime on `spark`: one built-in SQL option
# and one arbitrary custom key. Later cells read them back from other sessions.
# NOTE(review): resource options such as spark.executor.memory,
# spark.executor.cores, spark.cores.max and spark.driver.memory are presumably
# launch-time settings that belong on SparkSession.builder.config(...) rather
# than spark.conf.set(...) — confirm against the Spark configuration docs.
spark.conf.set("spark.sql.shuffle.partitions", "100")
spark.conf.set("VAR1", "TEST")

In [5]:
# Read back the custom key that the previous cell set on this session.
spark.conf.get("VAR1")

'TEST'

In [6]:
# Dump the (key, value) pairs held by the SparkContext's configuration,
# one tuple per line under a small header.
print("\n=== Conf ===")
confList = spark.sparkContext.getConf().getAll()
for entry in confList:
    print(entry)


=== Conf ===
('spark.app.submitTime', '1681791418690')
('spark.executor.id', 'driver')
('spark.app.name', 'SparkSQL')
('spark.app.startTime', '1681791418989')
('spark.driver.extraJavaOptions', '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'

In [7]:
# Small in-memory sample: column names first, then one tuple per employee.
columns = ["first", "last", "birth_date", "salary"]
data = [
    ('James', 'Smith', '1998-04-01', 3000),
    ('Michael', 'Rose', '2000-05-19', 4000),
    ('Maria', 'Jones', '1999-12-01', 4000),
]
df = spark.createDataFrame(data, columns)

# Register the frame as a temporary view on `spark`, then query it back via SQL.
df.createOrReplaceTempView("employee_tmp")
spark.sql("SELECT * FROM employee_tmp").show()

+-------+-----+----------+------+
|  first| last|birth_date|salary|
+-------+-----+----------+------+
|  James|Smith|1998-04-01|  3000|
|Michael| Rose|2000-05-19|  4000|
|  Maria|Jones|1999-12-01|  4000|
+-------+-----+----------+------+



In [8]:
# Check that the temp view registered in the previous cell is visible through this session's catalog.
spark.catalog.tableExists('employee_tmp')

True

In [9]:
# List what this session's catalog currently holds; the temp view is reported with isTemporary=True.
spark.catalog.listTables()

[Table(name='employee_tmp', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True)]

## newSession()
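* Runtime configurations set on one SparkSession (via `spark.conf.set`) are not shared with a session created by `newSession()`; the new session reports its own values.
* Temporary tables registered in one session are not visible in the other.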

In [10]:
# Derive a second session from the first one. The printout shows a different
# session object alongside the SparkContext it runs on.
sparkNew = spark.newSession()
print(sparkNew, sparkNew.sparkContext, sep="\n")

<pyspark.sql.session.SparkSession object at 0x000001ABA42234F0>
<SparkContext master=local[*] appName=SparkSQL>


In [11]:
# Inspect the context-level configuration through the NEW session.
# newSession() reuses the SparkContext of `spark`, so this listing matches the
# one printed for `spark` above; going through `sparkNew` (rather than `spark`)
# is what actually demonstrates that the context is shared.
print("\n=== Conf ===")
confList = sparkNew.sparkContext.getConf().getAll()
for conf in confList:
    print(conf)


=== Conf ===
('spark.app.submitTime', '1681791418690')
('spark.executor.id', 'driver')
('spark.app.name', 'SparkSQL')
('spark.app.startTime', '1681791418989')
('spark.driver.extraJavaOptions', '-Djava.net.preferIPv6Addresses=false -XX:+IgnoreUnrecognizedVMOptions --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.invoke=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/sun.nio.cs=ALL-UNNAMED --add-opens=java.base/sun.security.action=ALL-UNNAMED --add-opens=java.base/sun.util.calendar=ALL-UNNAMED --add-opens=java.security.jgss/sun.security.krb5=ALL-UNNAMED -Djdk.reflect.useDirectMethodHandle=false'

In [12]:
# "VAR1" was set on `spark` only, so looking it up on the new session is
# expected to fail. The failure is the point of this cell, so the error is
# caught and its message printed instead of aborting the notebook run.
try:
    sparkNew.conf.get("VAR1")
except Exception as err:
    print(err)

An error occurred while calling o157.get.
: java.util.NoSuchElementException: VAR1
	at org.apache.spark.sql.errors.QueryExecutionErrors$.noSuchElementExceptionError(QueryExecutionErrors.scala:2138)
	at org.apache.spark.sql.internal.SQLConf.$anonfun$getConfString$3(SQLConf.scala:5032)
	at scala.Option.getOrElse(Option.scala:189)
	at org.apache.spark.sql.internal.SQLConf.getConfString(SQLConf.scala:5032)
	at org.apache.spark.sql.RuntimeConfig.get(RuntimeConfig.scala:81)
	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
	at java.lang.reflect.Method.invoke(Method.java:498)
	at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
	at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374)
	at py4j.Gateway.invoke(Gateway.java:282)
	at py4j.commands.AbstractCommand.invokeMethod(

In [13]:
# The "100" set on the original session does not carry over: the new session reports its own value.
sparkNew.conf.get("spark.sql.shuffle.partitions")

'200'

In [14]:
# The temp view was registered through `spark`; check whether the new session can see it.
sparkNew.catalog.tableExists('employee_tmp')

False

## New Session using getOrCreate()
* Here `getOrCreate()` returned the existing SparkSession (the printed object address is the same as for `spark`), so its Spark configurations and temporary tables are accessible.

In [15]:
# Ask the builder for a session again with the same master and app name.
# The printout lets the reader compare the object with the original `spark`.
spark2 = (
    SparkSession.builder
    .master("local[*]")
    .appName("SparkSQL")
    .getOrCreate()
)
print(spark2, spark2.sparkContext, sep="\n")

<pyspark.sql.session.SparkSession object at 0x000001ABA30F51F0>
<SparkContext master=local[*] appName=SparkSQL>


In [16]:
# Read the shuffle-partitions setting through `spark2` to compare with the value set on `spark`.
spark2.conf.get("spark.sql.shuffle.partitions")

'100'

In [17]:
# Check whether the temp view registered through `spark` is visible via `spark2`.
spark2.catalog.tableExists('employee_tmp')

True