In [7]:
# Initialise findspark before pyspark is imported in a later cell.
# Per the findspark documentation, init() locates the Spark installation and
# makes the `pyspark` package importable from this kernel.
import findspark
findspark.init()

In [8]:
# Show the Spark/pyspark location that findspark resolved.
# NOTE(review): the displayed value is an absolute, machine-specific path;
# consider clearing this output before sharing the notebook.
findspark.find()

'/Users/a06411/opt/anaconda3/envs/fluentPython/lib/python3.11/site-packages/pyspark'

In [9]:
from pyspark.sql import SparkSession

In [10]:
# Build (or reuse) the notebook's primary SparkSession: a local-mode
# application named "MyPySparkApp".
spark = (
    SparkSession.builder
    .appName("MyPySparkApp")
    .master("local")
    .getOrCreate()
)

## 스파크 세션 1에서 처리하기

In [11]:
# Build a small demo DataFrame of (name, age) rows.
data = [("Alice", 25), ("Bob", 30), ("Charlie", 35)]
df = spark.createDataFrame(data, ["name", "age"])

# Register the DataFrame as a global temp view (queried later in this
# notebook as global_temp.global_view). The "OrReplace" variant keeps this
# cell re-runnable: plain createGlobalTempView raises when a view with the
# same name already exists.
df.createOrReplaceGlobalTempView("global_view")

# Register the same DataFrame as a session-scoped temp view.
df.createOrReplaceTempView("temp_view")

# Query the temp view from the session that registered it.
temp_result = spark.sql("SELECT * FROM temp_view")
temp_result.show()  # prints the three rows registered above


+-------+---+
|   name|age|
+-------+---+
|  Alice| 25|
|    Bob| 30|
|Charlie| 35|
+-------+---+



## 스파크 세션 1의 정보 확인 

In [12]:
# List the tables/views visible in the first session's current database;
# in the recorded output only the session-scoped temp_view appears.
spark.sql("show tables").show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|         |temp_view|       true|
+---------+---------+-----------+



In [13]:
# List what is visible under the global_temp database. The recorded output
# contains global_view (namespace global_temp) and also the session-local
# temp_view, which is listed with an empty namespace.
spark.sql("show tables from global_temp").show()

+-----------+-----------+-----------+
|  namespace|  tableName|isTemporary|
+-----------+-----------+-----------+
|global_temp|global_view|       true|
|           |  temp_view|       true|
+-----------+-----------+-----------+



## 스파크 세션 2 생성 

In [14]:
# Obtain a second handle through the builder and read the global temp view.
# NOTE(review): getOrCreate() hands back the session that is already running
# (Spark logs "Using an existing Spark session" in this cell's output), so
# spark2 is not an isolated session here. spark.newSession() would create a
# separate session that shares the same SparkContext.
spark2 = (
    SparkSession.builder
    .appName("AnotherSparkSession")
    .master("local")
    .getOrCreate()
)

# The global temp view is addressed through the global_temp database.
global_result = spark2.sql("SELECT * FROM global_temp.global_view")
global_result.show()  # prints the rows registered under global_view



+-------+---+
|   name|age|
+-------+---+
|  Alice| 25|
|    Bob| 30|
|Charlie| 35|
+-------+---+



23/06/07 16:27:03 WARN SparkSession: Using an existing Spark session; only runtime SQL configurations will take effect.


In [15]:
# Same global_temp listing, issued through spark2: the recorded output shows
# global_view and, again, temp_view with an empty namespace.
spark2.sql("show tables from global_temp").show()

+-----------+-----------+-----------+
|  namespace|  tableName|isTemporary|
+-----------+-----------+-----------+
|global_temp|global_view|       true|
|           |  temp_view|       true|
+-----------+-----------+-----------+



## 스파크 세션 2 처리

In [17]:
# List tables/views in spark2's current database; the recorded output shows
# that temp_view is visible through spark2 as well.
spark2.sql("show tables").show()

+---------+---------+-----------+
|namespace|tableName|isTemporary|
+---------+---------+-----------+
|         |temp_view|       true|
+---------+---------+-----------+



In [19]:
# Query the session-scoped temp_view through spark2. This succeeds, which is
# consistent with spark2 having been obtained via getOrCreate() while a
# session already existed (see the "Using an existing Spark session" WARN).
temp_result2 = spark2.sql("SELECT * FROM temp_view")
temp_result2.show() 

+-------+---+
|   name|age|
+-------+---+
|  Alice| 25|
|    Bob| 30|
|Charlie| 35|
+-------+---+



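## 글로벌 템프에는 없지만 로컬 템프 뷰에도 접근이 가능

- 왜? `getOrCreate()`는 이미 실행 중인 세션이 있으면 그 세션을 그대로 반환한다(위의 "Using an existing Spark session" WARN 로그 참고). 따라서 `spark2`는 `spark`와 같은 세션이고, 세션 범위인 `temp_view`가 `spark2`에서도 조회된다.
- 서로 분리된 세션을 비교하려면 `spark.newSession()`으로 새 세션을 만들어야 한다. 로컬 템프 뷰는 세션 단위로만 보이고, 글로벌 템프 뷰는 `global_temp` 데이터베이스를 통해 같은 애플리케이션의 모든 세션에서 조회된다.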

In [18]:
# Demonstrate that the session-scoped temp_view is NOT registered under the
# global_temp database: the qualified lookup fails with AnalysisException.
# The error is the point of this cell, so it is caught and printed; an
# uncaught exception would halt "Restart Kernel -> Run All" at this point.
from pyspark.sql.utils import AnalysisException

try:
    temp_result2 = spark2.sql("SELECT * FROM global_temp.temp_view")
    temp_result2.show()
except AnalysisException as exc:
    print(f"AnalysisException: {exc}")

AnalysisException: [TABLE_OR_VIEW_NOT_FOUND] The table or view `global_temp`.`temp_view` cannot be found. Verify the spelling and correctness of the schema and catalog.
If you did not qualify the name with a schema, verify the current_schema() output, or qualify the name with the correct schema and catalog.
To tolerate the error on drop use DROP VIEW IF EXISTS or DROP TABLE IF EXISTS.; line 1 pos 14;
'Project [*]
+- 'UnresolvedRelation [global_temp, temp_view], [], false


In [20]:
# Ask the catalog for the entries it reports under the global_temp database
# (the result is displayed in the next cell).
global_temp_views = spark.catalog.listTables("global_temp")

In [21]:
# Despite the variable name, the displayed list holds global_view plus the
# session-local temp_view (reported with namespace=[]).
global_temp_views

[Table(name='global_view', catalog=None, namespace=['global_temp'], description=None, tableType='TEMPORARY', isTemporary=True),
 Table(name='temp_view', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True)]

In [22]:
# Drop the global temp view by name. The call's return value is displayed as
# the cell output; it was True in the recorded run.
view_name = "global_view"
spark.catalog.dropGlobalTempView(view_name)

True

In [23]:
# Re-list the global_temp database after the drop so the next cell can
# confirm that global_view is gone.
global_temp_views = spark.catalog.listTables("global_temp")

In [24]:
# After the drop only the session-local temp_view remains in the listing.
global_temp_views

[Table(name='temp_view', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True)]

In [16]:
# Name of the catalog currently in use, as seen through `spark`.
spark.catalog.currentCatalog()

'spark_catalog'

In [17]:
# Same check through `spark2`; the recorded output matches `spark`.
spark2.catalog.currentCatalog()

'spark_catalog'

In [18]:
# Name of the current database, as seen through `spark`.
spark.catalog.currentDatabase()

'default'

In [19]:
# Same check through `spark2`; the recorded output matches `spark`.
spark2.catalog.currentDatabase()

'default'

In [20]:
# Register a throwaway one-row temp view, then list the session's tables.
# createOrReplaceTempView keeps the cell re-runnable; plain createTempView
# raises when "test_view" is already registered in the session.
spark.range(1).createOrReplaceTempView("test_view")
spark.catalog.listTables()

[Table(name='temp_view', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True),
 Table(name='test_view', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True)]

In [21]:
# Drop the throwaway view and list the session's tables again; in the
# recorded output only temp_view remains.
spark.catalog.dropTempView("test_view")
spark.catalog.listTables()

[Table(name='temp_view', catalog=None, namespace=[], description=None, tableType='TEMPORARY', isTemporary=True)]

In [4]:
# Stop the SparkSessions
# NOTE(review): spark2 was obtained with getOrCreate() while spark was already
# running, so both names presumably refer to the same session and the second
# stop() is redundant -- confirm.
spark.stop()
spark2.stop()
