## Managed Tables

Managed tables make it possible to use Spark as a database or to save intermediate results.

In [1]:
import numpy as np
import pandas as pd
import pyspark
import urllib

from pyspark.sql import SparkSession
import pyspark.sql.functions as F

In [2]:
# Start a Spark session named "Managed Tables", or reuse one that is already running.
spark = SparkSession.builder.appName("Managed Tables").getOrCreate()

In [4]:
# List the tables currently registered in the catalog; in the recorded run
# nothing had been saved yet, so the result was an empty list.
spark.catalog.listTables()

[]

In [5]:
# `import urllib` alone does not load the `request` submodule, so import it explicitly.
import urllib.request

# Fetch the iris dataset from the UCI repository into the working directory.
URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
urllib.request.urlretrieve(URL, "iris.csv");

In [3]:
# The file is read with header=False, so the column names are supplied here.
columns = [
    'sepal-length',
    'sepal-width',
    'petal-length',
    'petal-width',
    'class',
]

# inferSchema reads the file twice, but in return detects the numerical columns
data = (spark.read
        .csv('iris.csv', header=False, inferSchema=True)
        .toDF(*columns))
data.printSchema()

root
 |-- sepal-length: double (nullable = true)
 |-- sepal-width: double (nullable = true)
 |-- petal-length: double (nullable = true)
 |-- petal-width: double (nullable = true)
 |-- class: string (nullable = true)



In [4]:
# Persist the DataFrame as the table "iris_managed", using the "overwrite" save mode.
(data.write
     .mode("overwrite")
     .saveAsTable("iris_managed"))

In [5]:
# List the catalog again; the recorded output shows iris_managed as a MANAGED table.
spark.catalog.listTables()

[Table(name='iris_managed', database='default', description=None, tableType='MANAGED', isTemporary=False)]

In [8]:
# Read the saved table back by name through Spark SQL and print the rows.
spark.sql("SELECT * FROM iris_managed").show()

+------------+-----------+------------+-----------+-----------+
|sepal-length|sepal-width|petal-length|petal-width|      class|
+------------+-----------+------------+-----------+-----------+
|         5.1|        3.5|         1.4|        0.2|Iris-setosa|
|         4.9|        3.0|         1.4|        0.2|Iris-setosa|
|         4.7|        3.2|         1.3|        0.2|Iris-setosa|
|         4.6|        3.1|         1.5|        0.2|Iris-setosa|
|         5.0|        3.6|         1.4|        0.2|Iris-setosa|
|         5.4|        3.9|         1.7|        0.4|Iris-setosa|
|         4.6|        3.4|         1.4|        0.3|Iris-setosa|
|         5.0|        3.4|         1.5|        0.2|Iris-setosa|
|         4.4|        2.9|         1.4|        0.2|Iris-setosa|
|         4.9|        3.1|         1.5|        0.1|Iris-setosa|
|         5.4|        3.7|         1.5|        0.2|Iris-setosa|
|         4.8|        3.4|         1.6|        0.2|Iris-setosa|
|         4.8|        3.0|         1.4| 

In [10]:
# IF EXISTS keeps this cleanup cell re-runnable: a second run is a no-op
# instead of failing because the table is already gone.
spark.sql("DROP TABLE IF EXISTS iris_managed")

# The catalog listing no longer includes the dropped table.
spark.catalog.listTables()

[]