https://github.com/delta-io/delta/blob/master/examples/python/quickstart_sql.py

fix permission

```
chmod -R a+w /tmp/hive
```

In [1]:
from pyspark.sql import SparkSession


# Enable SQL/DML commands and Metastore tables for the current spark session.
# We need to set the following configs

spark = SparkSession.builder \
    .appName("quickstart_sql") \
    .master("local[*]") \
    .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
    .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \
    .getOrCreate()

In [2]:
tableName = "tbltestpython"
# Clear any previous runs
spark.sql(f"DROP TABLE IF EXISTS {tableName}")
spark.sql("DROP TABLE IF EXISTS newData")

DataFrame[]

In [4]:
# Create a table
print("############# Creating a table ###############")
spark.sql(f"CREATE TABLE {tableName}(id LONG) USING delta")
spark.sql(f"INSERT INTO {tableName} VALUES 0, 1, 2, 3, 4")

# Read the table
print("############ Reading the table ###############")
spark.sql(f"SELECT * FROM {tableName} order by id").show()

############# Creating a table ###############
############ Reading the table ###############
+---+
| id|
+---+
|  0|
|  1|
|  2|
|  3|
|  4|
+---+



In [6]:
# Upsert (merge) new data
print("########### Upsert new data #############")
spark.sql("CREATE TABLE newData(id LONG) USING parquet")
spark.sql("INSERT INTO newData VALUES 3, 4, 5, 6")

spark.sql(f'''MERGE INTO {tableName} USING newData
        ON {tableName}.id = newData.id
        WHEN MATCHED THEN
          UPDATE SET {tableName}.id = newData.id
        WHEN NOT MATCHED THEN INSERT *
    ''')

spark.sql(f"SELECT * FROM {tableName} order by id").show()

########### Upsert new data #############
+---+
| id|
+---+
|  0|
|  1|
|  2|
|  3|
|  4|
|  5|
|  6|
+---+



In [9]:
# Update table data
print("########## Overwrite the table ###########")
spark.sql(f"INSERT OVERWRITE {tableName} select * FROM (VALUES 5, 6, 7, 8, 9) x (id)")
spark.sql(f"SELECT * FROM {tableName} order by id").show()

########## Overwrite the table ###########
+---+
| id|
+---+
|  5|
|  6|
|  7|
|  8|
|  9|
+---+



In [10]:
# Update every even value by adding 100 to it
print("########### Update to the table(add 100 to every even value) ##############")
spark.sql(f"UPDATE {tableName} SET id = (id + 100) WHERE (id % 2 == 0)")
spark.sql(f"SELECT * FROM {tableName} order by id").show()

########### Update to the table(add 100 to every even value) ##############
+---+
| id|
+---+
|  5|
|  7|
|  9|
|106|
|108|
+---+



In [11]:
# Delete every even value
print("######### Delete every even value ##############")
spark.sql(f"DELETE FROM {tableName} WHERE (id % 2 == 0)")
spark.sql(f"SELECT * FROM {tableName} order by id").show()

######### Delete every even value ##############
+---+
| id|
+---+
|  5|
|  7|
|  9|
+---+



In [12]:
# Read old version of data using time travel
print("######## Read old data using time travel ############")
df = spark.read.format("delta").option("versionAsOf", 0).table(tableName)
df.orderBy("id").show()

######## Read old data using time travel ############
+---+
| id|
+---+
|  5|
|  7|
|  9|
+---+



In [13]:
# cleanup
spark.sql(f"DROP TABLE {tableName}")
spark.sql("DROP TABLE IF EXISTS newData")
spark.stop()