In [0]:
%sql
show databases

databaseName
default
test_database


In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType, FloatType

# Table schemas, built column by column with StructType.add.
# Every column is declared nullable (third argument True).
schema1 = (
    StructType()
    .add("id", IntegerType(), True)
    .add("name", StringType(), True)
)

schema2 = (
    StructType()
    .add("id", IntegerType(), True)
    .add("description", StringType(), True)
)

schema3 = (
    StructType()
    .add("product_id", IntegerType(), True)
    .add("price", FloatType(), True)
)

schema4 = (
    StructType()
    .add("order_id", IntegerType(), True)
    .add("quantity", IntegerType(), True)
)

# Sample rows: two records per schema, values in the schema's column order.
data1 = [
    (1, "Alice"),
    (2, "Bob"),
]
data2 = [
    (1, "First record"),
    (2, "Second record"),
]
data3 = [
    (1, 19.99),
    (2, 29.99),
]
data4 = [
    (1, 5),
    (2, 10),
]


In [0]:
# Build one DataFrame per (rows, schema) pair; the unpacking order matches
# the pair order, so df1 comes from data1/schema1, df2 from data2/schema2, etc.
df1, df2, df3, df4 = [
    spark.createDataFrame(rows, schema)
    for rows, schema in (
        (data1, schema1),
        (data2, schema2),
        (data3, schema3),
        (data4, schema4),
    )
]



In [0]:
# Persist each DataFrame as a table in the `default` database,
# replacing any table that already has the same name.
for table_name, frame in [
    ("default.table1", df1),
    ("default.table2", df2),
    ("default.table3", df3),
    ("default.table4", df4),
]:
    frame.write.mode("overwrite").saveAsTable(table_name)


In [0]:
# Make sure the target database exists before writing into it.
spark.sql("CREATE DATABASE IF NOT EXISTS test_database")

# Persist the same four DataFrames there, replacing existing tables.
for table_name, frame in [
    ("test_database.table1", df1),
    ("test_database.table2", df2),
    ("test_database.table3", df3),
    ("test_database.table4", df4),
]:
    frame.write.mode("overwrite").saveAsTable(table_name)


In [0]:
%sql
use test_database;
show tables ;

database,tableName,isTemporary
test_database,table1,False
test_database,table2,False
test_database,table3,False
test_database,table4,False


In [0]:
# Pull the SHOW DATABASES result to the driver, then keep only the
# databaseName column of each row.
database_rows = spark.sql("show databases").collect()
all_dbs = [row.databaseName for row in database_rows]
print(all_dbs)

['default', 'test_database']


In [0]:
%fs mkdirs file:/tmp/ddls/

In [0]:


def createddl(database):
    """Write the CREATE TABLE statement of every table in ``database`` to a file.

    For each non-temporary table the catalog lists for ``database``, runs
    ``SHOW CREATE TABLE`` and writes the resulting statement, followed by
    ``;`` and a newline, to ``/tmp/ddls/ddl_<database>`` on the local file
    system. An existing file is overwritten. The ``/tmp/ddls`` directory
    must already exist.

    Args:
        database: Name of the database whose table DDLs are exported.

    Returns:
        None.
    """
    # List the tables of the requested database rather than a hard-coded one,
    # and skip temporary views: they are not part of the database and cannot
    # be addressed as ``<database>.<name>``.
    tables = [
        table
        for table in spark.catalog.listTables(database)
        if not table.isTemporary
    ]

    # "w" truncates any previous export; the with-block closes the file even
    # if one of the SHOW CREATE TABLE queries raises.
    with open(f"/tmp/ddls/ddl_{database}", "w", encoding="utf-8") as ddl_file:
        for table in tables:
            ddl = spark.sql(f"show create table {database}.{table.name}")
            # The first column of the first result row holds the statement text.
            ddl_file.write(ddl.first()[0])
            ddl_file.write(";\n")

In [0]:
# Export the DDLs for a single database; createddl writes the output to
# /tmp/ddls/ddl_test_database.
createddl('test_database')

In [0]:
%fs ls file:/tmp/ddls/

path,name,size,modificationTime
file:/tmp/ddls/ddl_test_database,ddl_test_database,714,1720182071642


In [0]:
%fs head file:/tmp/ddls/ddl_test_database

In [0]:
from multiprocessing.pool import ThreadPool

# Export the DDLs of every database, using up to four worker threads.
alldbs = [db.databaseName for db in spark.sql("show databases").collect()]

# map() blocks until every database has been processed; the with-block then
# shuts the worker threads down, so the pool is no longer left open.
with ThreadPool(4) as processes:
    ddl_results = processes.map(createddl, alldbs)

# createddl returns None, so this displays one None per database processed.
ddl_results

Out[48]: [None, None]