# Sorting Functions

In [4]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import col

# Start the Spark session used by every cell below (getOrCreate reuses a
# session if one is already running).
spark = (
    SparkSession.builder
    .appName("SortingAndStringFunctions")
    .getOrCreate()
)

# Column names, listed in the same order as the fields of each record below
columns = ["Country", "Region", "UnitsSold", "UnitPrice"]

# Sample sales records: (country, region, units sold, unit price)
data = [
    ("USA", "North America", 100, 50.5),
    ("India", "Asia", 300, 20.0),
    ("Germany", "Europe", 200, 30.5),
    ("Australia", "Oceania", 150, 60.0),
    ("Japan", "Asia", 120, 45.0),
    ("Brazil", "South America", 180, 25.0),
]

# Build the DataFrame; only names are given, so Spark infers the column types
df = spark.createDataFrame(data, schema=columns)

# Show the unsorted rows as a baseline for the sorting examples
df.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 6, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|      USA|North America|      100|     50.5|
|    India|         Asia|      300|     20.0|
|  Germany|       Europe|      200|     30.5|
|Australia|      Oceania|      150|     60.0|
|    Japan|         Asia|      120|     45.0|
|   Brazil|South America|      180|     25.0|
+---------+-------------+---------+---------+



# 1. Sorting by a Single Column (Ascending Order)
This example sorts the DataFrame by "UnitsSold" in ascending order.

In [5]:
# Order rows by UnitsSold, smallest first.
# ascending=True is orderBy's default; it is spelled out here for clarity.
df_sorted_units = df.orderBy("UnitsSold", ascending=True)
df_sorted_units.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 7, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|      USA|North America|      100|     50.5|
|    Japan|         Asia|      120|     45.0|
|Australia|      Oceania|      150|     60.0|
|   Brazil|South America|      180|     25.0|
|  Germany|       Europe|      200|     30.5|
|    India|         Asia|      300|     20.0|
+---------+-------------+---------+---------+




# 2. Sorting by a Single Column (Descending Order)
This example sorts the DataFrame by "UnitPrice" in descending order.

In [6]:
# Order rows by UnitPrice, largest first.
# ascending=False applies a descending sort to the named column, which is
# equivalent to passing col("UnitPrice").desc().
df_sorted_price = df.orderBy("UnitPrice", ascending=False)
df_sorted_price.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 8, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|Australia|      Oceania|      150|     60.0|
|      USA|North America|      100|     50.5|
|    Japan|         Asia|      120|     45.0|
|  Germany|       Europe|      200|     30.5|
|   Brazil|South America|      180|     25.0|
|    India|         Asia|      300|     20.0|
+---------+-------------+---------+---------+



# 3. Sorting by Multiple Columns
This example sorts first by "Region" (ascending) and then, within each region, by "UnitsSold" (descending).


In [7]:
# Primary key: Region (A-Z); tie-breaker within a region: UnitsSold (high to low).
# Each entry in `ascending` pairs with the column at the same position.
df_sorted_multi = df.orderBy(
    ["Region", "UnitsSold"],
    ascending=[True, False],
)
df_sorted_multi.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 9, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|    India|         Asia|      300|     20.0|
|    Japan|         Asia|      120|     45.0|
|  Germany|       Europe|      200|     30.5|
|      USA|North America|      100|     50.5|
|Australia|      Oceania|      150|     60.0|
|   Brazil|South America|      180|     25.0|
+---------+-------------+---------+---------+



# 4. Using `sort()` Function
The `sort()` method provides the same functionality as `orderBy()`; in PySpark, `orderBy()` is an alias for `sort()`.

In [8]:
# Same result as orderBy(): sort() orders rows by UnitPrice, lowest first.
# The ascending direction is written out explicitly on the column expression.
df_sorted_sort = df.sort(col("UnitPrice").asc())
df_sorted_sort.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 10, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|    India|         Asia|      300|     20.0|
|   Brazil|South America|      180|     25.0|
|  Germany|       Europe|      200|     30.5|
|    Japan|         Asia|      120|     45.0|
|      USA|North America|      100|     50.5|
|Australia|      Oceania|      150|     60.0|
+---------+-------------+---------+---------+



# 5. Sorting Using SQL Query
This example uses a SQL query to sort by "UnitsSold" in descending order.

In [9]:
# Expose the DataFrame to Spark SQL under the view name "sales"
# (replaces any existing temp view with that name).
df.createOrReplaceTempView("sales")

# Sort with plain SQL: highest UnitsSold first
sort_query = "SELECT * FROM sales ORDER BY UnitsSold DESC"
df_sorted_sql = spark.sql(sort_query)
df_sorted_sql.show()

StatementMeta(, 0c65f4cd-0968-4a73-bcc5-19317153cc2f, 11, Finished, Available, Finished)

+---------+-------------+---------+---------+
|  Country|       Region|UnitsSold|UnitPrice|
+---------+-------------+---------+---------+
|    India|         Asia|      300|     20.0|
|  Germany|       Europe|      200|     30.5|
|   Brazil|South America|      180|     25.0|
|Australia|      Oceania|      150|     60.0|
|    Japan|         Asia|      120|     45.0|
|      USA|North America|      100|     50.5|
+---------+-------------+---------+---------+

