## Import Statements

In [0]:
import pyspark.sql.functions as f

## Create a dataframe

In [0]:
# Store-sales sample loaded from CSV. Only the header option is set and no
# schema is supplied; the schema printed further down shows every column as string.
df = (
    spark.read.format("csv")
    .option("header", True)
    .load("/FileStore/tables/store_sales/test.csv")
)

# Hand-built rows in the order (id, Name, Companies, salary, currency).
# The last two rows have an empty company list and None for salary/currency
# so the null-ordering examples have something to work with.
data_with_null = [
    (1, "Name1", ["meta","microsoft"], 5000, "USD"),
    (2, "Name2", ["google"], 3000, "USD"),
    (3, "Name3", ["openai"], 6000, "USD"),
    (4, "Name4", ["tesla","google"], 10000, "USD"),
    (5, "Name5", ["tcs"], 50000, "USD"),
    (6, "Name6", ["infosys", "l&t"], 15000, "USD"),
    (7, "Name7", [], None, None),
    (8, "Name8", [], None, None)
]

# Column names are given explicitly; the column types are not.
df1 = spark.createDataFrame(
    data_with_null,
    schema=["id", "Name", "Companies", "salary", "currency"],
)

In [0]:
# Display the hand-built rows, including the two with null salary/currency.
df1.show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Print df1's schema (column names were supplied at creation, types were not).
df1.printSchema()

root
 |-- id: long (nullable = true)
 |-- Name: string (nullable = true)
 |-- Companies: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- salary: long (nullable = true)
 |-- currency: string (nullable = true)



In [0]:
# Peek at the first 10 rows of the CSV-backed frame.
df.show(10)

+-------+----------+---------+------------+-----------+
|     id|      date|store_nbr|      family|onpromotion|
+-------+----------+---------+------------+-----------+
|3000888|2017-08-16|        1|  AUTOMOTIVE|          0|
|3000889|2017-08-16|        1|   BABY CARE|          0|
|3000890|2017-08-16|        1|      BEAUTY|          2|
|3000891|2017-08-16|        1|   BEVERAGES|         20|
|3000892|2017-08-16|        1|       BOOKS|          0|
|3000893|2017-08-16|        1|BREAD/BAKERY|         12|
|3000894|2017-08-16|        1| CELEBRATION|          0|
|3000895|2017-08-16|        1|    CLEANING|         25|
|3000896|2017-08-16|        1|       DAIRY|         45|
|3000897|2017-08-16|        1|        DELI|         18|
+-------+----------+---------+------------+-----------+
only showing top 10 rows



In [0]:
# Check the column types of the CSV-backed frame as loaded.
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- date: string (nullable = true)
 |-- store_nbr: string (nullable = true)
 |-- family: string (nullable = true)
 |-- onpromotion: string (nullable = true)



In [0]:
# Parse the "date" column with the yyyy-MM-dd pattern so it becomes a date type.
# NOTE: this rebinds df, so the converted frame replaces the one loaded earlier.
df = df.withColumn(
    "date",
    f.to_date(f.col("date"), "yyyy-MM-dd"),
)

In [0]:
# Re-check the schema after the date conversion.
df.printSchema()

root
 |-- id: string (nullable = true)
 |-- date: date (nullable = true)
 |-- store_nbr: string (nullable = true)
 |-- family: string (nullable = true)
 |-- onpromotion: string (nullable = true)



## Sorting

### Documentation

In [0]:
# Show the built-in documentation for DataFrame.sort.
help(df.sort)

Help on method sort in module pyspark.sql.dataframe:

sort(*cols: Union[str, pyspark.sql.column.Column, List[Union[str, pyspark.sql.column.Column]]], **kwargs: Any) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` sorted by the specified column(s).
    
    .. versionadded:: 1.3.0
    
    Parameters
    ----------
    cols : str, list, or :class:`Column`, optional
         list of :class:`Column` or column names to sort by.
    
    Other Parameters
    ----------------
    ascending : bool or list, optional
        boolean or list of boolean (default ``True``).
        Sort ascending vs. descending. Specify list for multiple sort orders.
        If a list is specified, length of the list must equal length of the `cols`.
    
    Examples
    --------
    >>> df.sort(df.age.desc()).collect()
    [Row(age=5, name='Bob'), Row(age=2, name='Alice')]
    >>> df.sort("age", ascending=False).collect()
    [Row(age=5, name='Bob'), Row(age=

In [0]:
# Show the documentation reached through orderBy; the recorded output is
# headed "method sort", the same text as help(df.sort).
help(df.orderBy)

Help on method sort in module pyspark.sql.dataframe:

sort(*cols: Union[str, pyspark.sql.column.Column, List[Union[str, pyspark.sql.column.Column]]], **kwargs: Any) -> 'DataFrame' method of pyspark.sql.dataframe.DataFrame instance
    Returns a new :class:`DataFrame` sorted by the specified column(s).
    
    .. versionadded:: 1.3.0
    
    Parameters
    ----------
    cols : str, list, or :class:`Column`, optional
         list of :class:`Column` or column names to sort by.
    
    Other Parameters
    ----------------
    ascending : bool or list, optional
        boolean or list of boolean (default ``True``).
        Sort ascending vs. descending. Specify list for multiple sort orders.
        If a list is specified, length of the list must equal length of the `cols`.
    
    Examples
    --------
    >>> df.sort(df.age.desc()).collect()
    [Row(age=5, name='Bob'), Row(age=2, name='Alice')]
    >>> df.sort("age", ascending=False).collect()
    [Row(age=5, name='Bob'), Row(age=

## Examples
```sort``` and ```orderBy``` are aliases, so they behave identically. Swap one name for the other in any example below and it will work unchanged.

In [0]:
# Unsorted preview (first 5 rows) for reference.
df.show(5)

+-------+----------+---------+----------+-----------+
|     id|      date|store_nbr|    family|onpromotion|
+-------+----------+---------+----------+-----------+
|3000888|2017-08-16|        1|AUTOMOTIVE|          0|
|3000889|2017-08-16|        1| BABY CARE|          0|
|3000890|2017-08-16|        1|    BEAUTY|          2|
|3000891|2017-08-16|        1| BEVERAGES|         20|
|3000892|2017-08-16|        1|     BOOKS|          0|
+-------+----------+---------+----------+-----------+
only showing top 5 rows



In [0]:
# Sort ascending by a column name passed as a string.
df.sort("family").show(10)

+-------+----------+---------+----------+-----------+
|     id|      date|store_nbr|    family|onpromotion|
+-------+----------+---------+----------+-----------+
|3001053|2017-08-16|       14|AUTOMOTIVE|          0|
|3001218|2017-08-16|       19|AUTOMOTIVE|          0|
|3000888|2017-08-16|        1|AUTOMOTIVE|          0|
|3000987|2017-08-16|       12|AUTOMOTIVE|          0|
|3001020|2017-08-16|       13|AUTOMOTIVE|          0|
|3000921|2017-08-16|       10|AUTOMOTIVE|          0|
|3000954|2017-08-16|       11|AUTOMOTIVE|          0|
|3001086|2017-08-16|       15|AUTOMOTIVE|          0|
|3001119|2017-08-16|       16|AUTOMOTIVE|          0|
|3001152|2017-08-16|       17|AUTOMOTIVE|          0|
+-------+----------+---------+----------+-----------+
only showing top 10 rows



In [0]:
# Same sort, with the column given as a Column object via bracket access on df.
df.sort(df["family"]).show(10)

+-------+----------+---------+----------+-----------+
|     id|      date|store_nbr|    family|onpromotion|
+-------+----------+---------+----------+-----------+
|3001053|2017-08-16|       14|AUTOMOTIVE|          0|
|3001218|2017-08-16|       19|AUTOMOTIVE|          0|
|3000888|2017-08-16|        1|AUTOMOTIVE|          0|
|3000987|2017-08-16|       12|AUTOMOTIVE|          0|
|3001020|2017-08-16|       13|AUTOMOTIVE|          0|
|3000921|2017-08-16|       10|AUTOMOTIVE|          0|
|3000954|2017-08-16|       11|AUTOMOTIVE|          0|
|3001086|2017-08-16|       15|AUTOMOTIVE|          0|
|3001119|2017-08-16|       16|AUTOMOTIVE|          0|
|3001152|2017-08-16|       17|AUTOMOTIVE|          0|
+-------+----------+---------+----------+-----------+
only showing top 10 rows



In [0]:
# Same sort, with the column built by f.col().
df.sort(f.col("family")).show(10)

+-------+----------+---------+----------+-----------+
|     id|      date|store_nbr|    family|onpromotion|
+-------+----------+---------+----------+-----------+
|3001053|2017-08-16|       14|AUTOMOTIVE|          0|
|3001218|2017-08-16|       19|AUTOMOTIVE|          0|
|3000888|2017-08-16|        1|AUTOMOTIVE|          0|
|3000987|2017-08-16|       12|AUTOMOTIVE|          0|
|3001020|2017-08-16|       13|AUTOMOTIVE|          0|
|3000921|2017-08-16|       10|AUTOMOTIVE|          0|
|3000954|2017-08-16|       11|AUTOMOTIVE|          0|
|3001086|2017-08-16|       15|AUTOMOTIVE|          0|
|3001119|2017-08-16|       16|AUTOMOTIVE|          0|
|3001152|2017-08-16|       17|AUTOMOTIVE|          0|
+-------+----------+---------+----------+-----------+
only showing top 10 rows



In [0]:
# Same sort, with ascending order spelled out through f.asc().
df.sort(f.asc("family")).show(10)

+-------+----------+---------+----------+-----------+
|     id|      date|store_nbr|    family|onpromotion|
+-------+----------+---------+----------+-----------+
|3001053|2017-08-16|       14|AUTOMOTIVE|          0|
|3001218|2017-08-16|       19|AUTOMOTIVE|          0|
|3000888|2017-08-16|        1|AUTOMOTIVE|          0|
|3000987|2017-08-16|       12|AUTOMOTIVE|          0|
|3001020|2017-08-16|       13|AUTOMOTIVE|          0|
|3000921|2017-08-16|       10|AUTOMOTIVE|          0|
|3000954|2017-08-16|       11|AUTOMOTIVE|          0|
|3001086|2017-08-16|       15|AUTOMOTIVE|          0|
|3001119|2017-08-16|       16|AUTOMOTIVE|          0|
|3001152|2017-08-16|       17|AUTOMOTIVE|          0|
+-------+----------+---------+----------+-----------+
only showing top 10 rows



In [0]:
# orderBy called the same way as sort, this time on the date column.
df.orderBy("date").show(10)

+-------+----------+---------+------------+-----------+
|     id|      date|store_nbr|      family|onpromotion|
+-------+----------+---------+------------+-----------+
|3000888|2017-08-16|        1|  AUTOMOTIVE|          0|
|3000889|2017-08-16|        1|   BABY CARE|          0|
|3000890|2017-08-16|        1|      BEAUTY|          2|
|3000891|2017-08-16|        1|   BEVERAGES|         20|
|3000892|2017-08-16|        1|       BOOKS|          0|
|3000893|2017-08-16|        1|BREAD/BAKERY|         12|
|3000894|2017-08-16|        1| CELEBRATION|          0|
|3000895|2017-08-16|        1|    CLEANING|         25|
|3000896|2017-08-16|        1|       DAIRY|         45|
|3000897|2017-08-16|        1|        DELI|         18|
+-------+----------+---------+------------+-----------+
only showing top 10 rows



### Descending

In [0]:
# Descending sort: f.desc() wrapped around a Column from f.col().
df.sort(f.desc(f.col("family"))).show(10)

+-------+----------+---------+-------+-----------+
|     id|      date|store_nbr| family|onpromotion|
+-------+----------+---------+-------+-----------+
|3001217|2017-08-16|       18|SEAFOOD|          0|
|3001547|2017-08-16|       27|SEAFOOD|          0|
|3001250|2017-08-16|       19|SEAFOOD|          0|
|3000920|2017-08-16|        1|SEAFOOD|          0|
|3001283|2017-08-16|        2|SEAFOOD|          0|
|3000986|2017-08-16|       11|SEAFOOD|          0|
|3001316|2017-08-16|       20|SEAFOOD|          0|
|3001085|2017-08-16|       14|SEAFOOD|          0|
|3001349|2017-08-16|       21|SEAFOOD|          0|
|3001151|2017-08-16|       16|SEAFOOD|          0|
+-------+----------+---------+-------+-----------+
only showing top 10 rows



In [0]:
# Descending sort via the ascending=False keyword with a column name string.
df.sort("family", ascending=False).show(10)

+-------+----------+---------+-------+-----------+
|     id|      date|store_nbr| family|onpromotion|
+-------+----------+---------+-------+-----------+
|3001217|2017-08-16|       18|SEAFOOD|          0|
|3001547|2017-08-16|       27|SEAFOOD|          0|
|3001250|2017-08-16|       19|SEAFOOD|          0|
|3000920|2017-08-16|        1|SEAFOOD|          0|
|3001283|2017-08-16|        2|SEAFOOD|          0|
|3000986|2017-08-16|       11|SEAFOOD|          0|
|3001316|2017-08-16|       20|SEAFOOD|          0|
|3001085|2017-08-16|       14|SEAFOOD|          0|
|3001349|2017-08-16|       21|SEAFOOD|          0|
|3001151|2017-08-16|       16|SEAFOOD|          0|
+-------+----------+---------+-------+-----------+
only showing top 10 rows



In [0]:
# ascending=False with a Column obtained from df[...].
df.sort(df["family"], ascending=False).show(10)

+-------+----------+---------+-------+-----------+
|     id|      date|store_nbr| family|onpromotion|
+-------+----------+---------+-------+-----------+
|3001217|2017-08-16|       18|SEAFOOD|          0|
|3001547|2017-08-16|       27|SEAFOOD|          0|
|3001250|2017-08-16|       19|SEAFOOD|          0|
|3000920|2017-08-16|        1|SEAFOOD|          0|
|3001283|2017-08-16|        2|SEAFOOD|          0|
|3000986|2017-08-16|       11|SEAFOOD|          0|
|3001316|2017-08-16|       20|SEAFOOD|          0|
|3001085|2017-08-16|       14|SEAFOOD|          0|
|3001349|2017-08-16|       21|SEAFOOD|          0|
|3001151|2017-08-16|       16|SEAFOOD|          0|
+-------+----------+---------+-------+-----------+
only showing top 10 rows



In [0]:
# ascending=False with a Column from f.col().
df.sort(f.col("family"), ascending=False).show(10)

+-------+----------+---------+-------+-----------+
|     id|      date|store_nbr| family|onpromotion|
+-------+----------+---------+-------+-----------+
|3001217|2017-08-16|       18|SEAFOOD|          0|
|3001547|2017-08-16|       27|SEAFOOD|          0|
|3001250|2017-08-16|       19|SEAFOOD|          0|
|3000920|2017-08-16|        1|SEAFOOD|          0|
|3001283|2017-08-16|        2|SEAFOOD|          0|
|3000986|2017-08-16|       11|SEAFOOD|          0|
|3001316|2017-08-16|       20|SEAFOOD|          0|
|3001085|2017-08-16|       14|SEAFOOD|          0|
|3001349|2017-08-16|       21|SEAFOOD|          0|
|3001151|2017-08-16|       16|SEAFOOD|          0|
+-------+----------+---------+-------+-----------+
only showing top 10 rows



In [0]:
# Descending via the Column.desc() method.
df.sort(f.col("family").desc()).show(10)

+-------+----------+---------+-------+-----------+
|     id|      date|store_nbr| family|onpromotion|
+-------+----------+---------+-------+-----------+
|3001217|2017-08-16|       18|SEAFOOD|          0|
|3001547|2017-08-16|       27|SEAFOOD|          0|
|3001250|2017-08-16|       19|SEAFOOD|          0|
|3000920|2017-08-16|        1|SEAFOOD|          0|
|3001283|2017-08-16|        2|SEAFOOD|          0|
|3000986|2017-08-16|       11|SEAFOOD|          0|
|3001316|2017-08-16|       20|SEAFOOD|          0|
|3001085|2017-08-16|       14|SEAFOOD|          0|
|3001349|2017-08-16|       21|SEAFOOD|          0|
|3001151|2017-08-16|       16|SEAFOOD|          0|
+-------+----------+---------+-------+-----------+
only showing top 10 rows



## Sort on derived column

In [0]:
# Reference view of df1 before sorting on a computed value.
df1.show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Add the array length as its own column, keep only the columns of interest,
# then sort on that new column from largest to smallest.
# Note: the select asks for "name" while df1's column is "Name"; the recorded
# output shows the lookup succeeding and the header taking the spelling used here.
(
    df1.withColumn("Companies_worked", f.size("Companies"))
    .select("id", "name", "Companies", "Companies_worked")
    .sort(f.desc("Companies_worked"))
    .show()
)

+---+-----+-----------------+----------------+
| id| name|        Companies|Companies_worked|
+---+-----+-----------------+----------------+
|  4|Name4|  [tesla, google]|               2|
|  6|Name6|   [infosys, l&t]|               2|
|  1|Name1|[meta, microsoft]|               2|
|  5|Name5|            [tcs]|               1|
|  3|Name3|         [openai]|               1|
|  2|Name2|         [google]|               1|
|  8|Name8|               []|               0|
|  7|Name7|               []|               0|
+---+-----+-----------------+----------------+



In [0]:
# Order by the array length directly, without adding a column for it.
# Rows with the same length are not tie-broken here; the recorded output lists
# the size-2 rows in a different order than the previous cell's output.
df1.orderBy(f.size("Companies").desc()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  4|Name4|  [tesla, google]| 10000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
+---+-----+-----------------+------+--------+



## Dealing with null
By default, ```asc``` behaves like ```asc_nulls_first``` and ```desc``` behaves like ```desc_nulls_last```: nulls come first in an ascending sort and last in a descending sort.

In [0]:
# Reference view of df1; salary is null for ids 7 and 8.
df1.show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Plain ascending sort on salary; the null salaries appear first in the output.
df1.sort("salary").show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Ascending sort that pushes the null salaries to the end.
df1.sort(f.col("salary").asc_nulls_last()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Same nulls-last ascending sort, with the column accessed as an attribute of df1.
df1.sort(df1.salary.asc_nulls_last()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Descending sort on salary; the null salaries appear last in the output.
df1.sort(f.col("salary").desc()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Descending sort that puts the null salaries first.
df1.sort(f.col("salary").desc_nulls_first()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
+---+-----+-----------------+------+--------+



## Sorting multiple columns

In [0]:
# Reference view of df1 before the multi-column examples.
df1.show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Sort by salary, then Name, passing the column names as separate arguments.
df1.sort("salary", "Name").show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Same sort keys passed as a single list of names.
df1.sort(["salary", "Name"]).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Same sort keys passed as a list of Column objects.
df1.sort([df1["salary"], df1["Name"]]).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Mix a computed key (array size, descending) with a plain column (salary, default order).
df1.sort(f.size(f.col("Companies")).desc(), f.col("salary")).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Mix a name string (salary, default order) with f.desc() on Name as the second key;
# in the recorded output the two null-salary rows come out as Name8 then Name7.
df1.sort("salary", f.desc("Name")).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Array size descending written with f.desc() around the size expression, then salary.
df1.sort(f.desc(f.size(f.col("Companies"))), f.col("salary")).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Both keys descending: array size first, then salary.
df1.sort(f.size(f.col("Companies")).desc(), f.col("salary").desc()).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Per-column directions through the `ascending` list: salary descending, Name ascending.
# The list pairs up position-by-position with the column list, so both must have
# the same length. Booleans are used because the API documents `ascending` as a
# bool or list of bool.
df1.sort(["salary", "Name"], ascending=[False, True]).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  5|Name5|            [tcs]| 50000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  2|Name2|         [google]|  3000|     USD|
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
+---+-----+-----------------+------+--------+



In [0]:
# Per-column directions through the `ascending` list: salary ascending, Name descending.
# The list pairs up position-by-position with the column list, so both must have
# the same length. Booleans are used because the API documents `ascending` as a
# bool or list of bool.
df1.sort(["salary", "Name"], ascending=[True, False]).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  8|Name8|               []|  null|    null|
|  7|Name7|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+



In [0]:
# Per-column directions through the `ascending` list: both salary and Name ascending.
# The list pairs up position-by-position with the column list, so both must have
# the same length. Booleans are used because the API documents `ascending` as a
# bool or list of bool.
df1.sort(["salary", "Name"], ascending=[True, True]).show()

+---+-----+-----------------+------+--------+
| id| Name|        Companies|salary|currency|
+---+-----+-----------------+------+--------+
|  7|Name7|               []|  null|    null|
|  8|Name8|               []|  null|    null|
|  2|Name2|         [google]|  3000|     USD|
|  1|Name1|[meta, microsoft]|  5000|     USD|
|  3|Name3|         [openai]|  6000|     USD|
|  4|Name4|  [tesla, google]| 10000|     USD|
|  6|Name6|   [infosys, l&t]| 15000|     USD|
|  5|Name5|            [tcs]| 50000|     USD|
+---+-----+-----------------+------+--------+

