In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [0]:
# Sample employee records as (id, name, age, salary, country, dept) tuples.
# Some fields are None, one row is entirely None, and the id-7 row appears twice.
emp_data = [
    (1, 'manish', 26, 20000, 'india', 'IT'),
    (2, 'rahul', None, 40000, 'germany', 'engineering'),
    (3, 'pawan', 12, 60000, 'india', 'sales'),
    (4, 'roshini', 44, None, 'uk', 'engineering'),
    (5, 'raushan', 35, 70000, 'india', 'sales'),
    (6, None, 29, 200000, 'uk', 'IT'),
    (7, 'adam', 37, 65000, 'us', 'IT'),
    (8, 'chris', 16, 40000, 'us', 'sales'),
    (None, None, None, None, None, None),
    (7, 'adam', 37, 65000, 'us', 'IT'),
]

# Only column names are supplied here; the column types are left for Spark to infer.
schema = ['id', 'name', 'age', 'salary', 'country', 'dept']

emp_df = spark.createDataFrame(emp_data, schema)

emp_df.show()




+----+-------+----+------+-------+-----------+
|  id|   name| age|salary|country|       dept|
+----+-------+----+------+-------+-----------+
|   1| manish|  26| 20000|  india|         IT|
|   2|  rahul|null| 40000|germany|engineering|
|   3|  pawan|  12| 60000|  india|      sales|
|   4|roshini|  44|  null|     uk|engineering|
|   5|raushan|  35| 70000|  india|      sales|
|   6|   null|  29|200000|     uk|         IT|
|   7|   adam|  37| 65000|     us|         IT|
|   8|  chris|  16| 40000|     us|      sales|
|null|   null|null|  null|   null|       null|
|   7|   adam|  37| 65000|     us|         IT|
+----+-------+----+------+-------+-----------+



In [0]:
# Label each row as adult ('Yes') or not ('No') based on age.
# The adult branch is inclusive (>= 18): with a strict `> 18`, an age of exactly
# 18 would match neither branch and be labelled 'NoVale' as if it were missing.
# A null age makes both comparisons null, so only those rows reach otherwise().
emp_df.withColumn(
    'adult',
    when(col('age') < 18, 'No')
    .when(col('age') >= 18, 'Yes')
    .otherwise('NoVale'),
).show()

+----+-------+----+------+-------+-----------+------+
|  id|   name| age|salary|country|       dept| adult|
+----+-------+----+------+-------+-----------+------+
|   1| manish|  26| 20000|  india|         IT|   Yes|
|   2|  rahul|null| 40000|germany|engineering|NoVale|
|   3|  pawan|  12| 60000|  india|      sales|    No|
|   4|roshini|  44|  null|     uk|engineering|   Yes|
|   5|raushan|  35| 70000|  india|      sales|   Yes|
|   6|   null|  29|200000|     uk|         IT|   Yes|
|   7|   adam|  37| 65000|     us|         IT|   Yes|
|   8|  chris|  16| 40000|     us|      sales|    No|
|null|   null|null|  null|   null|       null|NoVale|
|   7|   adam|  37| 65000|     us|         IT|   Yes|
+----+-------+----+------+-------+-----------+------+



In [0]:
# Step 1: replace a null age with 19 so every row has a concrete age.
# Step 2: derive the adult flag from the filled-in age.
# The test is inclusive (>= 18): a strict `> 18` would label an 18-year-old "No".
emp_df.withColumn('age', when(col('age').isNull(), lit(19)).otherwise(col('age')))\
        .withColumn("adult", when(col("age") >= 18, "Yes")
                    .otherwise("No")).show()
                  

+----+-------+---+------+-------+-----------+-----+
|  id|   name|age|salary|country|       dept|adult|
+----+-------+---+------+-------+-----------+-----+
|   1| manish| 26| 20000|  india|         IT|  Yes|
|   2|  rahul| 19| 40000|germany|engineering|  Yes|
|   3|  pawan| 12| 60000|  india|      sales|   No|
|   4|roshini| 44|  null|     uk|engineering|  Yes|
|   5|raushan| 35| 70000|  india|      sales|  Yes|
|   6|   null| 29|200000|     uk|         IT|  Yes|
|   7|   adam| 37| 65000|     us|         IT|  Yes|
|   8|  chris| 16| 40000|     us|      sales|   No|
|null|   null| 19|  null|   null|       null|  Yes|
|   7|   adam| 37| 65000|     us|         IT|  Yes|
+----+-------+---+------+-------+-----------+-----+



In [0]:
# Bucket rows by age: "Minor" for 0 < age < 18, "Mid" for 18 <= age < 30,
# and "Major" for everything else.
# The "Mid" lower bound is inclusive: with a strict `> 18`, an age of exactly 18
# would match neither bucket and fall through to "Major".
# NOTE(review): a null or non-positive age also ends up in "Major" via otherwise();
# confirm whether those rows should get their own label.
emp_df.withColumn("age_wise", when((col("age") > 0) & (col("age") < 18), "Minor")
                            .when((col("age") >= 18) & (col("age") < 30), "Mid")
                            .otherwise("Major"))\
                            .show()

+----+-------+----+------+-------+-----------+--------+
|  id|   name| age|salary|country|       dept|age_wise|
+----+-------+----+------+-------+-----------+--------+
|   1| manish|  26| 20000|  india|         IT|     Mid|
|   2|  rahul|null| 40000|germany|engineering|   Major|
|   3|  pawan|  12| 60000|  india|      sales|   Minor|
|   4|roshini|  44|  null|     uk|engineering|   Major|
|   5|raushan|  35| 70000|  india|      sales|   Major|
|   6|   null|  29|200000|     uk|         IT|     Mid|
|   7|   adam|  37| 65000|     us|         IT|   Major|
|   8|  chris|  16| 40000|     us|      sales|   Minor|
|null|   null|null|  null|   null|       null|   Major|
|   7|   adam|  37| 65000|     us|         IT|   Major|
+----+-------+----+------+-------+-----------+--------+



In [0]:
# Register emp_df as the temporary view "emp_tbl" so it can be queried by name with spark.sql().
emp_df.createOrReplaceTempView("emp_tbl")

In [0]:
# In Spark SQL
# CASE expression over the emp_tbl view: 'minor' below 18, 'major' from 18 upward.
# The second branch is inclusive (age>=18): with a strict `age>18`, an age of
# exactly 18 would match neither branch and be reported as 'novalue'.
# A null age fails both comparisons, so those rows take the ELSE branch.
spark.sql("""
          select *,
          case when age<18 then 'minor'
          when age>=18 then 'major'
          else 'novalue'
          end as adult
          from  emp_tbl
          """).show()

+----+-------+----+------+-------+-----------+-------+
|  id|   name| age|salary|country|       dept|  adult|
+----+-------+----+------+-------+-----------+-------+
|   1| manish|  26| 20000|  india|         IT|  major|
|   2|  rahul|null| 40000|germany|engineering|novalue|
|   3|  pawan|  12| 60000|  india|      sales|  minor|
|   4|roshini|  44|  null|     uk|engineering|  major|
|   5|raushan|  35| 70000|  india|      sales|  major|
|   6|   null|  29|200000|     uk|         IT|  major|
|   7|   adam|  37| 65000|     us|         IT|  major|
|   8|  chris|  16| 40000|     us|      sales|  minor|
|null|   null|null|  null|   null|       null|novalue|
|   7|   adam|  37| 65000|     us|         IT|  major|
+----+-------+----+------+-------+-----------+-------+

