In [79]:
import pyspark
import pyspark.sql.types as Types
from pyspark import SQLContext
from pyspark.sql import SparkSession

# Reuse the running SparkContext if there is one, otherwise start one.
# ``SparkContext`` is not imported by name in this file, so it is reached
# through the already-imported ``pyspark`` package; the bare name raises
# NameError unless something outside this cell happened to define it.
sc = pyspark.SparkContext.getOrCreate()
# SQLContext wrapper around that context; used for the CSV reads below.
sql = SQLContext(sc)

# Read the COVID country summary CSV, taking column names from its header row.
# NOTE: this frame is not used before ``df`` is rebound by the JSON read below.
df = sql.read.load(
    "covid_dataset/country_wise_latest.csv",
    format="com.databricks.spark.csv",
    header="true",
)

# Settings handed to the SparkSession builder.
appName = "PySpark Example - JSON file to Spark Data Frame"
master = "local"

# Fetch the active SparkSession, or build one from the settings above.
spark = (
    SparkSession.builder
    .master(master)
    .appName(appName)
    .getOrCreate()
)


# Explicit schema for the population JSON: every field is a nullable string,
# so numeric columns have to be cast wherever they are compared or sorted.
schema = Types.StructType([
    Types.StructField(column_name, Types.StringType(), True)
    for column_name in (
        'Country/Region',
        'Population',
        'Urban Pop %',
        'World Share %',
        'Med. Age',
    )
])


# Parse the population JSON with the explicit schema; multiLine=True allows a
# JSON record to span several lines of the file.
df = spark.read.json(
    path="world_population/population.json",
    schema=schema,
    multiLine=True,
)

# Population is held as a string, so it is cast before sorting. A 64-bit
# "long" is used instead of a 32-bit "int" so that a value above
# 2,147,483,647 still sorts numerically rather than overflowing the cast.
# ``orderBy`` and ``sort`` are aliases, so each pair of calls below prints the
# same table; the bare string after each pair records that output.

# Four most populous entries.
df.orderBy(df['Population'].cast("long").desc()).limit(4).show()
df.sort(df['Population'].cast("long").desc()).limit(4).show()
"""
+--------------+----------+-----------+-------------+--------+
|Country/Region|Population|Urban Pop %|World Share %|Med. Age|
+--------------+----------+-----------+-------------+--------+
|         China|1440297825|         61|        18.47|      38|
|         India|1382345085|         35|        17.70|      28|
| United States| 331341050|         83|         4.25|      38|
|     Indonesia| 274021604|         56|         3.51|      30|
+--------------+----------+-----------+-------------+--------+
"""
# Four least populous entries.
df.orderBy(df['Population'].cast("long").asc()).limit(4).show()
df.sort(df['Population'].cast("long").asc()).limit(4).show()
"""
+----------------+----------+-----------+-------------+--------+
|  Country/Region|Population|Urban Pop %|World Share %|Med. Age|
+----------------+----------+-----------+-------------+--------+
|        Holy See|       801|       N.A.|         0.00|    N.A.|
|         Tokelau|      1360|          0|         0.00|    N.A.|
|            Niue|      1628|         46|         0.00|    N.A.|
|Falkland Islands|      3497|         66|         0.00|    N.A.|
+----------------+----------+-----------+-------------+--------+
"""

# Load the world-cities CSV, taking column names from its header row.
df = sql.read.load(
    "world_population/worldcities.csv",
    format="com.databricks.spark.csv",
    header="true",
)

# Keep only the rows whose population exceeds ten million.
df = df.filter(df['population'].cast("int") > 10000000)

# Countries in reverse alphabetical order; within a country, largest
# population first. The bare string below records the printed table.
(df.orderBy(df['country'].desc(), df['population'].cast("int").desc())
   .limit(6)
   .select("city", "country", "population")
   .show())
"""
+-----------+-------------+----------+
|       city|      country|population|
+-----------+-------------+----------+
|   New York|United States|19354922.0|
|Los Angeles|United States|12815475.0|
|   Istanbul|       Turkey|  10061000|
|     Moscow|       Russia|  10452000|
|     Manila|  Philippines|  11100000|
|    Karachi|     Pakistan|  12130000|
+-----------+-------------+----------+
"""
# Same country order, but smallest population first within a country.
(df.sort(df['country'].desc(), df['population'].cast("int").asc())
   .limit(6)
   .select("city", "country", "population")
   .show())
"""
+-----------+-------------+----------+
|       city|      country|population|
+-----------+-------------+----------+
|Los Angeles|United States|12815475.0|
|   New York|United States|19354922.0|
|   Istanbul|       Turkey|  10061000|
|     Moscow|       Russia|  10452000|
|     Manila|  Philippines|  11100000|
|    Karachi|     Pakistan|  12130000|
+-----------+-------------+----------+
"""

+--------------+----------+-----------+-------------+--------+
|Country/Region|Population|Urban Pop %|World Share %|Med. Age|
+--------------+----------+-----------+-------------+--------+
|         China|1440297825|         61|        18.47|      38|
|         India|1382345085|         35|        17.70|      28|
| United States| 331341050|         83|         4.25|      38|
|     Indonesia| 274021604|         56|         3.51|      30|
+--------------+----------+-----------+-------------+--------+

+--------------+----------+-----------+-------------+--------+
|Country/Region|Population|Urban Pop %|World Share %|Med. Age|
+--------------+----------+-----------+-------------+--------+
|         China|1440297825|         61|        18.47|      38|
|         India|1382345085|         35|        17.70|      28|
| United States| 331341050|         83|         4.25|      38|
|     Indonesia| 274021604|         56|         3.51|      30|
+--------------+----------+-----------+-------------+--------+

'\n+-----------+-------------+----------+\n|       city|      country|population|\n+-----------+-------------+----------+\n|Los Angeles|United States|12815475.0|\n|   New York|United States|19354922.0|\n|   Istanbul|       Turkey|  10061000|\n|     Moscow|       Russia|  10452000|\n|     Manila|  Philippines|  11100000|\n|    Karachi|     Pakistan|  12130000|\n+-----------+-------------+----------+\n'