In [0]:
# Install Java, Spark, and Findspark
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q http://www-us.apache.org/dist/spark/spark-2.4.5/spark-2.4.5-bin-hadoop2.7.tgz
!tar xf spark-2.4.5-bin-hadoop2.7.tgz
!pip install -q findspark

# Set Environment Variables
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.5-bin-hadoop2.7"

# Start a SparkSession
import findspark
findspark.init()

In [0]:
# Build the SparkSession for this notebook under the application name "Q2";
# getOrCreate() hands back an already-active session when one exists.
from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("Q2")
    .getOrCreate()
)

In [3]:
from pyspark import SparkFiles

# Load question_two.csv from S3 into a DataFrame.
# addFile() downloads the URL; SparkFiles.get() then resolves the local copy
# by its file name.
url = "https://s3.amazonaws.com/dataviz-curriculum/day_3/checkpoints/question_two.csv"
spark.sparkContext.addFile(url)

# In Spark datetime patterns "MM" is month-of-year while "mm" is
# minute-of-hour, so a month/day/year format must be spelled "MM/dd/yy".
df = spark.read.csv(
    SparkFiles.get("question_two.csv"),
    header=True,
    inferSchema=True,
    sep=',',
    timestampFormat="MM/dd/yy",
)
df.show(10)

+--------------------+----------+
|    coffee_shop_name|num_rating|
+--------------------+----------+
|The Factory - Caf...|         5|
|The Factory - Caf...|         4|
|The Factory - Caf...|         4|
|The Factory - Caf...|         2|
|The Factory - Caf...|         4|
|The Factory - Caf...|         4|
|The Factory - Caf...|         4|
|The Factory - Caf...|         5|
|The Factory - Caf...|         3|
|The Factory - Caf...|         5|
+--------------------+----------+
only showing top 10 rows



# Which coffee shop had the fewest number of ratings?

In [4]:
# Count the ratings recorded for each coffee shop. The dict-style agg names
# the resulting column "count(coffee_shop_name)", which orderBy refers to below.
coffee_ratings_df = df.groupby("coffee_shop_name").agg({"coffee_shop_name": "count"})

# Ascending order puts the shop with the fewest ratings on the first row.
coffee_ratings_df.orderBy("count(coffee_shop_name)").show(truncate=False)

+------------------------------------+-----------------------+
|coffee_shop_name                    |count(coffee_shop_name)|
+------------------------------------+-----------------------+
|Lola Savannah Coffee Downtown       |4                      |
|The Marvelous Vintage Tea Party Co. |10                     |
|Kowabunga Coffee                    |16                     |
|Sister Coffee                       |17                     |
|Lucky Lab Coffee                    |25                     |
|Flitch Coffee                       |28                     |
|Legend Coffee                       |28                     |
|Friends & Neighbors                 |29                     |
|Holy Grounds                        |30                     |
|My Sweet Austin                     |31                     |
|Tuscany At 360                      |33                     |
|Mañana Coffee & Juice               |33                     |
|Brian's Brew                        |45               