<a href="https://colab.research.google.com/github/vamshap/PySpark-Challenges/blob/main/NumberofVisitsandResources.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import count, col, collect_list, concat_ws
# Create Spark session
spark = SparkSession.builder.appName("EntriesTable").getOrCreate()

# Identify the Total NumberofVisits By a Person,MostVisited Floor By a Person , Resources Used by a Person

# Define schema and data for the Entries table
data = [
    ('A', 'Bangalore', 'A@gmail.com', 1, 'CPU'),
    ('A', 'Bangalore', 'A1@gmail.com', 1, 'CPU'),
    ('A', 'Bangalore', 'A2@gmail.com', 2, 'DESKTOP'),
    ('B', 'Bangalore', 'B@gmail.com', 2, 'DESKTOP'),
    ('B', 'Bangalore', 'B1@gmail.com', 2, 'DESKTOP'),
    ('B', 'Bangalore', 'B2@gmail.com', 1, 'MONITOR')
]

columns = ["name", "address", "email", "floor", "resources"]

# Create DataFrame
entries_df = spark.createDataFrame(data, columns)

# Show the DataFrame
entries_df.show()

name_and_floor_df = entries_df.select("name", "floor").distinct()

# Step 2: Number of Visits
number_of_visits_df = entries_df.groupBy("name").agg(count("*").alias("NoofVisits"))

# Step 3: Most Visited Floor
most_visited_floor_df = (
    entries_df.groupBy("name", "floor")
    .agg(count("floor").alias("MostVisitedFloor"))
)

# Step 4: Machines
machines_df = (
    entries_df.groupBy("name")
    .agg(concat_ws(",", collect_list("resources")).alias("Resources"))
)

# Step 5: Final Join
result_df = (
    name_and_floor_df
    .join(number_of_visits_df, "name", "left")
    .join(most_visited_floor_df, ["name", "floor"], "left")
    .join(machines_df, "name", "left")
)

# Display the final result
result_df.show()



+----+---------+------------+-----+---------+
|name|  address|       email|floor|resources|
+----+---------+------------+-----+---------+
|   A|Bangalore| A@gmail.com|    1|      CPU|
|   A|Bangalore|A1@gmail.com|    1|      CPU|
|   A|Bangalore|A2@gmail.com|    2|  DESKTOP|
|   B|Bangalore| B@gmail.com|    2|  DESKTOP|
|   B|Bangalore|B1@gmail.com|    2|  DESKTOP|
|   B|Bangalore|B2@gmail.com|    1|  MONITOR|
+----+---------+------------+-----+---------+

+----+-----+----------+----------------+--------------------+
|name|floor|NoofVisits|MostVisitedFloor|           Resources|
+----+-----+----------+----------------+--------------------+
|   B|    1|         3|               1|DESKTOP,DESKTOP,M...|
|   B|    2|         3|               2|DESKTOP,DESKTOP,M...|
|   A|    2|         3|               1|     CPU,CPU,DESKTOP|
|   A|    1|         3|               2|     CPU,CPU,DESKTOP|
+----+-----+----------+----------------+--------------------+

