In [None]:
pip install graphframes



In [None]:
from pyspark.sql import SparkSession, functions as F
from graphframes import GraphFrame

# Obtain a Spark session, passing the GraphFrames package coordinate through
# the spark.jars.packages setting.
spark = (
    SparkSession.builder
    .appName("PageRank with GraphFrames")
    .config("spark.jars.packages", "graphframes:graphframes:0.8.4-spark3.5-s_2.12")
    .getOrCreate()
)

# Toy directed graph: three vertices and four (src -> dst) edges.
vertices = spark.createDataFrame([(0,), (1,), (2,)], ["id"])
edges = spark.createDataFrame(
    [(0, 1), (0, 2), (1, 2), (2, 0)],
    ["src", "dst"],
)
gf = GraphFrame(vertices, edges)

# PageRank parameters: the reset probability is the complement of the
# damping factor.
damping_factor = 0.85
reset_prob = 1.0 - damping_factor
tolerance = 1e-6

# Run PageRank with the tolerance-based stopping parameter and keep the
# per-vertex result frame.
pagerank_results = gf.pageRank(resetProbability=reset_prob, tol=tolerance)
pagerank_df = pagerank_results.vertices

# Divide every score by the total so the displayed ranks sum to 1.
sum_rank = pagerank_df.agg(F.sum("pagerank").alias("total")).first()["total"]
pagerank_df = pagerank_df.withColumn(
    "pagerank",
    F.col("pagerank") / F.lit(sum_rank),
)

pagerank_df.show()




+---+-------------------+
| id|           pagerank|
+---+-------------------+
|  0|0.38778955828885675|
|  1|0.21481090558570037|
|  2| 0.3973995361254429|
+---+-------------------+

