In [1]:
// $example on$
import org.apache.spark.graphx.GraphLoader


/**
 * A connected components algorithm example.
 * The connected components algorithm labels each connected component of the graph
 * with the ID of its lowest-numbered vertex.
 * For example, in a social network, connected components can approximate clusters.
 * GraphX contains an implementation of the algorithm in the
 * [[org.apache.spark.graphx.lib.ConnectedComponents]] object,
 * and we compute the connected components of the example social network dataset.
 *
 * Run with
 * {{{
 * bin/run-example graphx.ConnectedComponentsExample
 * }}}
 */

import org.apache.spark.graphx.{Graph, VertexRDD}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession

import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD

// Obtain the SparkContext used by this example. Starting a second context with
// `new SparkContext` fails when one is already active in the JVM (as may be the case
// in a notebook kernel), so reuse the active context if present and create one otherwise.
// The application name identifies this job as the connected components example.
val sparkConf = new SparkConf().setMaster("local").setAppName("ConnectedComponentsExample")
val sc = SparkContext.getOrCreate(sparkConf)



    // $example on$
    // Build the follower graph from the edge-list file (same input as the PageRank example).
    val graph = GraphLoader.edgeListFile(sc, "data/followers.txt")
    // Label each vertex with the lowest vertex ID found in its connected component.
    val cc = graph.connectedComponents().vertices
    // Read the user records: the first comma-separated field is taken as the vertex ID,
    // the second as the username.
    val users = sc.textFile("data/users.txt").map { record =>
      val columns = record.split(",")
      val vertexId = columns(0).toLong
      val username = columns(1)
      (vertexId, username)
    }
    // Pair every username with its component label; the vertex ID is only the join key
    // and is dropped from the result.
    val ccByUsername = users.join(cc).map {
      case (_, (username, componentId)) => (username, componentId)
    }
    // Bring the pairs to the driver and print them one per line.
    println(ccByUsername.collect().mkString("\n"))
    // $example off$
    // Release the Spark resources held by this example.
    sc.stop()
 

(justinbieber,1)
(BarackObama,1)
(matei_zaharia,3)
(jeresig,3)
(odersky,3)
(ladygaga,1)


sparkConf = org.apache.spark.SparkConf@27922c96
sc = org.apache.spark.SparkContext@274b39fc
graph = org.apache.spark.graphx.impl.GraphImpl@7b416fd5
cc = VertexRDDImpl[33] at RDD at VertexRDD.scala:57
users = MapPartitionsRDD[51] at map at <console>:71


ccByUsername: org.apache.spark.rdd.RDD[(String, org.apache...


MapPartitionsRDD[51] at map at <console>:71