In [1]:

// $example on$
import org.apache.spark.graphx.{GraphLoader, PartitionStrategy}
// $example off$
import org.apache.spark.sql.SparkSession

/**
 * A vertex is part of a triangle when it has two adjacent vertices with an edge between them.
 * GraphX implements a triangle counting algorithm in the
 * [[org.apache.spark.graphx.lib.TriangleCount]] object
 * that determines the number of triangles passing through each vertex,
 * providing a measure of clustering.
 * We compute the triangle count of the social network dataset.
 *
 * Note that `TriangleCount` requires the edges to be in canonical orientation (`srcId < dstId`)
 * and the graph to be partitioned using [[org.apache.spark.graphx.Graph#partitionBy]].
 *
 * Run with
 * {{{
 * bin/run-example graphx.TriangleCountingExample
 * }}}
 */
import org.apache.spark.graphx.{Graph, VertexRDD}
import org.apache.spark.graphx.util.GraphGenerators
// $example off$
import org.apache.spark.sql.SparkSession

import org.apache.spark._
import org.apache.spark.graphx._
// To make some of the examples work we will also need RDD
import org.apache.spark.rdd.RDD


// Spark context initialisation.
// Configure a local, single-JVM run; the application name is what identifies this job
// in the Spark UI and logs, so it names the triangle counting example it belongs to.
val sparkConf = new SparkConf().setMaster("local").setAppName("TriangleCountingExample")
// Reuse the active SparkContext when one exists (for example when this cell is re-run
// after a failure that skipped sc.stop()), otherwise create one from sparkConf.
// Calling the SparkContext constructor directly fails while another context is running.
// NOTE(review): when an existing context is reused, the settings in sparkConf may not
// take effect -- confirm this is acceptable for the notebook environment.
val sc = SparkContext.getOrCreate(sparkConf)

    // $example on$
    // Read the follower edge list with each edge stored in canonical orientation
    // (srcId < dstId), then repartition the graph ahead of the triangle count
    val graph = GraphLoader
      .edgeListFile(sc, "data/followers.txt", canonicalOrientation = true)
      .partitionBy(PartitionStrategy.RandomVertexCut)
    // Number of triangles passing through each vertex, keyed by vertex id
    val triCounts = graph.triangleCount().vertices
    // Turn each comma-separated line of the users file into a (vertexId, username) pair,
    // taking the id from the first field and the username from the second
    val users = sc.textFile("data/users.txt").map { line =>
      val parts = line.split(",")
      parts(0).toLong -> parts(1)
    }
    // Match usernames to triangle counts by vertex id and keep only (username, count)
    val triCountByUsername = users.join(triCounts).values
    // Bring the result to the driver and print one (username, count) pair per line
    val report = triCountByUsername.collect().mkString("\n")
    println(report)
    // $example off$
    // Shut the Spark context down now that the job is finished
    sc.stop()

// scalastyle:on println


(justinbieber,0)
(BarackObama,0)
(matei_zaharia,1)
(jeresig,1)
(odersky,1)
(ladygaga,0)


sparkConf = org.apache.spark.SparkConf@1bc8e0df
sc = org.apache.spark.SparkContext@4be49570
graph = org.apache.spark.graphx.impl.GraphImpl@1e9dca9d
triCounts = VertexRDDImpl[69] at RDD at VertexRDD.scala:57
users = MapPartitionsRDD[74] at map at <console>:70


triCountByUsername...


MapPartitionsRDD[74] at map at <console>:70