## Exemple de Transformer

In [2]:
import org.apache.spark.ml.feature.Binarizer

// Sample flight-arrival records: origin airport, aircraft model, hour,
// temperature and arrival status.
val arrival_data = spark.createDataFrame(Seq(
  ("SFO", "B737", 18, 95.1, "late"),
  ("SEA", "A319", 5, 65.7, "ontime"),
  ("LAX", "B747", 15, 31.5, "late"),
  ("ATL", "A319", 14, 40.5, "late")
)).toDF("origin", "model", "hour", "temperature", "arrival")

// Define the transformation: threshold the temperature column and write the
// result to a new output column.
// Note: in the recorded output the "freezing" column is 1.0 for the rows whose
// temperature is ABOVE the 35.6 threshold and 0.0 for the row below it, so the
// column name reads inverted with respect to its values.
val binarizer = new Binarizer()
  .setInputCol("temperature")
  .setOutputCol("freezing")
  .setThreshold(35.6)

binarizer.transform(arrival_data).show()

// Print the current parameter values of the binarizer. explainParams only
// returns a String; as an intermediate expression of a multi-statement cell
// its value is not displayed unless it is printed explicitly.
println(binarizer.explainParams())

// Shape of the printed text (the default outputCol embeds a run-specific uid):
//inputCol: input column name (current: temperature)
//outputCol: output column name (default: binarizer_60430bb4e97f__output, current: freezing)
//threshold: threshold used to binarize continuous features (default: 0.0, current: 35.6)

// Show the transformation result again, restricted to the temperature and
// freezing columns.
binarizer.transform(arrival_data).select("temperature", "freezing").show()

+------+-----+----+-----------+-------+--------+
|origin|model|hour|temperature|arrival|freezing|
+------+-----+----+-----------+-------+--------+
|   SFO| B737|  18|       95.1|   late|     1.0|
|   SEA| A319|   5|       65.7| ontime|     1.0|
|   LAX| B747|  15|       31.5|   late|     0.0|
|   ATL| A319|  14|       40.5|   late|     1.0|
+------+-----+----+-----------+-------+--------+

+-----------+--------+
|temperature|freezing|
+-----------+--------+
|       95.1|     1.0|
|       65.7|     1.0|
|       31.5|     0.0|
|       40.5|     1.0|
+-----------+--------+



arrival_data = [origin: string, model: string ... 3 more fields]
binarizer = binarizer_b49743b287ab


binarizer_b49743b287ab

## seuillage à plusieurs niveaux

In [3]:
import org.apache.spark.ml.feature.Bucketizer

// Split points for multi-level thresholding. In the recorded output 31.5 maps
// to bucket 0.0, 40.5 and 65.7 to bucket 1.0, and 95.1 to bucket 2.0.
val bucketBorders = Array(-1.0, 32.0, 70.0, 150.0)

// Bucketize the temperature column into an "intensity" bucket index.
val bucketer = new Bucketizer()
  .setInputCol("temperature")
  .setOutputCol("intensity")
  .setSplits(bucketBorders)

val output = bucketer.transform(arrival_data)

// Display each temperature next to its bucket, sorted by temperature.
output
  .select("temperature", "intensity")
  .orderBy("temperature")
  .show()

[Stage 0:>                                                          (0 + 4) / 4]+-----------+---------+
|temperature|intensity|
+-----------+---------+
|       31.5|      0.0|
|       40.5|      1.0|
|       65.7|      1.0|
|       95.1|      2.0|
+-----------+---------+



bucketBorders = Array(-1.0, 32.0, 70.0, 150.0)
bucketer = bucketizer_114084ae720c
output = [origin: string, model: string ... 4 more fields]


[origin: string, model: string ... 4 more fields]

In [4]:
import org.apache.spark.ml.feature.OneHotEncoder
// Students with their major and an already-computed numeric index for it.
val student_major_data = spark.createDataFrame(Seq(
  ("John", "Math", 3),
  ("Mary", "Engineering", 2),
  ("Jeff", "Philosophy", 7),
  ("Jane", "Math", 3),
  ("Lyna", "Nursing", 4)
)).toDF("user", "major", "majorIdx")

// majorVect is a sparse vector.
// The largest index value gets no column of its own: here majorIdx ranges
// from 0 to 7 (unless configured otherwise), so values 0..6 map to 7 columns
// and value 7 maps to the all-zero vector (see the recorded output).
val oneHotEncoder = new OneHotEncoder()
  .setInputCol("majorIdx")
  .setOutputCol("majorVect")

oneHotEncoder.transform(student_major_data).show()

+----+-----------+--------+-------------+
|user|      major|majorIdx|    majorVect|
+----+-----------+--------+-------------+
|John|       Math|       3|(7,[3],[1.0])|
|Mary|Engineering|       2|(7,[2],[1.0])|
|Jeff| Philosophy|       7|    (7,[],[])|
|Jane|       Math|       3|(7,[3],[1.0])|
|Lyna|    Nursing|       4|(7,[4],[1.0])|
+----+-----------+--------+-------------+



student_major_data = [user: string, major: string ... 1 more field]
oneHotEncoder = oneHot_f8da1cbec062


oneHot_f8da1cbec062

## Example tokenizer

In [1]:
import org.apache.spark.ml.feature.Tokenizer
import org.apache.spark.sql.functions._
// Four short lines of text keyed by id.
val text_data = spark.createDataFrame(Seq(
  (1, "Spark spark is a unified data analytics engine"),
  (2, "It is fun to work with Spark"),
  (3, "There is a lot of exciting sessions at upcoming Spark summit"),
  (4, "mllib transformer estimator evaluator and pipelines")
)).toDF("id", "line")

// Split every line into words.
val tokenizer = new Tokenizer()
  .setInputCol("line")
  .setOutputCol("words")

val tokenized = tokenizer.transform(text_data)

// Show the word arrays together with the number of tokens in each of them.
tokenized
  .select(col("words"), size(col("words")).as("tokens"))
  .show(truncate = false)

+------------------------------------------------------------------------+------+
|words                                                                   |tokens|
+------------------------------------------------------------------------+------+
|[spark, spark, is, a, unified, data, analytics, engine]                 |8     |
|[it, is, fun, to, work, with, spark]                                    |7     |
|[there, is, a, lot, of, exciting, sessions, at, upcoming, spark, summit]|11    |
|[mllib, transformer, estimator, evaluator, and, pipelines]              |6     |
+------------------------------------------------------------------------+------+



text_data = [id: int, line: string]
tokenizer = tok_12313160b45b
tokenized = [id: int, line: string ... 1 more field]


[id: int, line: string ... 1 more field]

## Suppression de mots

In [2]:
import org.apache.spark.ml.feature.StopWordsRemover
// enStopWords holds the words to remove (default English list).
val enStopWords = StopWordsRemover.loadDefaultStopWords("english")

// "words" is the column with the tokens to filter;
// "filtered" is the column that receives the result.
val remover = new StopWordsRemover()
  .setInputCol("words")
  .setOutputCol("filtered")
  .setStopWords(enStopWords)

// Reuses the `tokenized` DataFrame produced by the Tokenizer example.
val cleanedTokens = remover.transform(tokenized)

cleanedTokens
  .select("words", "filtered")
  .show(truncate = false)

+------------------------------------------------------------------------+-----------------------------------------------------+
|words                                                                   |filtered                                             |
+------------------------------------------------------------------------+-----------------------------------------------------+
|[spark, spark, is, a, unified, data, analytics, engine]                 |[spark, spark, unified, data, analytics, engine]     |
|[it, is, fun, to, work, with, spark]                                    |[fun, work, spark]                                   |
|[there, is, a, lot, of, exciting, sessions, at, upcoming, spark, summit]|[lot, exciting, sessions, upcoming, spark, summit]   |
|[mllib, transformer, estimator, evaluator, and, pipelines]              |[mllib, transformer, estimator, evaluator, pipelines]|
+------------------------------------------------------------------------+-----------------------

enStopWords = Array(i, me, my, myself, we, our, ours, ourselves, you, your, yours, yourself, yourselves, he, him, his, himself, she, her, hers, herself, it, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s,...


Array(i, me, my, myself, we, our, ours, ourselves, you, your, yours, yourself, yourselves, he, him, his, himself, she, her, hers, herself, it, its, itself, they, them, their, theirs, themselves, what, which, who, whom, this, that, these, those, am, is, are, was, were, be, been, being, have, has, had, having, do, does, did, doing, a, an, the, and, but, if, or, because, as, until, while, of, at, by, for, with, about, against, between, into, through, during, before, after, above, below, to, from, up, down, in, out, on, off, over, under, again, further, then, once, here, there, when, where, why, how, all, any, both, each, few, more, most, other, some, such, no, nor, not, only, own, same, so, than, too, very, s,...

## Hashing

À chaque mot est associé un nombre.
On compte le nombre d'occurrences de chaque mot (dans la ligne).

In [4]:
import org.apache.spark.ml.feature.HashingTF
// Term-frequency hashing over the stop-word-filtered tokens, using 4096
// feature slots.
val tf = new HashingTF()
  .setNumFeatures(4096)
  .setInputCol("filtered")
  .setOutputCol("TFOut")

val tfResult = tf.transform(cleanedTokens)

// Show each token list next to its term-frequency vector.
tfResult
  .select("filtered", "TFOut")
  .show(truncate = false)

+-----------------------------------------------------+---------------------------------------------------------------+
|filtered                                             |TFOut                                                          |
+-----------------------------------------------------+---------------------------------------------------------------+
|[spark, spark, unified, data, analytics, engine]     |(4096,[991,1185,1461,3377,3717],[1.0,2.0,1.0,1.0,1.0])         |
|[fun, work, spark]                                   |(4096,[251,1185,1575],[1.0,1.0,1.0])                           |
|[lot, exciting, sessions, upcoming, spark, summit]   |(4096,[724,1185,1255,1962,2966,3023],[1.0,1.0,1.0,1.0,1.0,1.0])|
|[mllib, transformer, estimator, evaluator, pipelines]|(4096,[994,2132,2697,3522,3894],[1.0,1.0,1.0,1.0,1.0])         |
+-----------------------------------------------------+---------------------------------------------------------------+



tf = hashingTF_3cd9525647dc
tfResult = [id: int, line: string ... 3 more fields]


[id: int, line: string ... 3 more fields]

## Exemple où les caractéristiques sont sauvegardées sous la forme d'un vecteur de doubles

In [5]:
import org.apache.spark.ml.feature.VectorAssembler
// Three raw feature columns of different types: int, double and boolean.
val arrival_features = spark.createDataFrame(Seq(
  (18, 95.1, true),
  (5, 65.7, true),
  (15, 31.5, false),
  (14, 40.5, false)
)).toDF("hour", "temperature", "on_time")

// Combine the three columns into a single "features" vector column; in the
// recorded output every entry, including the boolean, appears as a double.
val assembler = new VectorAssembler()
  .setInputCols(Array("hour", "temperature", "on_time"))
  .setOutputCol("features")

val output = assembler.transform(arrival_features)
output.show()

+----+-----------+-------+---------------+
|hour|temperature|on_time|       features|
+----+-----------+-------+---------------+
|  18|       95.1|   true|[18.0,95.1,1.0]|
|   5|       65.7|   true| [5.0,65.7,1.0]|
|  15|       31.5|  false|[15.0,31.5,0.0]|
|  14|       40.5|  false|[14.0,40.5,0.0]|
+----+-----------+-------+---------------+



arrival_features = [hour: int, temperature: double ... 1 more field]
assembler = vecAssembler_4845635224ad
output = [hour: int, temperature: double ... 2 more fields]


[hour: int, temperature: double ... 2 more fields]

## Example RFormula

In [11]:
import org.apache.spark.ml.feature.RFormula
val arrival_data = spark.createDataFrame(Seq(
  ("SFO", "B737", 18, 95.1, "late"),
  ("SEA", "A319", 5, 65.7, "ontime"),
  ("LAX", "B747", 15, 31.5, "late"),
  ("ATL", "A319", 14, 40.5, "late")
)).toDF("origin", "model", "hour", "temperature", "arrival")

// arrival is the label.
// All other columns are taken as features, plus the hour:temperature
// interaction term, and assembled into the feature vector.
// Doubles stay doubles; discrete (string) values are one-hot encoded.
// In the recorded output the arrival column becomes the numeric label
// ("late" -> 0.0, "ontime" -> 1.0).
val formula = new RFormula()
  .setFormula("arrival ~ . + hour:temperature")
  .setFeaturesCol("features")
  .setLabelCol("label")

// Call fit first, which returns a model (a kind of transformer), then call
// transform on it.
// Note: the resulting features column mixes dense and sparse vectors.
val output = formula.fit(arrival_data).transform(arrival_data)
output.show(truncate = false)

+------+-----+----+-----------+-------+-------------------------------------+-----+
|origin|model|hour|temperature|arrival|features                             |label|
+------+-----+----+-----------+-------+-------------------------------------+-----+
|SFO   |B737 |18  |95.1       |late   |(8,[0,5,6,7],[1.0,18.0,95.1,1711.8]) |0.0  |
|SEA   |A319 |5   |65.7       |ontime |[0.0,0.0,1.0,1.0,0.0,5.0,65.7,328.5] |1.0  |
|LAX   |B747 |15  |31.5       |late   |(8,[4,5,6,7],[1.0,15.0,31.5,472.5])  |0.0  |
|ATL   |A319 |14  |40.5       |late   |[0.0,1.0,0.0,1.0,0.0,14.0,40.5,567.0]|0.0  |
+------+-----+----+-----------+-------+-------------------------------------+-----+



arrival_data = [origin: string, model: string ... 3 more fields]
formula = RFormula(arrival ~ . + hour:temperature) (uid=rFormula_8349a1b200bd)
output = [origin: string, model: string ... 5 more fields]


[origin: string, model: string ... 5 more fields]

## Exemple TF-IDF

In [16]:
import org.apache.spark.ml.feature.Tokenizer
import org.apache.spark.ml.feature.HashingTF
import org.apache.spark.ml.feature.IDF
// texte 4 lignes
// Four lines of text.
val text_data = spark.createDataFrame(Seq(
  (1, "Spark is a unified data analytics engine"),
  (2, "Spark is cool and it is fun to work with Spark"),
  (3, "There is a lot of exciting sessions at upcoming Spark summit"),
  (4, "mllib transformer estimator evaluator and pipelines")
)).toDF("id", "line")

// Tokenize each line.
val tokenizer = new Tokenizer()
  .setInputCol("line")
  .setOutputCol("words")

// Hashing step: the Tokenizer output column is the HashingTF input column.
val tf = new HashingTF()
  .setNumFeatures(4096)
  .setInputCol("words")
  .setOutputCol("wordFreqVect")

val tfResult = tf.transform(tokenizer.transform(text_data))

// The HashingTF output column is the input of the IDF estimator.
val idf = new IDF()
  .setInputCol("wordFreqVect")
  .setOutputCol("features")

// IDF is an estimator, so fit it first; the returned object is a Model,
// which is a Transformer.
val idfModel = idf.fit(tfResult)
val weightedWords = idfModel.transform(tfResult)

weightedWords
  .select("words", "features")
  .show(truncate = false)

weightedWords.printSchema()

// Note: the results are sparse vectors, and words are represented by
// double values.
// The features column contains a vector with the weight of each word; it is
// wide, so the printed table is long.
weightedWords
  .select("wordFreqVect", "features")
  .show(truncate = false)


+------------------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|words                                                                   |features                                                                                                                                                                                                                                                                         |
+------------------------------------------------------------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

text_data = [id: int, line: string]
tokenizer = tok_2135c5547bc0
tf = hashingTF_0b7bcd542cc2
tfResult = [id: int, line: string ... 2 more fields]
idf = idf_cc6983da6d17
idfModel = idf_cc6983da6d17
weightedWords = [id: int, line: string ... 3 more fields]


lastException: Throwable = null


[id: int, line: string ... 3 more fields]

## Example StringIndexer

In [2]:
import org.apache.spark.ml.feature.StringIndexer
// Movies keyed by id, each with a genre given as a string.
val movie_data = spark.createDataFrame(Seq(
  (1, "Comedy"),
  (2, "Action"),
  (3, "Comedy"),
  (4, "Horror"),
  (5, "Action"),
  (6, "Comedy")
)).toDF("id", "genre")

// Map each genre string to a numeric index stored in "genreIdx".
val movieIndexer = new StringIndexer()
  .setInputCol("genre")
  .setOutputCol("genreIdx")

// StringIndexer is fitted on the data first ...
val movieIndexModel = movieIndexer.fit(movie_data)

// ... and the returned transformer is then applied to the data.
val indexedMovie = movieIndexModel.transform(movie_data)

indexedMovie
  .orderBy("genreIdx")
  .show()

+---+------+--------+
| id| genre|genreIdx|
+---+------+--------+
|  1|Comedy|     0.0|
|  6|Comedy|     0.0|
|  3|Comedy|     0.0|
|  5|Action|     1.0|
|  2|Action|     1.0|
|  4|Horror|     2.0|
+---+------+--------+



movie_data = [id: int, genre: string]
movieIndexer = strIdx_d3aafb9a72fc
movieIndexModel = strIdx_d3aafb9a72fc
indexedMovie = [id: int, genre: string ... 1 more field]


[id: int, genre: string ... 1 more field]

## Exemple OneHotEncoder (définition - modèle - application du modèle)

In [3]:
import org.apache.spark.ml.feature.OneHotEncoderEstimator
// the input column genreIdx is the output column of StringIndex in listing 8-9
// The input column genreIdx is the output column of the StringIndexer example.
val oneHotEncoderEst = new OneHotEncoderEstimator()
  .setInputCols(Array("genreIdx"))
  .setOutputCols(Array("genreIdxVector"))

// Fit on the indexedMovie data produced by the StringIndexer example, then
// apply the fitted model to the same data.
val oneHotEncoderModel = oneHotEncoderEst.fit(indexedMovie)
val oneHotEncoderVect = oneHotEncoderModel.transform(indexedMovie)

oneHotEncoderVect
  .orderBy("genre")
  .show()


+---+------+--------+--------------+
| id| genre|genreIdx|genreIdxVector|
+---+------+--------+--------------+
|  2|Action|     1.0| (2,[1],[1.0])|
|  5|Action|     1.0| (2,[1],[1.0])|
|  3|Comedy|     0.0| (2,[0],[1.0])|
|  6|Comedy|     0.0| (2,[0],[1.0])|
|  1|Comedy|     0.0| (2,[0],[1.0])|
|  4|Horror|     2.0|     (2,[],[])|
+---+------+--------+--------------+



oneHotEncoderEst = oneHotEncoder_a67e6d8e8674
oneHotEncoderModel = oneHotEncoder_a67e6d8e8674
oneHotEncoderVect = [id: int, genre: string ... 2 more fields]


[id: int, genre: string ... 2 more fields]

## Example word2Vec

In [1]:
import org.apache.spark.ml.feature.Word2Vec
// Three small documents, each split on spaces into an array of words and
// wrapped in a Tuple1 so it becomes a single-column row.
val documentDF = spark.createDataFrame(Seq(
  "Unified data analytics engine Spark".split(" "),
  "People use Hive for data analytics".split(" "),
  "MapReduce is not fading away".split(" ")
).map(Tuple1.apply)).toDF("word")

// Set up the estimator: 3-dimensional vectors, minimum count of 0.
val word2Vec = new Word2Vec()
  .setInputCol("word")
  .setOutputCol("feature")
  .setVectorSize(3)
  .setMinCount(0)

// Fit the model.
val model = word2Vec.fit(documentDF)

// Apply the model.
val result = model.transform(documentDF)
result.show(truncate = false)

+-----------------------------------------+-------------------------------------------------------------------+
|word                                     |feature                                                            |
+-----------------------------------------+-------------------------------------------------------------------+
|[Unified, data, analytics, engine, Spark]|[-0.04857720620930195,-0.039790508151054386,-0.0047628857195377355]|
|[People, use, Hive, for, data, analytics]|[-0.019269779634972412,-0.0019863341003656387,0.04896292210711787] |
|[MapReduce, is, not, fading, away]       |[0.09048619866371155,0.02390633299946785,0.004982998222112656]     |
+-----------------------------------------+-------------------------------------------------------------------+



documentDF = [word: array<string>]
word2Vec = w2v_7365afe6ef06
model = w2v_7365afe6ef06
result = [word: array<string>, feature: vector]


[word: array<string>, feature: vector]

In [2]:
// Look up the 3 words closest to "Spark"; in the recorded output both Hive
// and MapReduce appear among them.
model
  .findSynonyms("Spark", 3)
  .show()


+---------+------------------+
|     word|        similarity|
+---------+------------------+
|   engine|0.9133241772651672|
|MapReduce|0.7623026967048645|
|     Hive|0.7179173827171326|
+---------+------------------+



In [3]:
// Look up the 3 words closest to "Hive"; in the recorded output Spark is the
// closest one.
model
  .findSynonyms("Hive", 3)
  .show()

+------+-------------------+
|  word|         similarity|
+------+-------------------+
| Spark| 0.7179174423217773|
|fading| 0.5859972238540649|
|engine|0.43200281262397766|
+------+-------------------+



In [5]:
import org.apache.spark.ml.feature.MinMaxScaler
import org.apache.spark.ml.linalg.Vectors
// Employees with a two-component feature vector each.
val employee_data = spark.createDataFrame(Seq(
  (1, Vectors.dense(125400, 5.3)),
  (2, Vectors.dense(179100, 6.9)),
  (3, Vectors.dense(154770, 5.2)),
  (4, Vectors.dense(199650, 4.11))
)).toDF("empId", "features")

// Rescale the features into the range [0.0, 5.0].
val minMaxScaler = new MinMaxScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeatures")
  .setMin(0.0)
  .setMax(5.0)

// Fit the scaler on the data, then apply the fitted model to it.
val scalerModel = minMaxScaler.fit(employee_data)
val scaledData = scalerModel.transform(employee_data)

println(s"Features scaled to range: [${minMaxScaler.getMin},${minMaxScaler.getMax}]")


Features scaled to range: [0.0,5.0]


employee_data = [empId: int, features: vector]
minMaxScaler = minMaxScal_c9c0173078dc
scalerModel = minMaxScal_c9c0173078dc
scaledData = [empId: int, features: vector ... 1 more field]


[empId: int, features: vector ... 1 more field]

In [6]:
// Compare the original feature vectors with their rescaled versions.
scaledData
  .select("features", "scaledFeatures")
  .show(truncate = false)


+---------------+---------------------------------------+
|features       |scaledFeatures                         |
+---------------+---------------------------------------+
|[125400.0,5.3] |[0.0,2.1326164874551963]               |
|[179100.0,6.9] |[3.616161616161616,5.0]                |
|[154770.0,5.2] |[1.9777777777777779,1.9534050179211468]|
|[199650.0,4.11]|[5.0,0.0]                              |
+---------------+---------------------------------------+



In [7]:
import org.apache.spark.ml.feature.StandardScaler
import org.apache.spark.ml.linalg.Vectors
// Employees with a two-component feature vector each.
val employee_data = spark.createDataFrame(Seq(
  (1, Vectors.dense(125400, 5.3)),
  (2, Vectors.dense(179100, 6.9)),
  (3, Vectors.dense(154770, 5.2)),
  (4, Vectors.dense(199650, 4.11))
)).toDF("empId", "features")

// Scale to unit standard deviation and center around the mean.
val standardScaler = new StandardScaler()
  .setInputCol("features")
  .setOutputCol("scaledFeatures")
  .setWithStd(true)
  .setWithMean(true)

// Fit the scaler on the data, then apply the fitted model to it.
val standardMode = standardScaler.fit(employee_data)
val standardData = standardMode.transform(employee_data)

standardData.show(truncate = false)


+-----+---------------+------------------------------------------+
|empId|features       |scaledFeatures                            |
+-----+---------------+------------------------------------------+
|1    |[125400.0,5.3] |[-1.2290717420781212,-0.06743742573177663]|
|2    |[179100.0,6.9] |[0.4490658767775897,1.3248191055048923]   |
|3    |[154770.0,5.2] |[-0.3112523404805006,-0.15445345893406812]|
|4    |[199650.0,4.11]|[1.091258205781032,-1.1029282208390485]   |
+-----+---------------+------------------------------------------+



employee_data = [empId: int, features: vector]
standardScaler = stdScal_4b7eb83adfc1
standardMode = stdScal_4b7eb83adfc1
standardData = [empId: int, features: vector ... 1 more field]


[empId: int, features: vector ... 1 more field]

In [1]:
import org.apache.spark.{SparkContext, SparkConf}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

// Labelled training text: 0.0 for the Spark-related lines and 1.0 for the
// spam-like line. A binary classifier needs examples of both classes; with a
// single constant label the fitted logistic regression learns nothing from
// the features.
val text_data = spark.createDataFrame(Seq(
  (1, "Spark is a unified data analytics engine", 0.0),
  (2, "Spark is cool and it is fun to work with Spark", 0.0),
  (3, "There is a lot of exciting sessions at upcoming Spark summit", 0.0),
  (4, "signup to win a million dollars", 1.0)
)).toDF("id", "line", "label")

// Stage 1: split each line into words.
val tokenizer = new Tokenizer()
  .setInputCol("line")
  .setOutputCol("words")

// Stage 2: hash the words into a term-frequency vector. The input column is
// wired to the tokenizer's output column rather than repeated as a literal.
val hashingTF = new HashingTF()
  .setInputCol(tokenizer.getOutputCol)
  .setOutputCol("features")
  .setNumFeatures(4096)

// Stage 3: logistic regression on the "features" / "label" columns.
val logisticReg = new LogisticRegression()
  .setMaxIter(5)
  .setRegParam(0.01)

// Chain the three stages; fitting the pipeline fits every stage in order and
// returns a PipelineModel.
val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, logisticReg))

val logisticRegModel = pipeline.fit(text_data)

// Persist both the fitted model and the unfitted pipeline (paths are relative
// to the working directory; existing content is overwritten).
logisticRegModel.write.overwrite().save("tmp/spark-logistic-regression-model")
pipeline.write.overwrite().save("tmp/logistic-regression-pipeline")

// Load the model and the pipeline back from the same locations.
val prevModel = PipelineModel.load("tmp/spark-logistic-regression-model")
val prevPipeline = Pipeline.load("tmp/logistic-regression-pipeline")

text_data = [id: int, line: string ... 1 more field]
tokenizer = tok_ffd4311802bf
hashingTF = hashingTF_1d4db814d98c
logisticReg = logreg_5f2970095a9a
pipeline = pipeline_324d8d59bf80
logisticRegModel = pipeline_324d8d59bf80
prevModel = pipeline_324d8d59bf80
prevPipeline = pipeli...


pipeli...

In [2]:
// NOTE(review): no value named `sparkConf` is defined in this session, so this
// cell fails with "not found: value sparkConf" (see the recorded output).
// The configuration is presumably reachable through `sc.getConf` — TODO confirm.
sparkConf

Name: Unknown Error
Message: lastException: Throwable = null
<console>:26: error: not found: value sparkConf
       sparkConf
       ^

StackTrace: 

In [3]:
// Evaluate `sc` to display the SparkContext of this session.
sc

org.apache.spark.SparkContext@61cb1242

In [13]:
// Stop the SparkContext at the end of the session.
sc.stop()

lastException: Throwable = null
