/
SpecialOutPutWordApp.scala
57 lines (51 loc) · 1.48 KB
/
SpecialOutPutWordApp.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
package com.yjf.learning.spark.core
import com.yjf.learning.common.format.RDDSpecialTextOutputFormat
import org.apache.spark.{SparkConf, SparkContext}
/**
* @author Created by Jeff Yang on 2019-09-26 15:05.
* Update date:
* Project: learningsparkgroup
* Package: com.yjf.learning.spark.core
* Description: basic learning exercise (基础学习)
* Dependency :
* Frequency: Calculate once a day.
* Result of Test: test ok
* Command:
*
* Email: highfei2011@126.com
* Status:Using online
*
* Please note:
* Every time you submit, check that the configuration file is correct!
* Data is priceless! You bear the consequences of any accidental deletion!
*
*/
/**
  * Local Spark job that reads a comma-separated text file, keeps the first two
  * fields of every line as a key/value pair, and writes the pairs out through
  * the project's `RDDSpecialTextOutputFormat`.
  */
object SpecialOutPutWordApp {

  /**
    * Entry point. Input and output locations are fixed in the body; the output
    * directory is suffixed with the current time in milliseconds so each run
    * writes to a fresh path.
    *
    * @param args command-line arguments (not used)
    */
  def main(args: Array[String]): Unit = {
    val inputPath = "public/data/city/provice.txt"
    val outputPath = s"public/output/spark/${System.currentTimeMillis()}"

    // 1. Initialise the SparkContext (local mode, 2 threads).
    val spark = new SparkContext(
      new SparkConf().setMaster("local[2]").setAppName("TestWC")
    )

    // Stop the context even when reading or writing fails; otherwise a failed
    // job would leave the SparkContext running.
    try {
      // 2. Create the RDD (read the data source).
      val wordRDD = spark.textFile(inputPath)

      // 3. Transformation: split each line once, drop lines with fewer than
      //    two comma-separated fields, and keep (province, city).
      val mapRDD = wordRDD
        .map(_.split(","))
        .filter(_.length >= 2)
        .map(fields => (fields(0), fields(1)))

      // 4. Action: write the pairs using the custom output format.
      //    NOTE(review): file naming/layout is decided by
      //    RDDSpecialTextOutputFormat, which is defined outside this file.
      mapRDD.saveAsHadoopFile(
        outputPath,
        classOf[String],
        classOf[String],
        classOf[RDDSpecialTextOutputFormat]
      )
    } finally {
      // 5. Stop the SparkContext.
      spark.stop()
    }
  }
}