-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathApacheWordCount.java
62 lines (45 loc) Β· 1.53 KB
/
ApacheWordCount.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import java.io.FileInputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.util.*;
/*
* @author Thomas George Thomas
* Computing word count and execution time of Apache hadoop wikipedia text file.
*/
public class ApacheWordCount {

    // Kept as a class field (not a local) to preserve the original interface;
    // captured at class-initialization time, so timing effectively starts
    // when the class loads.
    static long startTime = System.currentTimeMillis();

    /**
     * Reads the Apache Hadoop wiki text file, counts occurrences of each
     * whitespace-delimited word (preserving first-seen order via a
     * LinkedHashMap), writes one "(word,count)" line per word to the result
     * file, and prints the elapsed wall-clock time in seconds.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the input file cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        LinkedHashMap<String, Integer> words = new LinkedHashMap<>();

        // try-with-resources guarantees the stream and scanner are closed
        // even if reading fails partway (the original leaked them on any
        // exception thrown mid-scan).
        try (FileInputStream fin = new FileInputStream("Input-Files/apache-hadoop-wiki.txt");
             Scanner sc = new Scanner(fin)) {
            while (sc.hasNext()) {
                // merge() replaces the containsKey/get/put dance: stores 1 on
                // first sight of a word, otherwise adds 1 to the stored count.
                words.merge(sc.next(), 1, Integer::sum);
            }
        }

        // Write the "(word,count)" report; the writer is auto-closed.
        try (FileWriter fw = new FileWriter(
                "Java-Word-Count-and-Analysis/WordCount_java/Result/Word-count-apache-hadoop-wiki.txt",
                false)) {
            for (Map.Entry<String, Integer> entry : words.entrySet()) {
                fw.write("(" + entry.getKey() + "," + entry.getValue() + ")");
                fw.write(System.lineSeparator());
            }
        } catch (IOException e) {
            // Narrowed from catch (Exception): FileWriter operations only
            // throw IOException. Preserves the original best-effort behavior —
            // a write failure is reported but does not abort the program.
            System.out.println("Exception occurred: " + e.getMessage());
        }

        // Report elapsed wall-clock time since class load.
        long endTime = System.currentTimeMillis();
        System.out.println("Execution time: " + ((endTime - startTime) / 1000.0) + " seconds");
    }
}