-
Notifications
You must be signed in to change notification settings - Fork 7
/
TextFileStreamer.java
139 lines (127 loc) · 4.84 KB
/
TextFileStreamer.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
package edu.usfca.cs272;
import static java.nio.charset.StandardCharsets.UTF_8;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeSet;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Demonstrates how to implement a text parser using streams.
*
* @author CS 272 Software Development (University of San Francisco)
* @version Fall 2022
*/
public class TextFileStreamer {
	/*
	 * An initial approach trying to implement text parsing using streams instead.
	 * However, when we use this to add to a collection defined outside our
	 * stream, we are creating code with side-effects!
	 */

	/**
	 * Streams through a text file at the specified path, applies the clean
	 * function to each <em>line</em>, uses the tokenize function to split the
	 * cleaned lines into tokens, and then passes each token to the consumer.
	 *
	 * <p>Note that clean is applied per-line (before tokenizing), not per-token;
	 * a clean function that inserts whitespace will therefore affect how lines
	 * are later split.
	 *
	 * @param path the path to read
	 * @param clean the function to apply to each line before tokenizing
	 * @param tokenize the function to split cleaned lines into tokens
	 * @param consumer the consumer that receives each token
	 * @throws IOException if an I/O error occurs
	 */
	public static void consumeTextFile(Path path, Function<String, String> clean,
			Function<String, String[]> tokenize, Consumer<String> consumer) throws IOException {
		try (
				BufferedReader reader = Files.newBufferedReader(path, UTF_8);
				Stream<String> lines = reader.lines();
		) {
			lines.map(clean) // clean each line first
					.flatMap(line -> Stream.of(tokenize.apply(line))) // line --> tokens
					.forEach(consumer); // side effect: consumer may mutate outside state
		}
	}

	/**
	 * Applies the provided clean function to each line at the specified path,
	 * splits the cleaned lines by whitespace, and returns the tokens as a list.
	 *
	 * @param path the path to read
	 * @param clean the function to apply to each line before splitting
	 * @return list of split and cleaned words
	 * @throws IOException if an I/O error occurs
	 */
	public static List<String> consumeTextAsList(Path path,
			Function<String, String> clean) throws IOException {
		List<String> words = new ArrayList<>(); // diamond operator; type is on the left
		consumeTextFile(path, clean, s -> s.split("\\s+"), words::add);
		return words;
	}

	/*
	 * A better approach that has no dangerous side-effects.
	 */

	/**
	 * Streams through a text file at the specified path, applies the clean
	 * function to each <em>line</em>, uses the tokenize function to split the
	 * cleaned lines into tokens, and collects the tokens into the collection
	 * created by the supplier.
	 *
	 * @param <C> the type of {@link Collection} to return
	 * @param path the path to read
	 * @param clean the function to apply to each line before tokenizing
	 * @param tokenize the function to split cleaned lines into tokens
	 * @param collector supplies the (empty) collection to fill with tokens
	 * @return the collection of tokens
	 * @throws IOException if an I/O error occurs
	 */
	public static <C extends Collection<String>> C collectTextFile(Path path,
			Function<String, String> clean, Function<String, String[]> tokenize,
			Supplier<C> collector) throws IOException {
		try (
				BufferedReader reader = Files.newBufferedReader(path, UTF_8);
				Stream<String> lines = reader.lines();
		) {
			// no side effects: the collection is created and filled inside the pipeline
			return lines.map(clean)
					.flatMap(line -> Stream.of(tokenize.apply(line)))
					.collect(Collectors.toCollection(collector));
		}
	}

	/**
	 * Applies the provided clean function to each line at the specified path,
	 * splits the cleaned lines by whitespace, and returns the tokens as a list.
	 *
	 * @param path the path to read
	 * @param clean the function to apply to each line before splitting
	 * @return list of split and cleaned words
	 * @throws IOException if an I/O error occurs
	 */
	public static List<String> collectTextAsList(Path path,
			Function<String, String> clean) throws IOException {
		return collectTextFile(path, clean, s -> s.split("\\s+"), ArrayList::new);
	}

	/**
	 * Demonstrates this class.
	 *
	 * @param args unused
	 * @throws IOException if an I/O error occurs
	 */
	public static void main(String[] args) throws IOException {
		Path sally = Path.of("src", "main", "resources", "sally.txt");

		// BUG FIX: the old class [^A-z\s] used the range A-z, which also matches
		// the characters [ \ ] ^ _ ` between 'Z' and 'a' in ASCII — use explicit
		// A-Z and a-z ranges so only letters and whitespace are kept
		Function<String, String> clean = s -> s.toLowerCase().replaceAll("[^a-zA-Z\\s]+", " ");

		System.out.println(consumeTextAsList(sally, clean));
		System.out.println(collectTextAsList(sally, clean));

		Function<String, String[]> tokenize = (String s) -> s.split("\\s+");
		TreeSet<String> set = collectTextFile(sally, clean, tokenize, TreeSet::new);
		System.out.println(set);
	}

	/*
	 * Have we gone too far? This does demonstrate how we could use streams and
	 * lambdas to continue and generalize our approach to text parsing, but
	 * honestly the core operations in streamTextFile are compact and simple
	 * enough that you really could just recreate and customize it as necessary.
	 */
}