// TwitterTextConfiguration.java
// Copyright 2018 Twitter, Inc.
// Licensed under the Apache License, Version 2.0
// http://www.apache.org/licenses/LICENSE-2.0
package com.twitter.twittertext;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/**
 * A class that represents the different configurations used by {@link TwitterTextParser}
 * to parse a tweet.
 *
 * <p>All fields are final and the list of weighted ranges is unmodifiable, so a configuration
 * cannot be changed after it has been built. Use {@link Builder} to create a custom
 * configuration, or {@link #getDefaultConfig()} for the built-in defaults.
 */
public final class TwitterTextConfiguration {

  private static final int DEFAULT_VERSION = 3;
  private static final int DEFAULT_WEIGHTED_LENGTH = 280;
  private static final int DEFAULT_SCALE = 100;
  /** Weight applied to code points that are not covered by any weighted range. */
  private static final int DEFAULT_WEIGHT = 200;
  /** Emoji parsing supports counting emoji as one weighted character. */
  private static final boolean DEFAULT_EMOJI_PARSING_ENABLED = true;
  private static final int DEFAULT_TRANSFORMED_URL_LENGTH = 23;

  /**
   * The following unicode code point blocks are defined:
   * <ul>
   * <li>0x0000 (0) - 0x10FF (4351) Basic Latin to Georgian block: Weight 100</li>
   * <li>0x2000 (8192) - 0x200D (8205) Spaces in the General Punctuation Block: Weight 100</li>
   * <li>0x2010 (8208) - 0x201F (8223) Hyphens &amp; Quotes in the General Punctuation Block:
   * Weight 100</li>
   * <li>0x2032 (8242) - 0x2037 (8247) Quotes in the General Punctuation Block: Weight 100</li>
   * </ul>
   * The list is unmodifiable so that no caller can alter the process-wide defaults.
   */
  private static final List<TwitterTextWeightedRange> DEFAULT_RANGES;

  static {
    final List<TwitterTextWeightedRange> defaults = new ArrayList<>(4);
    defaults.add(new TwitterTextWeightedRange().setStart(0).setEnd(4351).setWeight(100));
    defaults.add(new TwitterTextWeightedRange().setStart(8192).setEnd(8205).setWeight(100));
    defaults.add(new TwitterTextWeightedRange().setStart(8208).setEnd(8223).setWeight(100));
    defaults.add(new TwitterTextWeightedRange().setStart(8242).setEnd(8247).setWeight(100));
    DEFAULT_RANGES = Collections.unmodifiableList(defaults);
  }

  private final int version;
  private final int maxWeightedTweetLength;
  private final int scale;
  private final int defaultWeight;
  private final boolean emojiParsingEnabled;
  private final int transformedURLLength;
  @Nonnull
  private final List<TwitterTextWeightedRange> ranges;

  /**
   * Build a configuration populated with the default values: version 3, a maximum weighted
   * length of 280, a scale of 100, a default weight of 200, emoji parsing enabled, a
   * transformed URL length of 23 and the four default weighted ranges.
   *
   * @return a new configuration holding the default values.
   */
  public static TwitterTextConfiguration getDefaultConfig() {
    return new TwitterTextConfiguration.Builder()
        .setVersion(DEFAULT_VERSION)
        .setMaxWeightedTweetLength(DEFAULT_WEIGHTED_LENGTH)
        .setScale(DEFAULT_SCALE)
        .setDefaultWeight(DEFAULT_WEIGHT)
        .setEmojiParsingEnabled(DEFAULT_EMOJI_PARSING_ENABLED)
        .setRanges(DEFAULT_RANGES)
        .setTransformedURLLength(DEFAULT_TRANSFORMED_URL_LENGTH)
        .build();
  }

  private TwitterTextConfiguration(@Nonnull Builder builder) {
    version = builder.version;
    maxWeightedTweetLength = builder.maxWeightedTweetLength;
    scale = builder.scale;
    defaultWeight = builder.defaultWeight;
    emojiParsingEnabled = builder.emojiParsingEnabled;
    transformedURLLength = builder.transformedURLLength;
    // The builder only ever holds a private copy that it never mutates, so a read-only view is
    // enough to guarantee that this configuration's ranges cannot change after construction.
    ranges = Collections.unmodifiableList(builder.ranges);
  }

  /**
   * Builder for {@link TwitterTextConfiguration}. Values that are never set keep the Java
   * defaults: {@code 0} for the integer settings, {@code false} for emoji parsing and an
   * empty list of ranges.
   */
  public static final class Builder {
    private int version;
    private int maxWeightedTweetLength;
    private int scale;
    private int defaultWeight;
    private boolean emojiParsingEnabled;
    private int transformedURLLength;
    @Nonnull
    private List<TwitterTextWeightedRange> ranges = new ArrayList<>();

    /**
     * @param version the configuration version.
     * @return this builder.
     */
    public Builder setVersion(int version) {
      this.version = version;
      return this;
    }

    /**
     * @param maxWeightedTweetLength the maximum weighted length of a tweet.
     * @return this builder.
     */
    public Builder setMaxWeightedTweetLength(int maxWeightedTweetLength) {
      this.maxWeightedTweetLength = maxWeightedTweetLength;
      return this;
    }

    /**
     * @param scale the scale; see {@link TwitterTextConfiguration#getScale()}.
     * @return this builder.
     */
    public Builder setScale(int scale) {
      this.scale = scale;
      return this;
    }

    /**
     * @param defaultWeight the weight applied to code points not covered by a range.
     * @return this builder.
     */
    public Builder setDefaultWeight(int defaultWeight) {
      this.defaultWeight = defaultWeight;
      return this;
    }

    /**
     * @param emojiParsingEnabled whether emoji parsing is enabled.
     * @return this builder.
     */
    public Builder setEmojiParsingEnabled(boolean emojiParsingEnabled) {
      this.emojiParsingEnabled = emojiParsingEnabled;
      return this;
    }

    /**
     * @param urlLength the length counted for each URL.
     * @return this builder.
     */
    public Builder setTransformedURLLength(int urlLength) {
      this.transformedURLLength = urlLength;
      return this;
    }

    /**
     * Set the weighted ranges. The list is copied, so later changes to the argument do not
     * affect this builder or any configuration built from it.
     *
     * @param ranges the weighted ranges; must not be null.
     * @return this builder.
     * @throws NullPointerException if {@code ranges} is null.
     */
    public Builder setRanges(@Nonnull List<TwitterTextWeightedRange> ranges) {
      if (ranges == null) {
        // Fail here rather than later inside hashCode()/equals() or a consumer of getRanges().
        throw new NullPointerException("ranges must not be null");
      }
      this.ranges = new ArrayList<>(ranges);
      return this;
    }

    /**
     * @return a new configuration holding the values currently set on this builder.
     */
    public TwitterTextConfiguration build() {
      return new TwitterTextConfiguration(this);
    }
  }

  /**
   * Get the current version. This is an integer that will monotonically
   * increase in future releases. The legacy version of the string is version 1;
   * weighted code point ranges and 280-character "long" tweets are supported in version 2.
   * The default configuration uses version 3.
   *
   * @return The version for the configuration string.
   */
  public int getVersion() {
    return version;
  }

  /**
   * Get the maximum weighted length in the config. Legacy v1 tweets had a maximum
   * weighted length of 140 and all characters were weighted the same.
   * In the new configuration format, this is represented as a {@code maxWeightedTweetLength}
   * of 140 and a {@code defaultWeight} of 1 for all code points.
   *
   * @return The maximum length of the tweet, weighted.
   */
  public int getMaxWeightedTweetLength() {
    return maxWeightedTweetLength;
  }

  /**
   * Get the scale.
   *
   * @return The Tweet length is the (weighted length / scale).
   */
  public int getScale() {
    return scale;
  }

  /**
   * Get the default weight. This is overridden in one or more range items.
   *
   * @return The default weight applied to all code points.
   */
  public int getDefaultWeight() {
    return defaultWeight;
  }

  /**
   * Get whether emoji parsing is enabled.
   *
   * @return true if emoji parsing is enabled, otherwise false.
   */
  public boolean getEmojiParsingEnabled() {
    return emojiParsingEnabled;
  }

  /**
   * In previous versions of twitter-text, this was the "shortened URL length."
   * Differentiating between the http and https shortened length for URLs has been deprecated
   * (https is used for all t.co URLs). The default value is 23.
   *
   * @return The length counted for URLs against the total weight of the Tweet.
   */
  public int getTransformedURLLength() {
    return transformedURLLength;
  }

  /**
   * Get the list of range items that describe ranges of Unicode code points and the weight to
   * apply to each code point. Each range is defined by its start, end, and weight.
   * Surrogate pairs have a length that is equivalent to the length of the first code unit in the
   * surrogate pair. Note that certain graphemes are the result of joining code points together,
   * such as by a zero-width joiner; unlike a surrogate pair, the length of such a grapheme will be
   * the sum of the weighted length of all included code points.
   *
   * @return An unmodifiable list of range items; attempts to modify it throw
   *     {@link UnsupportedOperationException}.
   */
  @Nonnull
  public List<TwitterTextWeightedRange> getRanges() {
    return ranges;
  }

  @Override
  public int hashCode() {
    int result = 17;
    result = result * 31 + version;
    result = result * 31 + maxWeightedTweetLength;
    result = result * 31 + scale;
    result = result * 31 + defaultWeight;
    result = result * 31 + (emojiParsingEnabled ? 1 : 0);
    result = result * 31 + transformedURLLength;
    result = result * 31 + ranges.hashCode();
    return result;
  }

  @Override
  public boolean equals(@Nullable Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    final TwitterTextConfiguration that = (TwitterTextConfiguration) o;
    return version == that.version && maxWeightedTweetLength == that.maxWeightedTweetLength &&
        scale == that.scale && defaultWeight == that.defaultWeight &&
        emojiParsingEnabled == that.emojiParsingEnabled &&
        transformedURLLength == that.transformedURLLength && ranges.equals(that.ranges);
  }

  /**
   * A contiguous range of Unicode code points together with the weight applied to each code
   * point in that range. The setters are private, so only the enclosing class can populate
   * an instance.
   */
  public static class TwitterTextWeightedRange {
    private int start;
    private int end;
    private int weight;

    private TwitterTextWeightedRange setStart(int start) {
      this.start = start;
      return this;
    }

    private TwitterTextWeightedRange setEnd(int end) {
      this.end = end;
      return this;
    }

    private TwitterTextWeightedRange setWeight(int weight) {
      this.weight = weight;
      return this;
    }

    /**
     * Get the contiguous unicode region
     *
     * @return range object
     */
    @Nonnull
    public Range getRange() {
      return new Range(start, end);
    }

    /**
     * Get the Weight for each unicode point in the region
     *
     * @return integer indicating the weight
     */
    public int getWeight() {
      return weight;
    }

    @Override
    public int hashCode() {
      // Position-dependent combination: a plain sum of the fields would give the same hash to
      // every permutation of (start, end, weight).
      int result = 17;
      result = result * 31 + start;
      result = result * 31 + end;
      result = result * 31 + weight;
      return result;
    }

    @Override
    public boolean equals(@Nullable Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }
      final TwitterTextWeightedRange that = (TwitterTextWeightedRange) o;
      return start == that.start && end == that.end && weight == that.weight;
    }
  }
}