Permalink
Browse files

Add support for extracting/auto-linking cashtags

  • Loading branch information...
1 parent 2dba3dc commit 0bd5d14b9eec84e5784daca540082b93d294f076 keita committed May 3, 2012
@@ -16,12 +16,16 @@
public static final String DEFAULT_USERNAME_CLASS = "username";
/** Default CSS class for auto-linked hashtag URLs */
public static final String DEFAULT_HASHTAG_CLASS = "hashtag";
+ /** Default CSS class for auto-linked cashtag URLs */
+ public static final String DEFAULT_CASHTAG_CLASS = "cashtag";
/** Default href for username links (the username without the @ will be appended) */
public static final String DEFAULT_USERNAME_URL_BASE = "https://twitter.com/";
/** Default href for list links (the username/list without the @ will be appended) */
public static final String DEFAULT_LIST_URL_BASE = "https://twitter.com/";
/** Default href for hashtag links (the hashtag without the # will be appended) */
public static final String DEFAULT_HASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%23";
+ /** Default href for cashtag links (the cashtag without the $ will be appended) */
+ public static final String DEFAULT_CASHTAG_URL_BASE = "https://twitter.com/#!/search?q=%24";
/** HTML attribute to add when noFollow is true (default) */
public static final String NO_FOLLOW_HTML_ATTRIBUTE = " rel=\"nofollow\"";
/** Default attribute for invisible span tag */
@@ -31,9 +35,11 @@
protected String listClass;
protected String usernameClass;
protected String hashtagClass;
+ protected String cashtagClass;
protected String usernameUrlBase;
protected String listUrlBase;
protected String hashtagUrlBase;
+ protected String cashtagUrlBase;
protected String invisibleTagAttrs;
protected boolean noFollow = true;
protected boolean usernameIncludeSymbol = false;
@@ -60,9 +66,11 @@ public Autolink() {
listClass = DEFAULT_LIST_CLASS;
usernameClass = DEFAULT_USERNAME_CLASS;
hashtagClass = DEFAULT_HASHTAG_CLASS;
+ cashtagClass = DEFAULT_CASHTAG_CLASS;
usernameUrlBase = DEFAULT_USERNAME_URL_BASE;
listUrlBase = DEFAULT_LIST_URL_BASE;
hashtagUrlBase = DEFAULT_HASHTAG_URL_BASE;
+ cashtagUrlBase = DEFAULT_CASHTAG_URL_BASE;
invisibleTagAttrs = DEFAULT_INVISIBLE_TAG_ATTRS;
extractor.setExtractURLWithoutProtocol(false);
@@ -202,6 +210,17 @@ public void linkToURL(Entity entity, String text, StringBuilder builder) {
builder.append(">").append(linkText).append("</a>");
}
+ /**
+  * Appends an anchor tag for a single cashtag entity to the output buffer.
+  * The href is cashtagUrlBase followed by the entity value, the title and the
+  * link text are the entity value prefixed with "$", and the class attribute
+  * holds urlClass and cashtagClass separated by a space. The nofollow
+  * attribute is added only while noFollow is set.
+  *
+  * @param entity the cashtag entity to link
+  * @param text the Tweet text the entity came from (not read by this method)
+  * @param builder buffer that receives the generated HTML
+  */
+ public void linkToCashtag(Entity entity, String text, StringBuilder builder) {
+   // Opening tag: href, title and class attributes.
+   builder.append("<a href=\"").append(cashtagUrlBase).append(entity.getValue()).append("\"")
+       .append(" title=\"$").append(entity.getValue()).append("\"")
+       .append(" class=\"").append(urlClass).append(" ").append(cashtagClass).append("\"");
+   if (noFollow) {
+     builder.append(NO_FOLLOW_HTML_ATTRIBUTE);
+   }
+   // Close the opening tag, then emit the visible "$value" text and the end tag.
+   builder.append(">$").append(entity.getValue()).append("</a>");
+ }
+
public String autoLinkEntities(String text, List<Entity> entities) {
StringBuilder builder = new StringBuilder(text.length() * 2);
int beginIndex = 0;
@@ -218,6 +237,10 @@ public String autoLinkEntities(String text, List<Entity> entities) {
break;
case MENTION:
linkToMentionAndList(entity, text, builder);
+ break;
+ case CASHTAG:
+ linkToCashtag(entity, text, builder);
+ break;
}
beginIndex = entity.end;
}
@@ -275,6 +298,17 @@ public String autoLinkURLs(String text) {
return autoLinkEntities(text, extractor.extractURLsWithIndices(text));
}
+ /**
+  * Auto-link the $cashtag references found in the provided Tweet text.
+  * The cashtags are located with the extractor and then handed to
+  * autoLinkEntities, which produces the final HTML.
+  *
+  * @param text of the Tweet to auto-link
+  * @return text with auto-link HTML added
+  */
+ public String autoLinkCashtags(String text) {
+   List<Entity> cashtags = extractor.extractCashtagsWithIndices(text);
+   return autoLinkEntities(text, cashtags);
+ }
+
/**
* @return CSS class for auto-linked URLs
*/
@@ -339,6 +373,22 @@ public void setHashtagClass(String hashtagClass) {
this.hashtagClass = hashtagClass;
}
+ /**
+  * Returns the CSS class currently configured for cashtag links.
+  *
+  * @return CSS class for auto-linked cashtag URLs
+  */
+ public String getCashtagClass() {
+   return this.cashtagClass;
+ }
+
+ /**
+ * Set the CSS class for auto-linked cashtag URLs. linkToCashtag writes this
+ * value into the class attribute of each cashtag link, after urlClass.
+ *
+ * @param cashtagClass new CSS class value; stored as given, without validation
+ */
+ public void setCashtagClass(String cashtagClass) {
+ this.cashtagClass = cashtagClass;
+ }
+
/**
* @return the href value for username links (to which the username will be appended)
*/
@@ -387,6 +437,22 @@ public void setHashtagUrlBase(String hashtagUrlBase) {
this.hashtagUrlBase = hashtagUrlBase;
}
+ /**
+  * Returns the href prefix currently configured for cashtag links.
+  *
+  * @return the href value for cashtag links (to which the cashtag will be appended)
+  */
+ public String getCashtagUrlBase() {
+   return this.cashtagUrlBase;
+ }
+
+ /**
+ * Set the href base for cashtag links. linkToCashtag builds each link's href
+ * by appending the cashtag value directly to this string.
+ *
+ * @param cashtagUrlBase new href base value; stored as given, without validation
+ */
+ public void setCashtagUrlBase(String cashtagUrlBase) {
+ this.cashtagUrlBase = cashtagUrlBase;
+ }
+
/**
* @return if the current URL links will include rel="nofollow" (true by default)
*/
@@ -10,7 +10,7 @@
public class Extractor {
public static class Entity {
public enum Type {
- URL, HASHTAG, MENTION
+ URL, HASHTAG, MENTION, CASHTAG
}
protected int start;
protected int end;
@@ -43,6 +43,7 @@ public Entity(Matcher matcher, Type type, int groupNumber, int startOffset) {
this(matcher.start(groupNumber) + startOffset, matcher.end(groupNumber), matcher.group(groupNumber), type);
}
+ @Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
@@ -64,10 +65,16 @@ public boolean equals(Object obj) {
}
}
+ @Override
public int hashCode() {
// Plain sum of the type's and value's hash codes and the start/end offsets;
// every field of the entity contributes, but with equal weight.
return this.type.hashCode() + this.value.hashCode() + this.start + this.end;
}
+ @Override
+ public String toString() {
+ return value + "(" + type +") [" +start + "," + end +"]";
+ }
+
/**
 * @return the value of the start field; the int field is boxed to an Integer on return
 */
public Integer getStart() {
return start;
}
@@ -149,6 +156,7 @@ public int compare(Entity e1, Entity e2) {
entities.addAll(extractURLsWithIndices(text));
entities.addAll(extractHashtagsWithIndices(text, false));
entities.addAll(extractMentionsOrListsWithIndices(text));
+ entities.addAll(extractCashtagsWithIndices(text));
removeOverlappingEntities(entities);
return entities;
@@ -400,6 +408,54 @@ public String extractReplyScreenname(String text) {
return extracted;
}
+ /**
+  * Extract $cashtag references from Tweet text, returning only their values.
+  * This is a convenience wrapper around extractCashtagsWithIndices that
+  * discards the position information.
+  *
+  * @param text of the tweet from which to extract cashtags
+  * @return List of cashtags referenced (without the leading $ sign); an
+  *         empty list when text is null or empty
+  */
+ public List<String> extractCashtags(String text) {
+   if (text == null || text.isEmpty()) {
+     return Collections.emptyList();
+   }
+
+   List<Entity> entities = extractCashtagsWithIndices(text);
+   List<String> cashtags = new ArrayList<String>(entities.size());
+   for (Entity cashtag : entities) {
+     cashtags.add(cashtag.value);
+   }
+   return cashtags;
+ }
+
+ /**
+ * Extract $cashtag references from Tweet text.
+ *
+ * @param text of the tweet from which to extract cashtags
+ * @return List of cashtags referenced (without the leading $ sign)
+ */
+ public List<Entity> extractCashtagsWithIndices(String text) {
+ if (text == null || text.isEmpty()) {
+ return Collections.emptyList();
+ }
+
+ // Performance optimization.
+ // If text doesn't contain $, text doesn't contain
+ // cashtag, so we can simply return an empty list.
+ if (text.indexOf('$') == -1) {
+ return Collections.emptyList();
+
+ }
+
+ List<Entity> extracted = new ArrayList<Entity>();
+ Matcher matcher = Regex.VALID_CASHTAG.matcher(text);
+
+ while (matcher.find()) {
+ extracted.add(new Entity(matcher, Entity.Type.CASHTAG, Regex.VALID_CASHTAG_GROUP_CASHTAG));
+ }
+
+ return extracted;
+ }
+
/**
 * Stores the given flag in this extractor's extractURLWithoutProtocol field.
 *
 * @param extractURLWithoutProtocol new value of the flag
 */
public void setExtractURLWithoutProtocol(boolean extractURLWithoutProtocol) {
this.extractURLWithoutProtocol = extractURLWithoutProtocol;
}
@@ -134,6 +134,8 @@
private static String AT_SIGNS_CHARS = "@\uFF20";
+ private static final String DOLLAR_SIGN_CHAR = "\\$"; // regex fragment for a literal '$' (escaped so it is not read as the end-of-input anchor)
+ private static final String CASHTAG = "[a-z]{1,6}(?:[._][a-z]{1,2})?"; // 1-6 letters, optionally followed by '.' or '_' and 1-2 more letters; upper case matches only when the enclosing pattern is case-insensitive
/* Begin public constants */
@@ -167,4 +169,7 @@
public static final Pattern VALID_TCO_URL = Pattern.compile("^https?:\\/\\/t\\.co\\/[a-z0-9]+", Pattern.CASE_INSENSITIVE);
public static final Pattern INVALID_URL_WITHOUT_PROTOCOL_MATCH_BEGIN = Pattern.compile("[-_./]$");
+ /**
+  * Matches one cashtag. The match must begin at the start of the input or
+  * with a character matched by UNICODE_SPACES (that character is consumed as
+  * part of the overall match), followed by a literal '$' and the CASHTAG
+  * body, which is captured as group 1. A lookahead then requires the end of
+  * the input, a whitespace character or a punctuation character, without
+  * consuming it. Compiled case-insensitively, so upper-case symbols match.
+  */
+ public static final Pattern VALID_CASHTAG = Pattern.compile("(?:^|" + UNICODE_SPACES + ")" + DOLLAR_SIGN_CHAR + "(" + CASHTAG + ")" +"(?=$|\\s|\\p{Punct})", Pattern.CASE_INSENSITIVE);
+ public static final int VALID_CASHTAG_GROUP_CASHTAG = 1; // capture group of VALID_CASHTAG holding the cashtag text without the '$'
}
@@ -81,6 +81,33 @@ public void testURLsExtractor() throws Exception {
}
}
+ /**
+  * Runs every case in the "cashtags" section of extract.yml through
+  * extractCashtags and compares the result with the expected output.
+  */
+ public void testCashtagsExtractor() throws Exception {
+   List testCases = loadConformanceData(new File(conformanceDir, "extract.yml"), "cashtags");
+   for (Map testCase : (List<Map>)testCases) {
+     String description = (String)testCase.get(KEY_DESCRIPTION);
+     List expected = (List)testCase.get(KEY_EXPECTED_OUTPUT);
+     String input = (String)testCase.get(KEY_INPUT);
+     assertEquals(description, expected, extractor.extractCashtags(input));
+   }
+ }
+
+ /**
+  * Runs every case in the "cashtags_with_indices" section of extract.yml
+  * through extractCashtagsWithIndices. Each expected entry is rebuilt as a
+  * CASHTAG entity from its "indices" pair and "cashtag" value before the
+  * lists are compared.
+  */
+ public void testCashtagsWithIndicesExtractor() throws Exception {
+   List testCases = loadConformanceData(new File(conformanceDir, "extract.yml"), "cashtags_with_indices");
+   for (Map testCase : (List<Map>)testCases) {
+     List<Map<String, Object>> expectedConfig = (List)testCase.get(KEY_EXPECTED_OUTPUT);
+     List<Extractor.Entity> expected = new ArrayList<Extractor.Entity>();
+     for (Map<String, Object> configEntry : expectedConfig) {
+       List<Integer> indices = (List<Integer>)configEntry.get("indices");
+       Integer start = indices.get(0);
+       Integer end = indices.get(1);
+       String cashtag = configEntry.get("cashtag").toString();
+       expected.add(new Extractor.Entity(start, end, cashtag, Entity.Type.CASHTAG));
+     }
+
+     String description = (String)testCase.get(KEY_DESCRIPTION);
+     String input = (String)testCase.get(KEY_INPUT);
+     assertEquals(description, expected, extractor.extractCashtagsWithIndices(input));
+   }
+ }
+
public void testUsernameAutolinking() throws Exception {
File yamlFile = new File(conformanceDir, "autolink.yml");
@@ -122,6 +149,16 @@ public void testURLAutolinking() throws Exception {
}
}
+ /**
+  * Runs every case in the "cashtags" section of autolink.yml through
+  * autoLinkCashtags and compares the generated HTML with the expected output.
+  */
+ public void testCashtagAutolinking() throws Exception {
+   List testCases = loadConformanceData(new File(conformanceDir, "autolink.yml"), "cashtags");
+   for (Map testCase : (List<Map>) testCases) {
+     String description = (String) testCase.get(KEY_DESCRIPTION);
+     String expectedHtml = (String) testCase.get(KEY_EXPECTED_OUTPUT);
+     String tweet = (String) testCase.get(KEY_INPUT);
+     assertEquals(description, expectedHtml, linker.autoLinkCashtags(tweet));
+   }
+ }
+
public void testAllAutolinking() throws Exception {
File yamlFile = new File(conformanceDir, "autolink.yml");
List testCases = loadConformanceData(yamlFile, "all");

0 comments on commit 0bd5d14

Please sign in to comment.