Skip to content
This repository has been archived by the owner on Sep 18, 2021. It is now read-only.

Commit

Permalink
Merge pull request #52 from twitter/kl_autolink_urls_with_nested_parens
Browse files Browse the repository at this point in the history
Allow one nested level of balanced parentheses.
  • Loading branch information
KL-7 committed Nov 13, 2013
2 parents dc3115b + f1f2bb6 commit 539cff6
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 36 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ build
build/*
out
dist
Twitter-text-java.iml
twitter-text-java.ipr
twitter-text-java.iws
32 changes: 0 additions & 32 deletions Twitter-text-java.iml

This file was deleted.

21 changes: 18 additions & 3 deletions src/com/twitter/Regex.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,27 @@ public class Regex {
// Port number: one or more ASCII digits. The "++" is a possessive quantifier, so the
// matched digits are never given back when the rest of the pattern backtracks.
private static final String URL_VALID_PORT_NUMBER = "[0-9]++";

// Character class matching ONE character allowed in the general body of a URL path:
// a-z, 0-9, the punctuation listed below, plus whatever LATIN_ACCENTS_CHARS contributes
// (defined outside this excerpt). Parentheses are deliberately absent from the class.
// NOTE(review): only lowercase a-z is listed; presumably the final pattern is compiled
// case-insensitively - confirm at the point of compilation.
private static final String URL_VALID_GENERAL_PATH_CHARS = "[a-z0-9!\\*';:=\\+,.\\$/%#\\[\\]\\-_~\\|&@" + LATIN_ACCENTS_CHARS + "]";
/** Allow URL paths to contain balanced parens
/** Allow URL paths to contain balanced parens, nested at most two levels deep
* 1. Used in Wikipedia URLs like /Primer_(film)
* 2. Used in IIS sessions like /S(dfd346)/
* 3. Used in Rdio URLs like /track/We_Up_(Album_Version_(Edited))/
**/
private static final String URL_BALANCED_PARENS = "\\(" + URL_VALID_GENERAL_PATH_CHARS + "+\\)";
/** Valid end-of-path chracters (so /foo. does not gobble the period).
private static final String URL_BALANCED_PARENS =
    // opening outer paren, then one of two alternatives
    "\\((?:"
    // alternative 1: a flat run of path characters, no inner parens
    + URL_VALID_GENERAL_PATH_CHARS + "+"
    // alternative 2: exactly one nested, non-empty parenthesised run,
    // optionally surrounded by further path characters
    + "|(?:" + URL_VALID_GENERAL_PATH_CHARS + "*"
    + "\\(" + URL_VALID_GENERAL_PATH_CHARS + "+\\)"
    + URL_VALID_GENERAL_PATH_CHARS + "*)"
    // end of alternatives, then closing outer paren
    + ")\\)";

/** Valid end-of-path characters (so /foo. does not gobble the period).
 * 2. Allow =&# for empty URL parameters and other URL-join artifacts
 *
 * The pattern is an alternation of two forms: a single character from the
 * bracketed class, or a whole balanced-parens group (URL_BALANCED_PARENS).
 * The top-level '|' is not enclosed in a group here, so any pattern that
 * embeds this constant has to wrap it in its own group.
 *
 * NOTE(review): the list above mentions '&', but the character class below
 * contains '=' and '#' only - confirm whether the comment or the class is
 * the intended behaviour.
 **/
private static final String URL_VALID_PATH_ENDING_CHARS = "[a-z0-9=_#/\\-\\+" + LATIN_ACCENTS_CHARS + "]|(?:" + URL_BALANCED_PARENS +")";
Expand Down
2 changes: 1 addition & 1 deletion test-data/twitter-text-conformance
Submodule twitter-text-conformance updated from 682d05 to 7ebf8f

0 comments on commit 539cff6

Please sign in to comment.