Skip to content
Permalink
Browse files

Decode blacklist entries for easier editing of non-ASCII chars

Not using the JDK URLDecoder.decode() function, as it strips '+'
characters when they occur after '?' (both characters having regular
expression semantics when used in blacklist path patterns)
  • Loading branch information...
luccioman committed Oct 4, 2018
1 parent ed93221 commit 61c337f29a90d28728235d1cec11ea5958f82706
@@ -419,10 +419,21 @@ public static serverObjects respond(final RequestHeader header, final serverObje
}

for (int j = offset; j < to; ++j){
final String nextEntry = sortedlist[j];
String nextEntry = sortedlist[j];

if (nextEntry.isEmpty()) continue;
if (nextEntry.charAt(0) == '#') continue;
if (nextEntry.isEmpty()) {
continue;
}
if (nextEntry.charAt(0) == '#') {
continue;
}

/** Decode the entry for easier reading of paths with non ascii characters */
final int slashPos = nextEntry.indexOf('/', 0);
if(slashPos > 0) {
nextEntry = nextEntry.substring(0, slashPos + 1) + MultiProtocolURL.unescapePath(nextEntry.substring(slashPos + 1));
}

prop.put(DISABLED + EDIT + "Itemlist_" + entryCount + "_dark", dark ? "1" : "0");
dark = !dark;
/* We do not use here putHTML as we don't want '+' characters to be interpreted as application/x-www-form-urlencoded encoding */
@@ -37,6 +37,7 @@
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URLDecoder;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.BitSet;
import java.util.LinkedHashMap;
@@ -706,6 +707,60 @@ private static String escapePath(final String pathToEscape, final boolean isPatt
}
return pathToEscape;
}

/**
 * Decode the UTF-8 percent-encoded characters possibly found in the given path.
 * <p>
 * Differences with {@link URLDecoder#decode(String, String)} :
 * </p>
 * <ul>
 * <li>the '+' character is not decoded to space character</li>
 * <li>no exception is thrown when invalid hexadecimal digits are found after a '%' character :
 * the malformed sequence is copied to the result as is</li>
 * </ul>
 * <p>
 * Consecutive percent-encoded bytes are collected and decoded together, so that
 * multi-byte UTF-8 sequences are restored as single characters.
 * </p>
 *
 * @param escaped a URL path, possibly percent-encoded
 * @return the unescaped path, the given string itself when it contains no valid
 *         percent-encoded sequence, or null when escaped is null.
 */
public static final String unescapePath(final String escaped) {
    if (escaped == null) {
        return escaped;
    }
    boolean modified = false;
    final int len = escaped.length();
    final StringBuilder unescaped = new StringBuilder(len > 500 ? len / 2 : len);
    /* Pending decoded bytes, lazily allocated on the first valid percent-encoded sequence */
    ByteBuffer utf8Bytes = null;
    int i = 0;
    while (i < len) {
        final char ch = escaped.charAt(i);
        if (ch == '%' && (i + 2) < len && isHexDigit(escaped.charAt(i + 1))
                && isHexDigit(escaped.charAt(i + 2))) {
            if (utf8Bytes == null) {
                /* Each encoded byte takes 3 chars : the remaining input can not hold more bytes than this */
                utf8Bytes = ByteBuffer.allocate((len - i) / 3);
            }
            /* Percent-encoded character UTF-8 byte */
            utf8Bytes.put((byte) Integer.parseInt(escaped.substring(i + 1, i + 3), 16));
            modified = true;
            i += 3;
        } else {
            /*
             * Regular character, or '%' not starting a valid percent-encoded sequence :
             * in both cases the pending decoded bytes must be appended first, otherwise
             * the output order would differ from the input order
             */
            if (utf8Bytes != null && utf8Bytes.position() > 0) {
                unescaped.append(new String(utf8Bytes.array(), 0, utf8Bytes.position(), StandardCharsets.UTF_8));
                utf8Bytes.position(0);
            }
            unescaped.append(ch);
            i++;
        }
    }
    /* The path may end with percent-encoded bytes that are still pending */
    if (utf8Bytes != null && utf8Bytes.position() > 0) {
        unescaped.append(new String(utf8Bytes.array(), 0, utf8Bytes.position(), StandardCharsets.UTF_8));
    }

    return modified ? unescaped.toString() : escaped;
}

/**
* @param character a character to test
@@ -275,53 +275,79 @@ public final void loadList(final BlacklistType blacklistType, final String fileN
public final void remove(final BlacklistType blacklistType, final String blacklistToUse, final String host, final String path) {

final Map<String, Set<Pattern>> blacklistMap = getBlacklistMap(blacklistType, true);
Set<Pattern> hostList = blacklistMap.get(host);
if (hostList != null) {
// remove pattern from list (by comparing patternstring with path, remove(path) will not match path)
for (Pattern hp : hostList) {
String hpxs = hp.pattern();
if (hpxs.equals(path)) {
hostList.remove(hp);
break;
}
}
if (hostList.isEmpty()) {
blacklistMap.remove(host);
}
}
removePatternFromMap(host, path, blacklistMap);

final Map<String, Set<Pattern>> blacklistMapNotMatch = getBlacklistMap(blacklistType, false);
hostList = blacklistMapNotMatch.get(host);
if (hostList != null) {
// remove pattern from list
for (Pattern hp : hostList) {
String hpxs = hp.pattern();
if (hpxs.equals(path)) {
hostList.remove(hp);
break;
}
}
if (hostList.isEmpty()) {
blacklistMapNotMatch.remove(host);
}
}
removePatternFromMap(host, path, blacklistMapNotMatch);

//TODO: check if delete from blacklist is desired, on reload entry will not be available in any blacklist
// even if remove (above) from internal maps (at runtime) is only done for given blacklistType
// load blacklist data from file
final List<String> list = FileUtils.getListArray(new File(ListManager.listsPath, blacklistToUse));

// delete the old entry from file
/* delete the old entry from file, in any normalized or not normalized possible combinations */
final Set<String> entriesToDelete = new HashSet<>();
final String normalizedPathPattern = MultiProtocolURL.escapePathPattern(path);
entriesToDelete.add(host + "/" + path);
entriesToDelete.add(host + "/" + normalizedPathPattern);
if (!Punycode.isBasic(host)) {
try {
final String normalizedHost = MultiProtocolURL.toPunycode(host);
entriesToDelete.add(normalizedHost + "/" + path);
entriesToDelete.add(normalizedHost + "/" + normalizedPathPattern);
} catch (final PunycodeException ignored) {
/* We continue even if a punycode flavor can not be produced */
}
}
if (list != null) {
for (final String e : list) {
if (e.equals(host + "/" + path)) {
if (entriesToDelete.contains(e)) {
list.remove(e);
break;
}
}
FileUtils.writeList(new File(ListManager.listsPath, blacklistToUse), list.toArray(new String[list.size()]));
}
}

/**
 * Remove the (host, pathPattern) entry possibly found in the given blacklist
 * map.
 * <p>
 * The map is looked up with the given host and, when
 * {@code Punycode.isBasic(host)} is false, also with the result of
 * {@code MultiProtocolURL.toPunycode(host)}. For each of these keys, the first
 * pattern whose string equals either pathPattern or
 * {@code MultiProtocolURL.escapePathPattern(pathPattern)} is removed, and the
 * key itself is removed from the map once its pattern set is empty.
 * </p>
 *
 * @param host         the host part of the entry to remove
 * @param pathPattern  the path pattern part of the entry to remove
 * @param blacklistMap a blacklist map to update
 */
private void removePatternFromMap(final String host, final String pathPattern,
        final Map<String, Set<Pattern>> blacklistMap) {
    final String normalizedPathPattern = MultiProtocolURL.escapePathPattern(pathPattern);
    final Set<String> hosts = new HashSet<>();
    hosts.add(host);
    if (!Punycode.isBasic(host)) {
        try {
            hosts.add(MultiProtocolURL.toPunycode(host));
        } catch (final PunycodeException ignored) {
            /* We continue even if a punycode flavor can not be produced */
        }
    }
    for (final String hostKey : hosts) {
        final Set<Pattern> hostList = blacklistMap.get(hostKey);
        if (hostList == null) {
            continue;
        }
        // remove pattern from list (by comparing patternstring with path, remove(path)
        // will not match path)
        for (final Pattern hp : hostList) {
            final String hpxs = hp.pattern();
            if (hpxs.equals(pathPattern) || hpxs.equals(normalizedPathPattern)) {
                /* Leaving the loop right after the removal keeps the iteration safe */
                hostList.remove(hp);
                break;
            }
        }
        if (hostList.isEmpty()) {
            /* Remove the key actually looked up, which may be the punycode form and not the given host */
            blacklistMap.remove(hostKey);
        }
    }
}

/**
* Adds entries to a given blacklist internal data and updates the source
@@ -391,6 +391,36 @@ public void testEscapePath() {
}
}

/**
 * Unit tests for {@link MultiProtocolURL#unescapePath(String)} : each case pairs
 * an input path with the result expected once unescaped.
 */
@Test
public void testUnescapePath() {
    // { "test string", "expected unescaped result" }
    final String[][] cases = {
            { "", "" },
            { "/", "/" },
            { "/ascii/path", "/ascii/path" },
            { "/latin/chars/%C3%A0%C3%A4%C3%A2%C3%A9%C3%A8%C3%AF%C3%AE%C3%B4%C3%B6%C3%B9",
                    "/latin/chars/àäâéèïîôöù" },
            { "/wiki/%25", "/wiki/%" },
            { "/logograms/%E6%AD%A3%E9%AB%94%E5%AD%97/%E7%B9%81%E9%AB%94%E5%AD%97",
                    "/logograms/正體字/繁體字" },
            { "/bad/hexaDigits/%GH%-1%èà/file", "/bad/hexaDigits/%GH%-1%èà/file" },
            { "/missing/hexaDigit/%2", "/missing/hexaDigit/%2" },
            { "/missing/hexaDigits/%", "/missing/hexaDigits/%" },
            { "/unescaped/logograms/正體字/繁體字", "/unescaped/logograms/正體字/繁體字" },
            { "/unescaped/rfc3986/unreserved/path/chars/-._~",
                    "/unescaped/rfc3986/unreserved/path/chars/-._~" },
            { "/unescaped/rfc3986/subdelims/!$&'()*+,;=", "/unescaped/rfc3986/subdelims/!$&'()*+,;=" },
            { "/unescaped/rfc3986/pchar/additional/:@", "/unescaped/rfc3986/pchar/additional/:@" },
            { "/unescaped/regex/metacharacters/<([{\\^-=$!|]})?*+.>",
                    "/unescaped/regex/metacharacters/<([{\\^-=$!|]})?*+.>" } };
    for (final String[] testCase : cases) {
        final String input = testCase[0];
        final String expected = testCase[1];
        assertEquals(expected, MultiProtocolURL.unescapePath(input));
    }
}

/**
* Unit tests for {@link MultiProtocolURL#escapePathPattern(String)}
*/

0 comments on commit 61c337f

Please sign in to comment.
You can’t perform that action at this time.