Permalink
Browse files

Do locale neutral case conversions in MultiProtocolURL

This applies to every relevant URL part: host name, URL scheme, session ids and
technical parts (see https://url.spec.whatwg.org/#url-writing and
https://tools.ietf.org/html/rfc3986 for current standard references).

The remaining locale-sensitive conversion, used to detect URL word
components in urlComps(), makes sense, but using the detected language would
be preferable to using the default system locale.
  • Loading branch information...
luccioman committed Nov 20, 2017
1 parent 9531b83 commit 398c66f06c9d2db6a2bbd1e873316baebd04a914
@@ -216,7 +216,7 @@ public MultiProtocolURL(String url) throws MalformedURLException {
p = 4;
}
}
this.protocol = url.substring(0, p).toLowerCase().trim().intern();
this.protocol = url.substring(0, p).toLowerCase(Locale.ROOT).trim().intern();
if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'");
if (!this.protocol.equals("file") && url.substring(p + 1, p + 3).equals("//")) {
// identify host, userInfo and file for http and ftp protocol
@@ -417,7 +417,7 @@ public MultiProtocolURL(final MultiProtocolURL baseURL, String relPath) throws M
// a relative path that uses the protocol from the base url
relPath = baseURL.protocol + ":" + relPath;
}
if (relPath.toLowerCase().startsWith("javascript:")) {
if (relPath.toLowerCase(Locale.ROOT).startsWith("javascript:")) {
this.path = baseURL.path;
} else if (
isHTTP(relPath) ||
@@ -426,7 +426,7 @@ public MultiProtocolURL(final MultiProtocolURL baseURL, String relPath) throws M
isFile(relPath) ||
isSMB(relPath)) {
this.path = baseURL.path;
} else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase()).find()) { // discards also any unknown protocol from previous if
} else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase(Locale.ROOT)).find()) { // discards also any unknown protocol from previous if
throw new MalformedURLException("relative path malformed: " + relPath);
} else if (relPath.length() > 0 && relPath.charAt(0) == '/') {
this.path = relPath;
@@ -647,7 +647,7 @@ public static StringBuilder escape(final String s) {
sbuf.append("%25"); // '%' RFC 1738 2.2 unsafe char shall be encoded
}
} else if (ch == '&') {
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase(Locale.ROOT))) {
sbuf.append((char)ch); // leave it that way, it is used the right way
} else {
sbuf.append("%26"); // this must be urlencoded
@@ -799,7 +799,7 @@ public String getFile(final boolean excludeAnchor, final boolean removeSessionID
String q = this.searchpart;
if (removeSessionID) {
for (final String sid: sessionIDnames.keySet()) {
if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) {
if (q.toLowerCase(Locale.ROOT).startsWith(sid.toLowerCase(Locale.ROOT) + "=")) {
final int p = q.indexOf('&');
if (p < 0) {
if (excludeAnchor || this.anchor == null) return this.path;
@@ -812,7 +812,7 @@ public String getFile(final boolean excludeAnchor, final boolean removeSessionID
q = q.substring(p + 1);
continue;
}
final int p = q.toLowerCase().indexOf("&" + sid.toLowerCase() + "=",0);
final int p = q.toLowerCase(Locale.ROOT).indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p < 0) continue;
final int p1 = q.indexOf('&', p+1);
if (p1 < 0) {
@@ -852,14 +852,14 @@ public static String getFileExtension(final String fileName) {
if (p < 0) return "";
final int q = fileName.lastIndexOf('?');
if (q < 0) {
return fileName.substring(p + 1).toLowerCase();
return fileName.substring(p + 1).toLowerCase(Locale.ROOT);
}
// check last dot in query part
if (p > q) {
p = fileName.lastIndexOf('.', q);
if (p < 0) return "";
}
return fileName.substring(p + 1, q).toLowerCase();
return fileName.substring(p + 1, q).toLowerCase(Locale.ROOT);
}
/**
@@ -933,7 +933,7 @@ public String getTLD() {
/**
 * Returns the address of this URL's host, resolving it on first use.
 * A previously stored {@code hostAddress} is returned as is; otherwise the
 * lower-cased host name is passed to {@code Domains.dnsResolve} and the
 * result is stored in {@code hostAddress} before being returned.
 *
 * @return the stored or newly resolved address, or {@code null} when this URL has no host
 */
public InetAddress getInetAddress() {
if (this.hostAddress != null) return this.hostAddress;
if (this.host == null) return null; // this may happen for file:// urls
// NOTE(review): the two assignments below look like the before/after pair of
// this change; the Locale.ROOT variant appears to be the intended one - confirm.
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase());
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase(Locale.ROOT));
return this.hostAddress;
}
@@ -1117,7 +1117,7 @@ public String toNormalform(final boolean excludeAnchor, final boolean removeSess
u.append(this.userInfo);
u.append("@");
}
u.append(h.toLowerCase());
u.append(h.toLowerCase(Locale.ROOT));
}
if (!defaultPort) {
u.append(":");
@@ -1165,7 +1165,7 @@ public String urlstub(final boolean excludeAnchor, final boolean removeSessionID
u.append(this.userInfo);
u.append("@");
}
u.append(h.toLowerCase());
u.append(h.toLowerCase(Locale.ROOT));
}
if (!defaultPort) {
u.append(":");
@@ -1224,22 +1224,22 @@ public boolean isPOST() {
}
/**
 * Tells whether the given file extension is treated as a CGI extension.
 * The check is a substring test: the lower-cased extension must occur
 * somewhere inside the literal {@code "cgi.exe"}.
 *
 * @param extension the file extension to check; may be {@code null}
 * @return {@code true} when the extension is non-null, non-empty and contained in {@code "cgi.exe"}
 */
public static final boolean isCGI(final String extension) {
// NOTE(review): the two return lines below look like the before/after pair of
// this change; the Locale.ROOT variant appears to be the intended one - confirm.
return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase()) >= 0;
return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase(Locale.ROOT)) >= 0;
}
/**
 * Tells whether {@code Response.docTypeExt} maps the lower-cased extension
 * to {@code Response.DT_IMAGE}.
 *
 * @param extension the file extension to check; may be {@code null}
 * @return {@code true} when the extension is non-null, non-empty and mapped to {@code Response.DT_IMAGE}
 * @deprecated use a method that considers the mime type (e.g. Document.getContentDomain() == ContentDomain.IMAGE or else Classification.isImageExtension() )
 */
@Deprecated
public static final boolean isImage(final String extension) {
// NOTE(review): the two return lines below look like the before/after pair of
// this change; the Locale.ROOT variant appears to be the intended one - confirm.
return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase()) == Response.DT_IMAGE;
return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase(Locale.ROOT)) == Response.DT_IMAGE;
}
public final boolean isIndividual() {
final String q = unescape(this.path.toLowerCase());
final String q = unescape(this.path.toLowerCase(Locale.ROOT));
for (final String sid: sessionIDnames.keySet()) {
if (q.startsWith(sid.toLowerCase() + "=")) return true;
final int p = q.indexOf("&" + sid.toLowerCase() + "=",0);
if (q.startsWith(sid.toLowerCase(Locale.ROOT) + "=")) return true;
final int p = q.indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p >= 0) return true;
}
int pos;
@@ -1273,7 +1273,7 @@ public final String language() {
String language = "en";
if (this.host == null) return language;
final int pos = this.host.lastIndexOf('.');
String host_tld = this.host.substring(pos + 1).toLowerCase();
String host_tld = this.host.substring(pos + 1).toLowerCase(Locale.ROOT);
if (pos == 0) return language;
int length = this.host.length() - pos - 1;
switch (length) {
@@ -2395,6 +2395,7 @@ public Locale getLocale() {
/**
 * Splits a normalized URL into its lower-cased word components.
 * When the URL contains "//" at an index greater than zero, everything up
 * to and including that marker is dropped before splitting.
 *
 * @param normalizedURL the URL string to split
 * @return the components produced by applying {@code splitpattern} to the lower-cased remainder
 */
public static String[] urlComps(String normalizedURL) {
    final int markerIndex = normalizedURL.indexOf("//", 0);
    final String remainder;
    if (markerIndex > 0) {
        remainder = normalizedURL.substring(markerIndex + 2);
    } else {
        remainder = normalizedURL;
    }
    // TODO lowering case in a locale-sensitive manner makes sense here, but the locale used should not depend on the default system locale
    final String lowered = remainder.toLowerCase();
    return splitpattern.split(lowered); // word components of the url
}
@@ -14,10 +14,12 @@
import java.util.TreeSet;
import org.junit.Test;
//import junit.framework.TestCase;
/**
* Automated unit tests for the {@link MultiProtocolURL} class.
*/
public class MultiProtocolURLTest {
@Test
public void testSessionIdRemoval() throws MalformedURLException {
String[][] testURIs = new String[][]{
@@ -169,11 +171,19 @@ public void testGetProtocol() throws MalformedURLException {
Map<String, String> testurls = new HashMap<String, String>();
// ( 1. parameter = urlstring to test, 2. parameter = expected protocol)
testurls.put("http://host.com", "http");
testurls.put("HTTP://EXAMPLE.COM", "http");
testurls.put("https://host.com", "https");
testurls.put("HTTPS://host.com", "https");
testurls.put("Ftp://example.org", "ftp");
testurls.put("FTP://EXAMPLE.ORG", "ftp");
testurls.put("Ftp://host.com", "ftp");
testurls.put("smb://host.com", "smb");
testurls.put("SMB://host.com", "smb");
testurls.put("/file.com", "file");
testurls.put("file://host.com/file.com", "file");
testurls.put("file:///file1.txt", "file");
testurls.put("FILE:///file2.txt", "file");
testurls.put("MAILTO:Abc@host.com", "mailto");
testurls.put("MailTo:Abc@host.com", "mailto");
for (String txt : testurls.keySet()) {
@@ -258,10 +268,12 @@ public void testGetFileExtension() {
Map<String, String> testurls = new HashMap<String, String>();
// key=testurl, value=result
testurls.put("path/file.xml","xml"); // easiest
testurls.put("/FILE.GIF","gif"); // easy upper case
testurls.put("path/file?h.pdf",""); // file w/o extension
testurls.put("file.html?param=h.pdf","html"); // dot in query part
testurls.put("url?param=h.pdf",""); // dot in query part
testurls.put("file.html?param", "html");
testurls.put("FILE.GIF?param", "gif");
testurls.put("/path/","");
for (String s : testurls.keySet()) {
System.out.println("test getFileExtension: " + s + " -> " + testurls.get(s));

0 comments on commit 398c66f

Please sign in to comment.