Do locale neutral case conversions in MultiProtocolURL

This applies to all relevant URL parts: host name, URL scheme, session ids
or technical parts (see https://url.spec.whatwg.org/#url-writing and
https://tools.ietf.org/html/rfc3986 for current standard references).

The remaining locale-sensitive conversion, used for detection of URL word
components in urlComps(), makes sense, but using the detected language would
be preferable to using the default system locale.
pull/144/head
luccioman 8 years ago
parent 9531b83598
commit 398c66f06c

@ -216,7 +216,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
p = 4; p = 4;
} }
} }
this.protocol = url.substring(0, p).toLowerCase().trim().intern(); this.protocol = url.substring(0, p).toLowerCase(Locale.ROOT).trim().intern();
if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'"); if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'");
if (!this.protocol.equals("file") && url.substring(p + 1, p + 3).equals("//")) { if (!this.protocol.equals("file") && url.substring(p + 1, p + 3).equals("//")) {
// identify host, userInfo and file for http and ftp protocol // identify host, userInfo and file for http and ftp protocol
@ -417,7 +417,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
// a relative path that uses the protocol from the base url // a relative path that uses the protocol from the base url
relPath = baseURL.protocol + ":" + relPath; relPath = baseURL.protocol + ":" + relPath;
} }
if (relPath.toLowerCase().startsWith("javascript:")) { if (relPath.toLowerCase(Locale.ROOT).startsWith("javascript:")) {
this.path = baseURL.path; this.path = baseURL.path;
} else if ( } else if (
isHTTP(relPath) || isHTTP(relPath) ||
@ -426,7 +426,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
isFile(relPath) || isFile(relPath) ||
isSMB(relPath)) { isSMB(relPath)) {
this.path = baseURL.path; this.path = baseURL.path;
} else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase()).find()) { // discards also any unknown protocol from previous if } else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase(Locale.ROOT)).find()) { // discards also any unknown protocol from previous if
throw new MalformedURLException("relative path malformed: " + relPath); throw new MalformedURLException("relative path malformed: " + relPath);
} else if (relPath.length() > 0 && relPath.charAt(0) == '/') { } else if (relPath.length() > 0 && relPath.charAt(0) == '/') {
this.path = relPath; this.path = relPath;
@ -647,7 +647,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
sbuf.append("%25"); // '%' RFC 1738 2.2 unsafe char shall be encoded sbuf.append("%25"); // '%' RFC 1738 2.2 unsafe char shall be encoded
} }
} else if (ch == '&') { } else if (ch == '&') {
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) { if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase(Locale.ROOT))) {
sbuf.append((char)ch); // leave it that way, it is used the right way sbuf.append((char)ch); // leave it that way, it is used the right way
} else { } else {
sbuf.append("%26"); // this must be urlencoded sbuf.append("%26"); // this must be urlencoded
@ -799,7 +799,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
String q = this.searchpart; String q = this.searchpart;
if (removeSessionID) { if (removeSessionID) {
for (final String sid: sessionIDnames.keySet()) { for (final String sid: sessionIDnames.keySet()) {
if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) { if (q.toLowerCase(Locale.ROOT).startsWith(sid.toLowerCase(Locale.ROOT) + "=")) {
final int p = q.indexOf('&'); final int p = q.indexOf('&');
if (p < 0) { if (p < 0) {
if (excludeAnchor || this.anchor == null) return this.path; if (excludeAnchor || this.anchor == null) return this.path;
@ -812,7 +812,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
q = q.substring(p + 1); q = q.substring(p + 1);
continue; continue;
} }
final int p = q.toLowerCase().indexOf("&" + sid.toLowerCase() + "=",0); final int p = q.toLowerCase(Locale.ROOT).indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p < 0) continue; if (p < 0) continue;
final int p1 = q.indexOf('&', p+1); final int p1 = q.indexOf('&', p+1);
if (p1 < 0) { if (p1 < 0) {
@ -852,14 +852,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (p < 0) return ""; if (p < 0) return "";
final int q = fileName.lastIndexOf('?'); final int q = fileName.lastIndexOf('?');
if (q < 0) { if (q < 0) {
return fileName.substring(p + 1).toLowerCase(); return fileName.substring(p + 1).toLowerCase(Locale.ROOT);
} }
// check last dot in query part // check last dot in query part
if (p > q) { if (p > q) {
p = fileName.lastIndexOf('.', q); p = fileName.lastIndexOf('.', q);
if (p < 0) return ""; if (p < 0) return "";
} }
return fileName.substring(p + 1, q).toLowerCase(); return fileName.substring(p + 1, q).toLowerCase(Locale.ROOT);
} }
/** /**
@ -933,7 +933,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public InetAddress getInetAddress() { public InetAddress getInetAddress() {
if (this.hostAddress != null) return this.hostAddress; if (this.hostAddress != null) return this.hostAddress;
if (this.host == null) return null; // this may happen for file:// urls if (this.host == null) return null; // this may happen for file:// urls
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase()); this.hostAddress = Domains.dnsResolve(this.host.toLowerCase(Locale.ROOT));
return this.hostAddress; return this.hostAddress;
} }
@ -1117,7 +1117,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
u.append(this.userInfo); u.append(this.userInfo);
u.append("@"); u.append("@");
} }
u.append(h.toLowerCase()); u.append(h.toLowerCase(Locale.ROOT));
} }
if (!defaultPort) { if (!defaultPort) {
u.append(":"); u.append(":");
@ -1165,7 +1165,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
u.append(this.userInfo); u.append(this.userInfo);
u.append("@"); u.append("@");
} }
u.append(h.toLowerCase()); u.append(h.toLowerCase(Locale.ROOT));
} }
if (!defaultPort) { if (!defaultPort) {
u.append(":"); u.append(":");
@ -1224,7 +1224,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
} }
public static final boolean isCGI(final String extension) { public static final boolean isCGI(final String extension) {
return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase()) >= 0; return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase(Locale.ROOT)) >= 0;
} }
/** /**
@ -1232,14 +1232,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
*/ */
@Deprecated @Deprecated
public static final boolean isImage(final String extension) { public static final boolean isImage(final String extension) {
return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase()) == Response.DT_IMAGE; return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase(Locale.ROOT)) == Response.DT_IMAGE;
} }
public final boolean isIndividual() { public final boolean isIndividual() {
final String q = unescape(this.path.toLowerCase()); final String q = unescape(this.path.toLowerCase(Locale.ROOT));
for (final String sid: sessionIDnames.keySet()) { for (final String sid: sessionIDnames.keySet()) {
if (q.startsWith(sid.toLowerCase() + "=")) return true; if (q.startsWith(sid.toLowerCase(Locale.ROOT) + "=")) return true;
final int p = q.indexOf("&" + sid.toLowerCase() + "=",0); final int p = q.indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p >= 0) return true; if (p >= 0) return true;
} }
int pos; int pos;
@ -1273,7 +1273,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
String language = "en"; String language = "en";
if (this.host == null) return language; if (this.host == null) return language;
final int pos = this.host.lastIndexOf('.'); final int pos = this.host.lastIndexOf('.');
String host_tld = this.host.substring(pos + 1).toLowerCase(); String host_tld = this.host.substring(pos + 1).toLowerCase(Locale.ROOT);
if (pos == 0) return language; if (pos == 0) return language;
int length = this.host.length() - pos - 1; int length = this.host.length() - pos - 1;
switch (length) { switch (length) {
@ -2395,6 +2395,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public static String[] urlComps(String normalizedURL) { public static String[] urlComps(String normalizedURL) {
final int p = normalizedURL.indexOf("//",0); final int p = normalizedURL.indexOf("//",0);
if (p > 0) normalizedURL = normalizedURL.substring(p + 2); if (p > 0) normalizedURL = normalizedURL.substring(p + 2);
// TODO lowering case in a locale-sensitive manner makes sense here, but the language locale used should not depend on the default system locale
return splitpattern.split(normalizedURL.toLowerCase()); // word components of the url return splitpattern.split(normalizedURL.toLowerCase()); // word components of the url
} }

@ -14,10 +14,12 @@ import java.util.Map;
import java.util.TreeSet; import java.util.TreeSet;
import org.junit.Test; import org.junit.Test;
//import junit.framework.TestCase;
/**
* Automated unit tests for the {@link MultiProtocolURL} class.
*/
public class MultiProtocolURLTest { public class MultiProtocolURLTest {
@Test @Test
public void testSessionIdRemoval() throws MalformedURLException { public void testSessionIdRemoval() throws MalformedURLException {
String[][] testURIs = new String[][]{ String[][] testURIs = new String[][]{
@ -169,11 +171,19 @@ public class MultiProtocolURLTest {
Map<String, String> testurls = new HashMap<String, String>(); Map<String, String> testurls = new HashMap<String, String>();
// ( 1. parameter = urlstring to test, 2. parameter = expected protocol) // ( 1. parameter = urlstring to test, 2. parameter = expected protocol)
testurls.put("http://host.com", "http"); testurls.put("http://host.com", "http");
testurls.put("HTTP://EXAMPLE.COM", "http");
testurls.put("https://host.com", "https");
testurls.put("HTTPS://host.com", "https"); testurls.put("HTTPS://host.com", "https");
testurls.put("Ftp://example.org", "ftp");
testurls.put("FTP://EXAMPLE.ORG", "ftp");
testurls.put("Ftp://host.com", "ftp"); testurls.put("Ftp://host.com", "ftp");
testurls.put("smb://host.com", "smb");
testurls.put("SMB://host.com", "smb"); testurls.put("SMB://host.com", "smb");
testurls.put("/file.com", "file"); testurls.put("/file.com", "file");
testurls.put("file://host.com/file.com", "file"); testurls.put("file://host.com/file.com", "file");
testurls.put("file:///file1.txt", "file");
testurls.put("FILE:///file2.txt", "file");
testurls.put("MAILTO:Abc@host.com", "mailto");
testurls.put("MailTo:Abc@host.com", "mailto"); testurls.put("MailTo:Abc@host.com", "mailto");
for (String txt : testurls.keySet()) { for (String txt : testurls.keySet()) {
@ -258,10 +268,12 @@ public class MultiProtocolURLTest {
Map<String, String> testurls = new HashMap<String, String>(); Map<String, String> testurls = new HashMap<String, String>();
// key=testurl, value=result // key=testurl, value=result
testurls.put("path/file.xml","xml"); // easiest testurls.put("path/file.xml","xml"); // easiest
testurls.put("/FILE.GIF","gif"); // easy upper case
testurls.put("path/file?h.pdf",""); // file w/o extension testurls.put("path/file?h.pdf",""); // file w/o extension
testurls.put("file.html?param=h.pdf","html"); // dot in query part testurls.put("file.html?param=h.pdf","html"); // dot in query part
testurls.put("url?param=h.pdf",""); // dot in query part testurls.put("url?param=h.pdf",""); // dot in query part
testurls.put("file.html?param", "html"); testurls.put("file.html?param", "html");
testurls.put("FILE.GIF?param", "gif");
testurls.put("/path/",""); testurls.put("/path/","");
for (String s : testurls.keySet()) { for (String s : testurls.keySet()) {
System.out.println("test getFileExtension: " + s + " -> " + testurls.get(s)); System.out.println("test getFileExtension: " + s + " -> " + testurls.get(s));

Loading…
Cancel
Save