Do locale neutral case conversions in MultiProtocolURL

This applies to all relevant URL parts: host name, URL scheme, session ids
and technical parts (see https://url.spec.whatwg.org/#url-writing and
https://tools.ietf.org/html/rfc3986 for current standard references).

The remaining locale-sensitive conversion, used for detecting URL word
components in urlComps(), makes sense, but using the detected language
would be preferable to using the default system locale.
pull/144/head
luccioman 8 years ago
parent 9531b83598
commit 398c66f06c

@ -216,7 +216,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
p = 4;
}
}
this.protocol = url.substring(0, p).toLowerCase().trim().intern();
this.protocol = url.substring(0, p).toLowerCase(Locale.ROOT).trim().intern();
if (url.length() < p + 4) throw new MalformedURLException("URL not parseable: '" + url + "'");
if (!this.protocol.equals("file") && url.substring(p + 1, p + 3).equals("//")) {
// identify host, userInfo and file for http and ftp protocol
@ -417,7 +417,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
// a relative path that uses the protocol from the base url
relPath = baseURL.protocol + ":" + relPath;
}
if (relPath.toLowerCase().startsWith("javascript:")) {
if (relPath.toLowerCase(Locale.ROOT).startsWith("javascript:")) {
this.path = baseURL.path;
} else if (
isHTTP(relPath) ||
@ -426,7 +426,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
isFile(relPath) ||
isSMB(relPath)) {
this.path = baseURL.path;
} else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase()).find()) { // discards also any unknown protocol from previous if
} else if (relPath.contains(":") && patternMail.matcher(relPath.toLowerCase(Locale.ROOT)).find()) { // discards also any unknown protocol from previous if
throw new MalformedURLException("relative path malformed: " + relPath);
} else if (relPath.length() > 0 && relPath.charAt(0) == '/') {
this.path = relPath;
@ -647,7 +647,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
sbuf.append("%25"); // '%' RFC 1738 2.2 unsafe char shall be encoded
}
} else if (ch == '&') {
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase())) {
if (i < len - 6 && "amp;".equals(s.substring(i + 1, i + 5).toLowerCase(Locale.ROOT))) {
sbuf.append((char)ch); // leave it that way, it is used the right way
} else {
sbuf.append("%26"); // this must be urlencoded
@ -799,7 +799,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
String q = this.searchpart;
if (removeSessionID) {
for (final String sid: sessionIDnames.keySet()) {
if (q.toLowerCase().startsWith(sid.toLowerCase() + "=")) {
if (q.toLowerCase(Locale.ROOT).startsWith(sid.toLowerCase(Locale.ROOT) + "=")) {
final int p = q.indexOf('&');
if (p < 0) {
if (excludeAnchor || this.anchor == null) return this.path;
@ -812,7 +812,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
q = q.substring(p + 1);
continue;
}
final int p = q.toLowerCase().indexOf("&" + sid.toLowerCase() + "=",0);
final int p = q.toLowerCase(Locale.ROOT).indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p < 0) continue;
final int p1 = q.indexOf('&', p+1);
if (p1 < 0) {
@ -852,14 +852,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
if (p < 0) return "";
final int q = fileName.lastIndexOf('?');
if (q < 0) {
return fileName.substring(p + 1).toLowerCase();
return fileName.substring(p + 1).toLowerCase(Locale.ROOT);
}
// check last dot in query part
if (p > q) {
p = fileName.lastIndexOf('.', q);
if (p < 0) return "";
}
return fileName.substring(p + 1, q).toLowerCase();
return fileName.substring(p + 1, q).toLowerCase(Locale.ROOT);
}
/**
@ -933,7 +933,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
public InetAddress getInetAddress() {
if (this.hostAddress != null) return this.hostAddress;
if (this.host == null) return null; // this may happen for file:// urls
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase());
this.hostAddress = Domains.dnsResolve(this.host.toLowerCase(Locale.ROOT));
return this.hostAddress;
}
@ -1117,7 +1117,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
u.append(this.userInfo);
u.append("@");
}
u.append(h.toLowerCase());
u.append(h.toLowerCase(Locale.ROOT));
}
if (!defaultPort) {
u.append(":");
@ -1165,7 +1165,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
u.append(this.userInfo);
u.append("@");
}
u.append(h.toLowerCase());
u.append(h.toLowerCase(Locale.ROOT));
}
if (!defaultPort) {
u.append(":");
@ -1224,7 +1224,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
}
public static final boolean isCGI(final String extension) {
return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase()) >= 0;
return extension != null && extension.length() > 0 && "cgi.exe".indexOf(extension.toLowerCase(Locale.ROOT)) >= 0;
}
/**
@ -1232,14 +1232,14 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
*/
@Deprecated
public static final boolean isImage(final String extension) {
return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase()) == Response.DT_IMAGE;
return extension != null && extension.length() > 0 && Response.docTypeExt(extension.toLowerCase(Locale.ROOT)) == Response.DT_IMAGE;
}
public final boolean isIndividual() {
final String q = unescape(this.path.toLowerCase());
final String q = unescape(this.path.toLowerCase(Locale.ROOT));
for (final String sid: sessionIDnames.keySet()) {
if (q.startsWith(sid.toLowerCase() + "=")) return true;
final int p = q.indexOf("&" + sid.toLowerCase() + "=",0);
if (q.startsWith(sid.toLowerCase(Locale.ROOT) + "=")) return true;
final int p = q.indexOf("&" + sid.toLowerCase(Locale.ROOT) + "=",0);
if (p >= 0) return true;
}
int pos;
@ -1273,7 +1273,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
String language = "en";
if (this.host == null) return language;
final int pos = this.host.lastIndexOf('.');
String host_tld = this.host.substring(pos + 1).toLowerCase();
String host_tld = this.host.substring(pos + 1).toLowerCase(Locale.ROOT);
if (pos == 0) return language;
int length = this.host.length() - pos - 1;
switch (length) {
@ -2395,6 +2395,7 @@ public class MultiProtocolURL implements Serializable, Comparable<MultiProtocolU
/**
 * Splits a URL string into its lower-cased word components.
 * <p>
 * When a "//" sequence is found at an index greater than zero, everything up to
 * and including it is dropped first. The remainder is lower-cased and
 * split with {@code splitpattern}.
 *
 * @param normalizedURL the URL string to split; must not be null
 * @return the word components of the URL
 */
public static String[] urlComps(String normalizedURL) {
    final int doubleSlashIndex = normalizedURL.indexOf("//", 0);
    final String withoutScheme = doubleSlashIndex > 0
            ? normalizedURL.substring(doubleSlashIndex + 2)
            : normalizedURL;
    // TODO lowering case in a locale-sensitive manner makes sense here, but the language locale used
    // should not depend on the default system locale
    final String lowered = withoutScheme.toLowerCase();
    return splitpattern.split(lowered);
}

@ -14,8 +14,10 @@ import java.util.Map;
import java.util.TreeSet;
import org.junit.Test;
//import junit.framework.TestCase;
/**
* Automated unit tests for the {@link MultiProtocolURL} class.
*/
public class MultiProtocolURLTest {
@Test
@ -169,11 +171,19 @@ public class MultiProtocolURLTest {
Map<String, String> testurls = new HashMap<String, String>();
// ( 1. parameter = urlstring to test, 2. parameter = expected protocol)
testurls.put("http://host.com", "http");
testurls.put("HTTP://EXAMPLE.COM", "http");
testurls.put("https://host.com", "https");
testurls.put("HTTPS://host.com", "https");
testurls.put("Ftp://example.org", "ftp");
testurls.put("FTP://EXAMPLE.ORG", "ftp");
testurls.put("Ftp://host.com", "ftp");
testurls.put("smb://host.com", "smb");
testurls.put("SMB://host.com", "smb");
testurls.put("/file.com", "file");
testurls.put("file://host.com/file.com", "file");
testurls.put("file:///file1.txt", "file");
testurls.put("FILE:///file2.txt", "file");
testurls.put("MAILTO:Abc@host.com", "mailto");
testurls.put("MailTo:Abc@host.com", "mailto");
for (String txt : testurls.keySet()) {
@ -258,10 +268,12 @@ public class MultiProtocolURLTest {
Map<String, String> testurls = new HashMap<String, String>();
// key=testurl, value=result
testurls.put("path/file.xml","xml"); // easiest
testurls.put("/FILE.GIF","gif"); // easy upper case
testurls.put("path/file?h.pdf",""); // file w/o extension
testurls.put("file.html?param=h.pdf","html"); // dot in query part
testurls.put("url?param=h.pdf",""); // dot in query part
testurls.put("file.html?param", "html");
testurls.put("FILE.GIF?param", "gif");
testurls.put("/path/","");
for (String s : testurls.keySet()) {
System.out.println("test getFileExtension: " + s + " -> " + testurls.get(s));

Loading…
Cancel
Save