diff --git a/source/net/yacy/cora/document/id/DigestURL.java b/source/net/yacy/cora/document/id/DigestURL.java index 40328b1c4..21dd3a04b 100644 --- a/source/net/yacy/cora/document/id/DigestURL.java +++ b/source/net/yacy/cora/document/id/DigestURL.java @@ -244,11 +244,12 @@ public class DigestURL extends MultiProtocolURL implements Serializable { // find rootpath int rootpathStart = 0; int rootpathEnd = this.path.length() - 1; - if (!this.path.isEmpty() && this.path.charAt(0) == '/') + if (!this.path.isEmpty() && (this.path.charAt(0) == '/' || this.path.charAt(0) == '\\')) rootpathStart = 1; if (this.path.endsWith("/")) rootpathEnd = this.path.length() - 2; p = this.path.indexOf('/', rootpathStart); + if (this.isFile() && p < 0) p = this.path.indexOf('\\', rootpathStart); // no '/' found: for file URLs, retry with the Windows '\\' separator. NOTE(review): the endsWith("/") check above does not cover a trailing '\\' - confirm this is intended String rootpath = ""; if (p > 0 && p < rootpathEnd) { rootpath = this.path.substring(rootpathStart, p); @@ -264,7 +265,7 @@ public class DigestURL extends MultiProtocolURL implements Serializable { final StringBuilder hashs = new StringBuilder(12); assert hashs.length() == 0; // form the 'local' part of the hash - final String normalform = toNormalform(true, true); + final String normalform = toNormalform(true, true); // also normalizes Windows backslashes in the path to '/' for file URLs final String b64l = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(normalform)); if (b64l.length() < 5) return null; hashs.append(b64l.substring(0, 5)); // 5 chars diff --git a/source/net/yacy/cora/document/id/MultiProtocolURL.java b/source/net/yacy/cora/document/id/MultiProtocolURL.java index 64570dd4c..640bc5451 100644 --- a/source/net/yacy/cora/document/id/MultiProtocolURL.java +++ b/source/net/yacy/cora/document/id/MultiProtocolURL.java @@ -832,7 +832,7 @@ public class MultiProtocolURL implements Serializable, Comparable= 0) { // normalize Windows backslashes to '/' (important for hash computation) + urlPath = urlPath.replace('\\', '/'); + } u.append(urlPath); 
String result = u.toString(); diff --git a/test/java/net/yacy/cora/document/id/DigestURLTest.java b/test/java/net/yacy/cora/document/id/DigestURLTest.java index 587a4ee27..33eaec695 100644 --- a/test/java/net/yacy/cora/document/id/DigestURLTest.java +++ b/test/java/net/yacy/cora/document/id/DigestURLTest.java @@ -2,6 +2,7 @@ package net.yacy.cora.document.id; import java.net.MalformedURLException; import junit.framework.TestCase; +import net.yacy.cora.document.encoding.ASCII; import org.junit.Test; public class DigestURLTest extends TestCase { @@ -30,4 +31,23 @@ public class DigestURLTest extends TestCase { } } + /** + * Verifies that DigestURL.hash() returns the same hash for a file URL written with + * Windows backslash separators and for the same URL written with forward slashes. + */ + @Test + public void testHash_ForFile() throws MalformedURLException { + String winUrlStr = "file:///C:\\tmp\\test.html"; // Windows notation (backslash separators) + String javaUrlStr = "file:///C:/tmp/test.html"; // same file in Java notation (forward slashes) + + DigestURL winUrl = new DigestURL(winUrlStr); + DigestURL javaUrl = new DigestURL(javaUrlStr); + + String winHashResult = ASCII.String(winUrl.hash()); + String javaHashResult = ASCII.String(javaUrl.hash()); + + assertEquals("hash for same file url", javaHashResult, winHashResult); + + } + }