diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java
index b4a657672..217db08eb 100644
--- a/source/net/yacy/document/parser/html/ContentScraper.java
+++ b/source/net/yacy/document/parser/html/ContentScraper.java
@@ -31,6 +31,7 @@ import java.io.IOException;
import java.io.Writer;
import java.net.MalformedURLException;
import java.nio.charset.Charset;
+import java.text.NumberFormat;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Date;
@@ -366,13 +367,15 @@ public class ContentScraper extends AbstractScraper implements Scraper {
if (src.length() > 0) {
final AnchorURL url = absolutePath(src);
if (url != null) {
- final int width = Integer.parseInt(tag.opts.getProperty("width", "-1"));
- final int height = Integer.parseInt(tag.opts.getProperty("height", "-1"));
+ // use NumberFormat.parse so that values with a trailing unit, e.g. "550px", can be parsed
+ NumberFormat intnum = NumberFormat.getIntegerInstance ();
+ final int width = intnum.parse(tag.opts.getProperty("width", "-1")).intValue(); // Integer.parseInt fails on "200px"
+ final int height = intnum.parse(tag.opts.getProperty("height", "-1")).intValue();
final ImageEntry ie = new ImageEntry(url, tag.opts.getProperty("alt", EMPTY_STRING), width, height, -1);
this.images.add(ie);
}
}
- } catch (final NumberFormatException e) {}
+ } catch (final ParseException e) {}
this.evaluationScores.match(Element.imgpath, src);
} else if(tag.name.equalsIgnoreCase("base")) {
try {
diff --git a/test/net/yacy/cora/document/id/DigestURLTest.java b/test/net/yacy/cora/document/id/DigestURLTest.java
index 059650bac..227fe9858 100644
--- a/test/net/yacy/cora/document/id/DigestURLTest.java
+++ b/test/net/yacy/cora/document/id/DigestURLTest.java
@@ -10,7 +10,7 @@ public class DigestURLTest extends TestCase {
public void testIdentPort() throws MalformedURLException {
String[][] testStrings = new String[][]{
new String[]{"http://www.yacy.net:", "http://www.yacy.net/"},
- new String[]{"http://www.yacy.net:-1", "http://www.yacy.net/"},
+ new String[]{"http://www.yacy.net:80", "http://www.yacy.net/"},
new String[]{"http://www.yacy.net:/", "http://www.yacy.net/"},
new String[]{"http://www.yacy.net: /", "http://www.yacy.net/"}
};
diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java
index 9c0fafd93..beece1948 100644
--- a/test/net/yacy/document/parser/htmlParserTest.java
+++ b/test/net/yacy/document/parser/htmlParserTest.java
@@ -13,6 +13,7 @@ import net.yacy.cora.document.id.AnchorURL;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.parser.html.ContentScraper;
+import net.yacy.document.parser.html.ImageEntry;
import static net.yacy.document.parser.htmlParser.parseToScraper;
import org.junit.Test;
@@ -94,10 +95,11 @@ public class htmlParserTest extends TestCase {
// expectation to deliver pure text as it is possibly indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt
final AnchorURL url = new AnchorURL("http://localhost/");
final String mimetype = "text/html";
- final String testhtml = ""
+ final String testhtml = ""
+ "testtext" // "testtext"
+ " Start" // "Start"
+ "
" // "" + image
+ + "
" // + img width 550 (+html5 figure)
+ "";
ContentScraper scraper = parseToScraper(url, mimetype, testhtml, 10);
@@ -113,6 +115,8 @@ public class htmlParserTest extends TestCase {
assertEquals("", linktxt);
int cnt = scraper.getImages().size();
- assertEquals(1,cnt);
+ assertEquals(2,cnt);
+ ImageEntry img = scraper.getImages().get(1);
+ assertEquals(550,img.width());
}
}