diff --git a/source/net/yacy/document/parser/html/ContentScraper.java b/source/net/yacy/document/parser/html/ContentScraper.java index b4a657672..217db08eb 100644 --- a/source/net/yacy/document/parser/html/ContentScraper.java +++ b/source/net/yacy/document/parser/html/ContentScraper.java @@ -31,6 +31,7 @@ import java.io.IOException; import java.io.Writer; import java.net.MalformedURLException; import java.nio.charset.Charset; +import java.text.NumberFormat; import java.text.ParseException; import java.util.ArrayList; import java.util.Date; @@ -366,13 +367,15 @@ public class ContentScraper extends AbstractScraper implements Scraper { if (src.length() > 0) { final AnchorURL url = absolutePath(src); if (url != null) { - final int width = Integer.parseInt(tag.opts.getProperty("width", "-1")); - final int height = Integer.parseInt(tag.opts.getProperty("height", "-1")); + // use Numberformat.parse to allow parse of "550px" + NumberFormat intnum = NumberFormat.getIntegerInstance (); + final int width = intnum.parse(tag.opts.getProperty("width", "-1")).intValue(); // Integer.parseInt fails on "200px" + final int height = intnum.parse(tag.opts.getProperty("height", "-1")).intValue(); final ImageEntry ie = new ImageEntry(url, tag.opts.getProperty("alt", EMPTY_STRING), width, height, -1); this.images.add(ie); } } - } catch (final NumberFormatException e) {} + } catch (final ParseException e) {} this.evaluationScores.match(Element.imgpath, src); } else if(tag.name.equalsIgnoreCase("base")) { try { diff --git a/test/net/yacy/cora/document/id/DigestURLTest.java b/test/net/yacy/cora/document/id/DigestURLTest.java index 059650bac..227fe9858 100644 --- a/test/net/yacy/cora/document/id/DigestURLTest.java +++ b/test/net/yacy/cora/document/id/DigestURLTest.java @@ -10,7 +10,7 @@ public class DigestURLTest extends TestCase { public void testIdentPort() throws MalformedURLException { String[][] testStrings = new String[][]{ new String[]{"http://www.yacy.net:", "http://www.yacy.net/"}, - new String[]{"http://www.yacy.net:-1", "http://www.yacy.net/"}, + new String[]{"http://www.yacy.net:80", "http://www.yacy.net/"}, new String[]{"http://www.yacy.net:/", "http://www.yacy.net/"}, new String[]{"http://www.yacy.net: /", "http://www.yacy.net/"} }; diff --git a/test/net/yacy/document/parser/htmlParserTest.java b/test/net/yacy/document/parser/htmlParserTest.java index 9c0fafd93..beece1948 100644 --- a/test/net/yacy/document/parser/htmlParserTest.java +++ b/test/net/yacy/document/parser/htmlParserTest.java @@ -13,6 +13,7 @@ import net.yacy.cora.document.id.AnchorURL; import net.yacy.document.Document; import net.yacy.document.Parser; import net.yacy.document.parser.html.ContentScraper; +import net.yacy.document.parser.html.ImageEntry; import static net.yacy.document.parser.htmlParser.parseToScraper; import org.junit.Test; @@ -94,10 +95,11 @@ public class htmlParserTest extends TestCase { // expectation to deliver pure text as it is possibly indexed in outboundlinks_anchortext_txt/inboundlinks_anchortext_txt final AnchorURL url = new AnchorURL("http://localhost/"); final String mimetype = "text/html"; - final String testhtml = "" + final String testhtml = "" + "testtext" // "testtext" + " Start" // "Start" + "" // "" + image + + "
\"image" // + img width 550 (+html5 figure) + ""; ContentScraper scraper = parseToScraper(url, mimetype, testhtml, 10); @@ -113,6 +115,8 @@ public class htmlParserTest extends TestCase { assertEquals("", linktxt); int cnt = scraper.getImages().size(); - assertEquals(1,cnt); + assertEquals(2,cnt); + ImageEntry img = scraper.getImages().get(1); + assertEquals(550,img.width()); } }