diff --git a/source/net/yacy/document/TextParser.java b/source/net/yacy/document/TextParser.java
index f077feb7c..b4e27a421 100644
--- a/source/net/yacy/document/TextParser.java
+++ b/source/net/yacy/document/TextParser.java
@@ -66,6 +66,7 @@ import net.yacy.document.parser.xlsParser;
import net.yacy.document.parser.zipParser;
import net.yacy.document.parser.images.genericImageParser;
import net.yacy.document.parser.images.metadataImageParser;
+import net.yacy.document.parser.images.svgParser;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.MemoryControl;
@@ -105,6 +106,7 @@ public final class TextParser {
initParser(new rtfParser());
initParser(new sevenzipParser());
initParser(new sidAudioParser());
+ initParser(new svgParser());
initParser(new swfParser());
initParser(new tarParser());
initParser(new torrentParser());
diff --git a/source/net/yacy/document/parser/images/svgParser.java b/source/net/yacy/document/parser/images/svgParser.java
new file mode 100644
index 000000000..dda4ff7b5
--- /dev/null
+++ b/source/net/yacy/document/parser/images/svgParser.java
@@ -0,0 +1,257 @@
+/**
+ * svgParser.java
+ * Copyright 2015 by Burkhard Buelte
+ * First released 26.09.2015 at http://yacy.net
+ *
+ * This library is free software; you can redistribute it and/or modify it under
+ * the terms of the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 2.1 of the License, or (at your option)
+ * any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program in the file lgpl21.txt If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+package net.yacy.document.parser.images;
+
+import java.io.EOFException;
+import java.io.InputStream;
+import java.util.LinkedHashMap;
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
+import net.yacy.cora.document.id.AnchorURL;
+import net.yacy.cora.document.id.DigestURL;
+import net.yacy.cora.document.id.MultiProtocolURL;
+import net.yacy.cora.util.ConcurrentLog;
+import net.yacy.cora.util.NumberTools;
+import net.yacy.document.AbstractParser;
+import net.yacy.document.Document;
+import net.yacy.document.Parser;
+import net.yacy.document.VocabularyScraper;
+import net.yacy.document.parser.html.ImageEntry;
+
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * Metadata parser for svg image files (which are xml files) SVG 1.1 (Second Edition)
+ * http://www.w3.org/TR/SVG/metadata.html#MetadataElement according to SVG 1.1
+ * parser stops parsing after the first metadata elment has been read and
+ * document level metadata are expected picture data (as proposed in spec) like
+ *
+ */
+public class svgParser extends AbstractParser implements Parser {
+
+ public svgParser() {
+ super("SVG Image Parser");
+ this.SUPPORTED_EXTENSIONS.add("svg");
+ this.SUPPORTED_MIME_TYPES.add("image/svg+xml");
+ }
+
+ private static final ThreadLocal tlSax = new ThreadLocal();
+
+ private static SAXParser getParser() throws SAXException {
+ SAXParser parser = tlSax.get();
+ if (parser == null) {
+ try {
+ parser = SAXParserFactory.newInstance().newSAXParser();
+ } catch (final ParserConfigurationException e) {
+ throw new SAXException(e.getMessage(), e);
+ }
+ tlSax.set(parser);
+ }
+ return parser;
+ }
+
+ @Override
+ public Document[] parse(
+ final AnchorURL location,
+ final String mimeType,
+ final String charset,
+ final VocabularyScraper scraper,
+ final int timezoneOffset,
+ final InputStream source) throws Parser.Failure, InterruptedException {
+
+ try {
+ final SAXParser saxParser = getParser();
+ final svgMetaDataHandler metaData = new svgMetaDataHandler();
+ try {
+ saxParser.parse(source, metaData);
+ } catch (SAXException e) {
+ // catch EOFException which is intentionally thrown after capturing metadata to skip further reading (not a error, just a way to get out of SAX)
+ if (e.getException() == null || !(e.getException() instanceof EOFException)) {
+ throw new Parser.Failure("Unexpected error while parsing svg file. " + e.getMessage(), location);
+ }
+ }
+
+ String docTitle = metaData.getTitle();
+ if (docTitle == null) { // use filename like in genericParser
+ docTitle = location.getFileName().isEmpty() ? location.toTokens() : MultiProtocolURL.unescape(location.getFileName()); //
+ }
+ String docDescription = metaData.getDescription();
+ if (docDescription == null) { // use url token as in genericParser
+ docDescription = location.toTokens();
+ }
+
+ LinkedHashMap images = null;
+ // add this image to the map of images to register size (as in genericImageParser)
+ if (metaData.getHeight() != null && metaData.getWidth() != null) {
+ images = new LinkedHashMap();
+ images.put(location, new ImageEntry(location, "", metaData.getWidth(), metaData.getHeight(), -1));
+ }
+
+ // create the parser document
+ Document[] docs = new Document[]{new Document(
+ location,
+ mimeType,
+ "UTF-8",
+ this,
+ null,
+ null,
+ AbstractParser.singleList(docTitle),
+ null,
+ "",
+ null,
+ null,
+ 0.0f, 0.0f,
+ docDescription, // text - for this image description is best text we have
+ null,
+ null,
+ images,
+ false,
+ null)};
+ return docs;
+ } catch (final Exception e) {
+ if (e instanceof InterruptedException) {
+ throw (InterruptedException) e;
+ }
+ if (e instanceof Parser.Failure) {
+ throw (Parser.Failure) e;
+ }
+
+ ConcurrentLog.logException(e);
+ throw new Parser.Failure("Unexpected error while parsing odt file. " + e.getMessage(), location);
+ }
+ }
+
+ /**
+ * SAX handler for svg metadata
+ */
+ public class svgMetaDataHandler extends DefaultHandler {
+
+ private final StringBuilder buffer = new StringBuilder();
+ private boolean scrapeMetaData = false; // true if within metadata tag
+
+ private String docTitle = null; // document level title
+ private String docDescription = null; // document level description
+ private String imgWidth = null; // size in pixel
+ private String imgHeight = null;
+
+ public svgMetaDataHandler() {
+ }
+
+ @Override
+ public void characters(final char ch[], final int start, final int length) {
+ buffer.append(ch, start, length);
+ }
+
+ @Override
+ public void startElement(final String uri, final String name, final String tag, final Attributes atts) throws SAXException {
+ if (scrapeMetaData) {
+ // not implemented yet TODO: interprete RDF content
+ // may contain RDF + DC, DC, CC ...
+ } else {
+ if (null != tag) {
+ switch (tag) {
+ case "svg":
+ imgHeight = atts.getValue("height");
+ imgWidth = atts.getValue("width");
+ break;
+ case "metadata":
+ scrapeMetaData = true;
+ break;
+ // some common graph elements as stop condition (skip reading remainder of input), metadata is expected before graphic content
+ case "g":
+ case "line":
+ case "path":
+ case "rect":
+ throw new SAXException("EOF svg Metadata", new EOFException());
+ }
+ }
+ }
+ buffer.delete(0, buffer.length());
+ }
+
+ @Override
+ public void endElement(final String uri, final String name, final String tag) throws SAXException {
+ if (scrapeMetaData) {
+ // stop condition, scrape only first metadata element
+ if ("metadata".equals(tag)) {
+ scrapeMetaData = false;
+ buffer.delete(0, buffer.length());
+ // we have read metadate, other data are not of interest here, end parsing
+ throw new SAXException("EOF svg Metadata", new EOFException());
+ }
+ } else if ("title".equals(tag)) {
+ this.docTitle = buffer.toString();
+ } else if ("desc".equals(tag)) {
+ this.docDescription = buffer.toString();
+ }
+ buffer.delete(0, buffer.length());
+ }
+
+ /**
+ * @return document level title or null
+ */
+ public String getTitle() {
+ return docTitle;
+ }
+
+ /**
+ * @return document level description or null
+ */
+ public String getDescription() {
+ return docDescription;
+ }
+
+ /**
+ * @return image width in pixel or null
+ */
+ public Integer getWidth() {
+ if (imgWidth != null) {
+ // return number if given in pixel or a number only, return nothing for size like "100%"
+ if ((imgWidth.indexOf("px") > 0) || ((imgWidth.charAt(imgWidth.length() - 1) >= '0' && imgWidth.charAt(imgWidth.length() - 1) <= '9'))) {
+ return NumberTools.parseIntDecSubstring(imgWidth);
+ }
+ }
+ return null;
+ }
+
+ /**
+ * @return image height in pixel or null
+ */
+ public Integer getHeight() {
+ if (imgHeight != null) {
+ // return number if given in pixel or a number only, return nothing for size like "100%"
+ if ((imgHeight.indexOf("px") > 0) || ((imgHeight.charAt(imgHeight.length() - 1) >= '0' && imgHeight.charAt(imgHeight.length() - 1) <= '9'))) {
+ return NumberTools.parseIntDecSubstring(imgHeight);
+ }
+ }
+ return null;
+ }
+ }
+}