@@ -80,10 +80,10 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     // statics: for initialization of the HTMLFilterAbstractScraper
     /** Set of tag names processed as singletons (no end tag, or not processing the eventual end tag) */
-    private static final Set<String> linkTags0 = new HashSet<String>(12, 0.99f);
+    private static final Set<String> linkTags0 = new HashSet<>(12, 0.99f);
     /** Set of tag names processed by pairs of start and end tag */
-    private static final Set<String> linkTags1 = new HashSet<String>(15, 0.99f);
+    private static final Set<String> linkTags1 = new HashSet<>(15, 0.99f);
     private static final Pattern LB = Pattern.compile("\n");
@@ -147,19 +147,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         public Properties opts;
         public CharBuffer content;
         private TagValency tv;
-        public Tag(final String name, TagValency defaultValency) {
+        public Tag(final String name, final TagValency defaultValency) {
             this.name = name;
             this.tv = defaultValency;
             this.opts = new Properties();
             this.content = new CharBuffer(MAX_TAGSIZE);
         }
-        public Tag(final String name, TagValency defaultValency, final Properties opts) {
+        public Tag(final String name, final TagValency defaultValency, final Properties opts) {
             this.name = name;
             this.tv = defaultValency;
             this.opts = opts;
             this.content = new CharBuffer(MAX_TAGSIZE);
         }
-        public Tag(final String name, TagValency defaultValency, final Properties opts, final CharBuffer content) {
+        public Tag(final String name, final TagValency defaultValency, final Properties opts, final CharBuffer content) {
             this.name = name;
             this.tv = defaultValency;
             this.opts = opts;
@@ -173,7 +173,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         }
         @Override
         public String toString() {
-            return "<" + name + " " + opts + ">" + content + "</" + name + ">";
+            return "<" + this.name + " " + this.opts + ">" + this.content + "</" + this.name + ">";
         }
         /** @return true when this tag should be ignored from scraping */
@ -212,7 +212,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
private final SizeLimitedMap < String , String > metas ;
private final SizeLimitedMap < String , DigestURL > hreflang , navigation ;
private LinkedHashSet < String > titles ;
private final LinkedHashSet < String > titles ;
private final List < String > articles ;
private final List < Date > startDates , endDates ;
//private String headline;
@@ -274,7 +274,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             final Set<String> valencySwitchTagNames,
             final TagValency defaultValency,
             final VocabularyScraper vocabularyScraper,
-            int timezoneOffset) {
+            final int timezoneOffset) {
         // the root value here will not be used to load the resource.
         // it is only the reference for relative links
         super(linkTags0, linkTags1);
@@ -285,31 +285,31 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         this.defaultValency = defaultValency;
         this.timezoneOffset = timezoneOffset;
         this.evaluationScores = new Evaluation();
-        this.rss = new SizeLimitedMap<DigestURL, String>(maxLinks);
-        this.css = new SizeLimitedMap<DigestURL, String>(maxLinks);
-        this.anchors = new ArrayList<AnchorURL>();
-        this.images = new ArrayList<ImageEntry>();
+        this.rss = new SizeLimitedMap<>(maxLinks);
+        this.css = new SizeLimitedMap<>(maxLinks);
+        this.anchors = new ArrayList<>();
+        this.images = new ArrayList<>();
         this.icons = new HashMap<>();
-        this.embeds = new SizeLimitedMap<AnchorURL, EmbedEntry>(maxLinks);
-        this.frames = new SizeLimitedSet<AnchorURL>(maxLinks);
-        this.iframes = new SizeLimitedSet<AnchorURL>(maxLinks);
+        this.embeds = new SizeLimitedMap<>(maxLinks);
+        this.frames = new SizeLimitedSet<>(maxLinks);
+        this.iframes = new SizeLimitedSet<>(maxLinks);
         this.linkedDataTypes = new SizeLimitedSet<>(maxLinks);
-        this.metas = new SizeLimitedMap<String, String>(maxLinks);
-        this.hreflang = new SizeLimitedMap<String, DigestURL>(maxLinks);
-        this.navigation = new SizeLimitedMap<String, DigestURL>(maxLinks);
-        this.script = new SizeLimitedSet<AnchorURL>(maxLinks);
-        this.titles = new LinkedHashSet<String>();
-        this.articles = new ArrayList<String>();
+        this.metas = new SizeLimitedMap<>(maxLinks);
+        this.hreflang = new SizeLimitedMap<>(maxLinks);
+        this.navigation = new SizeLimitedMap<>(maxLinks);
+        this.script = new SizeLimitedSet<>(maxLinks);
+        this.titles = new LinkedHashSet<>();
+        this.articles = new ArrayList<>();
         this.startDates = new ArrayList<>();
         this.endDates = new ArrayList<>();
         this.headlines = (List<String>[]) Array.newInstance(ArrayList.class, 6);
-        for (int i = 0; i < this.headlines.length; i++) this.headlines[i] = new ArrayList<String>();
-        this.bold = new ClusteredScoreMap<String>(false);
-        this.italic = new ClusteredScoreMap<String>(false);
-        this.underline = new ClusteredScoreMap<String>(false);
-        this.li = new ArrayList<String>();
-        this.dt = new ArrayList<String>();
-        this.dd = new ArrayList<String>();
+        for (int i = 0; i < this.headlines.length; i++) this.headlines[i] = new ArrayList<>();
+        this.bold = new ClusteredScoreMap<>(false);
+        this.italic = new ClusteredScoreMap<>(false);
+        this.underline = new ClusteredScoreMap<>(false);
+        this.li = new ArrayList<>();
+        this.dt = new ArrayList<>();
+        this.dd = new ArrayList<>();
         this.content = new CharBuffer(MAX_DOCSIZE, 1024);
         this.htmlFilterEventListeners = new EventListenerList();
         this.lon = 0.0d;
@@ -336,10 +336,11 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             final Set<String> valencySwitchTagNames,
             final TagValency defaultValency,
             final VocabularyScraper vocabularyScraper,
-            int timezoneOffset) {
+            final int timezoneOffset) {
         this(root, Integer.MAX_VALUE, maxLinks, valencySwitchTagNames, defaultValency, vocabularyScraper, timezoneOffset);
     }
+    @Override
     public TagValency defaultValency() {
         return this.defaultValency;
     }
@@ -360,7 +361,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             }
         }
         int p, pl, q, s = 0;
-        char[] newtext = CharacterCoding.html2unicode(new String(newtext0)).toCharArray();
+        final char[] newtext = CharacterCoding.html2unicode(new String(newtext0)).toCharArray();
         // match evaluation pattern
         this.evaluationScores.match(Element.text, newtext);
@ -430,7 +431,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
// find absolute URLs inside text
final Object [ ] listeners = this . htmlFilterEventListeners . getListenerList ( ) ;
List < ContentScraperListener > anchorListeners = new ArrayList < > ( ) ;
final List < ContentScraperListener > anchorListeners = new ArrayList < > ( ) ;
for ( int i = 0 ; i < listeners . length ; i + = 2 ) {
if ( listeners [ i ] = = ContentScraperListener . class ) {
anchorListeners . add ( ( ContentScraperListener ) listeners [ i + 1 ] ) ;
@@ -508,7 +509,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                 urls.add(url);
             }
             if (listeners != null) {
-                for (ContentScraperListener listener : listeners) {
+                for (final ContentScraperListener listener : listeners) {
                     listener.anchorAdded(url.toNormalform(false));
                 }
             }
@@ -636,7 +637,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         this.vocabularyScraper.check(this.root, classprop, tag.content);
         // itemprop microdata property (standard definition at https://www.w3.org/TR/microdata/#dfn-attr-itemprop)
-        String itemprop = tag.opts.getProperty("itemprop");
+        final String itemprop = tag.opts.getProperty("itemprop");
         if (itemprop != null) {
             String propval = tag.opts.getProperty("content"); // value for <meta itemprop="" content=""> see https://html.spec.whatwg.org/multipage/microdata.html#values
             if (propval == null) propval = tag.opts.getProperty("datetime"); // html5 + schema.org#itemprop example: <time itemprop="startDate" datetime="2016-01-26">today</time> while each prop is optional
@@ -654,16 +655,16 @@ public class ContentScraper extends AbstractScraper implements Scraper {
                 case "startDate": // <meta itemprop="startDate" content="2016-04-21T20:00">
                     try {
                         // parse ISO 8601 date
-                        Date startDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
+                        final Date startDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
                         this.startDates.add(startDate);
-                    } catch (ParseException e) {}
+                    } catch (final ParseException e) {}
                     break;
                 case "endDate":
                     try {
                         // parse ISO 8601 date
-                        Date endDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
+                        final Date endDate = ISO8601Formatter.FORMATTER.parse(propval, this.timezoneOffset).getTime();
                         this.endDates.add(endDate);
-                    } catch (ParseException e) {}
+                    } catch (final ParseException e) {}
                     break;
             }
         }
@@ -679,16 +680,16 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * sizes attribute string, may be null
      * @return a set of sizes eventually empty.
      */
-    public static Set<Dimension> parseSizes(String sizesAttr) {
-        Set<Dimension> sizes = new HashSet<Dimension>();
-        Set<String> tokens = parseSpaceSeparatedTokens(sizesAttr);
-        for (String token : tokens) {
+    public static Set<Dimension> parseSizes(final String sizesAttr) {
+        final Set<Dimension> sizes = new HashSet<>();
+        final Set<String> tokens = parseSpaceSeparatedTokens(sizesAttr);
+        for (final String token : tokens) {
             /*
              * "any" keyword may be present, but doesn't have to produce a
              * dimension result
              */
             if (token != null) {
-                Matcher matcher = IconEntry.SIZE_PATTERN.matcher(token);
+                final Matcher matcher = IconEntry.SIZE_PATTERN.matcher(token);
                 if (matcher.matches()) {
                     /* With given pattern no NumberFormatException can occur */
                     sizes.add(new Dimension(Integer.parseInt(matcher.group(1)), Integer.parseInt(matcher.group(2))));
@@ -708,11 +709,11 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * @return a set of tokens eventually empty
      */
     public static Set<String> parseSpaceSeparatedTokens(final String attr) {
-        Set<String> tokens = new HashSet<>();
+        final Set<String> tokens = new HashSet<>();
         /* Check attr string is not empty to avoid adding a single empty string
          * in result */
         if (attr != null && !attr.trim().isEmpty()) {
-            String[] items = attr.trim().split(CommonPattern.SPACES.pattern());
+            final String[] items = attr.trim().split(CommonPattern.SPACES.pattern());
             Collections.addAll(tokens, items);
         }
         return tokens;
@@ -723,9 +724,9 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * @param relTokens relationship tokens (parsed from a rel attribute)
      * @return a Set of icon relations, eventually empty
      */
-    public Set<String> retainIconRelations(Collection<String> relTokens) {
-        HashSet<String> iconRels = new HashSet<>();
-        for (String token : relTokens) {
+    public Set<String> retainIconRelations(final Collection<String> relTokens) {
+        final HashSet<String> iconRels = new HashSet<>();
+        for (final String token : relTokens) {
             if (IconLinkRelations.isIconRel(token)) {
                 iconRels.add(token.toLowerCase(Locale.ENGLISH));
             }
@@ -803,7 +804,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             final String href = tag.opts.getProperty("href", EMPTY_STRING);
             if (href.length() > 0) {
                 tag.opts.put("name", areatitle);
-                AnchorURL url = absolutePath(href);
+                final AnchorURL url = absolutePath(href);
                 if (url != null) {
                     tag.opts.put("href", url.toNormalform(true));
                     url.setAll(tag.opts);
@@ -816,19 +817,19 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             if (newLink != null) {
                 tag.opts.put("href", newLink.toNormalform(true));
-                String rel = tag.opts.getProperty("rel", EMPTY_STRING);
+                final String rel = tag.opts.getProperty("rel", EMPTY_STRING);
                 /* Rel attribute is supposed to be a set of space-separated tokens */
-                Set<String> relTokens = parseSpaceSeparatedTokens(rel);
+                final Set<String> relTokens = parseSpaceSeparatedTokens(rel);
                 final String linktitle = tag.opts.getProperty("title", EMPTY_STRING);
                 final String type = tag.opts.getProperty("type", EMPTY_STRING);
                 final String hreflang = tag.opts.getProperty("hreflang", EMPTY_STRING);
-                Set<String> iconRels = retainIconRelations(relTokens);
+                final Set<String> iconRels = retainIconRelations(relTokens);
                 /* Distinguish icons from images. It will enable for example to later search only images and no icons */
                 if (!iconRels.isEmpty()) {
-                    String sizesAttr = tag.opts.getProperty("sizes", EMPTY_STRING);
-                    Set<Dimension> sizes = parseSizes(sizesAttr);
+                    final String sizesAttr = tag.opts.getProperty("sizes", EMPTY_STRING);
+                    final Set<Dimension> sizes = parseSizes(sizesAttr);
                     IconEntry icon = this.icons.get(newLink);
                     /* There is already an icon with same URL for this document:
                      * they may have different rel attribute or different sizes (multi sizes ico file) or this may be a duplicate */
@@ -880,7 +881,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         } else if (tag.name.equalsIgnoreCase("param")) {
             final String name = tag.opts.getProperty("name", EMPTY_STRING);
             if (name.equalsIgnoreCase("movie")) {
-                AnchorURL url = absolutePath(tag.opts.getProperty("value", EMPTY_STRING));
+                final AnchorURL url = absolutePath(tag.opts.getProperty("value", EMPTY_STRING));
                 if (url != null) {
                     tag.opts.put("value", url.toNormalform(true));
                     url.setAll(tag.opts);
@@ -918,7 +919,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         checkOpts(tag);
         // System.out.println("ScrapeTag1: tag.tagname=" + tag.tagname + ", opts=" + tag.opts.toString() + ", text=" + UTF8.String(text));
         if (tag.name.equalsIgnoreCase("a") && tag.content.length() < 2048) {
-            String href = tag.opts.getProperty("href", EMPTY_STRING);
+            final String href = tag.opts.getProperty("href", EMPTY_STRING);
             AnchorURL url;
             if ((href.length() > 0) && ((url = absolutePath(href)) != null)) {
                 if (followDenied()) {
@@ -939,7 +940,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             this.evaluationScores.match(Element.divid, id);
             final String itemtype = tag.opts.getProperty("itemtype", EMPTY_STRING);
             if (itemtype.equals("http://data-vocabulary.org/Breadcrumb")) {
-                breadcrumbs++;
+                this.breadcrumbs++;
             }
         } else if ((tag.name.equalsIgnoreCase("h1")) && (tag.content.length() < 1024)) {
             h = cleanLine(CharacterCoding.html2unicode(stripAllTags(tag.content.getChars())));
@@ -990,7 +991,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         } else if (tag.name.equalsIgnoreCase("script")) {
             final String src = tag.opts.getProperty("src", EMPTY_STRING);
             if (src.length() > 0) {
-                AnchorURL absoluteSrc = absolutePath(src);
+                final AnchorURL absoluteSrc = absolutePath(src);
                 if (absoluteSrc != null) {
                     this.script.add(absoluteSrc);
                 }
@@ -1005,9 +1006,9 @@ public class ContentScraper extends AbstractScraper implements Scraper {
             h = tag.opts.getProperty("datetime"); // TODO: checkOpts() also parses datetime property if in combination with schema.org itemprop=startDate/endDate
             if (h != null) { // datetime property is optional
                 try {
-                    Date startDate = ISO8601Formatter.FORMATTER.parse(h, this.timezoneOffset).getTime();
+                    final Date startDate = ISO8601Formatter.FORMATTER.parse(h, this.timezoneOffset).getTime();
                     this.startDates.add(startDate);
-                } catch (ParseException ex) {}
+                } catch (final ParseException ex) {}
             }
         }
@@ -1049,7 +1050,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
      * Add an anchor to the anchors list, and trigger any eventual listener
      * @param anchor anchor to add. Must not be null.
      */
-    protected void addAnchor(AnchorURL anchor) {
+    protected void addAnchor(final AnchorURL anchor) {
         if (this.anchors.size() >= this.maxAnchors) {
             this.maxAnchorsExceeded = true;
         } else {
@@ -1067,7 +1068,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public List<String> getTitles() {
         // some documents have a title tag as meta tag
-        String s = this.metas.get("title");
+        final String s = this.metas.get("title");
         if (s != null && s.length() > 0) {
             this.titles.add(s);
         }
@@ -1083,7 +1084,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         }
         // extract headline from file name
-        ArrayList<String> t = new ArrayList<String>();
+        final ArrayList<String> t = new ArrayList<>();
         t.addAll(this.titles);
         return t;
     }
@@ -1094,7 +1095,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
     public String[] getBold() {
-        final List<String> a = new ArrayList<String>();
+        final List<String> a = new ArrayList<>();
         final Iterator<String> i = this.bold.keys(false);
         while (i.hasNext()) a.add(i.next());
         return a.toArray(new String[a.size()]);
@@ -1107,7 +1108,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
     public String[] getItalic() {
-        final List<String> a = new ArrayList<String>();
+        final List<String> a = new ArrayList<>();
         final Iterator<String> i = this.italic.keys(false);
         while (i.hasNext()) a.add(i.next());
         return a.toArray(new String[a.size()]);
@@ -1120,7 +1121,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     }
     public String[] getUnderline() {
-        final List<String> a = new ArrayList<String>();
+        final List<String> a = new ArrayList<>();
         final Iterator<String> i = this.underline.keys(false);
         while (i.hasNext()) a.add(i.next());
         return a.toArray(new String[a.size()]);
@@ -1154,7 +1155,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public DigestURL[] getFlash() {
         String ext;
-        ArrayList<DigestURL> f = new ArrayList<DigestURL>();
+        final ArrayList<DigestURL> f = new ArrayList<>();
         for (final DigestURL url : this.anchors) {
             ext = MultiProtocolURL.getFileExtension(url.getFileName());
             if (ext == null) continue;
@@ -1323,7 +1324,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
     public List<String> getDescriptions() {
         String s = this.metas.get("description");
         if (s == null) s = this.metas.get("dc.description");
-        List<String> descriptions = new ArrayList<String>();
+        final List<String> descriptions = new ArrayList<>();
         if (s == null) return descriptions;
         descriptions.add(s);
         return descriptions;
@@ -1358,7 +1359,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         String s = this.metas.get("content-language");
         if (s == null) s = this.metas.get("dc.language");
         if (s == null) return null;
-        final Set<String> hs = new HashSet<String>();
+        final Set<String> hs = new HashSet<>();
         final String[] cl = commaSepPattern.split(s);
         int p;
         for (int i = 0; i < cl.length; i++) {
@@ -1378,9 +1379,32 @@ public class ContentScraper extends AbstractScraper implements Scraper {
         if (s.isEmpty()) {
             return new String[0];
         }
-        if (s.contains(",")) return commaSepPattern.split(s);
-        if (s.contains(";")) return semicSepPattern.split(s);
-        return s.split("\\s");
+        String[] k = null;
+        if (s.contains(","))
+            k = commaSepPattern.split(s);
+        else if (s.contains(";"))
+            k = semicSepPattern.split(s);
+        else
+            k = s.split("\\s");
+
+        // trim the Strings
+        for (int i = 0; i < k.length; i++)
+            k[i] = k[i].trim();
+
+        // remove empty strings
+        int p = 0;
+        while (p < k.length) {
+            if (k[p].length() == 0) {
+                final String[] k1 = new String[k.length - 1];
+                System.arraycopy(k, 0, k1, 0, p);
+                System.arraycopy(k, p + 1, k1, p, k1.length - p);
+                k = k1;
+            } else {
+                p++;
+            }
+        }
+        return k;
     }

     public int getRefreshSeconds() {
@ -1412,27 +1436,27 @@ public class ContentScraper extends AbstractScraper implements Scraper {
// <meta name="date" content="YYYY-MM-DD..." />
content = this . metas . get ( "date" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
// <meta name="DC.date.modified" content="YYYY-MM-DD" />
content = this . metas . get ( "dc.date.modified" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
// <meta name="DC.date.created" content="YYYY-MM-DD" />
content = this . metas . get ( "dc.date.created" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
// <meta name="DC.date" content="YYYY-MM-DD" />
content = this . metas . get ( "dc.date" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
// <meta name="DC:date" content="YYYY-MM-DD" />
content = this . metas . get ( "dc:date" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
// <meta http-equiv="last-modified" content="YYYY-MM-DD" />
content = this . metas . get ( "last-modified" ) ;
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( ParseException e ) { }
if ( content ! = null ) try { return ISO8601Formatter . FORMATTER . parse ( content , this . timezoneOffset ) . getTime ( ) ; } catch ( final ParseException e ) { }
return new Date ( ) ;
}
@ -1482,7 +1506,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
public String [ ] getEvaluationModelScoreNames ( final String modelName ) {
final List < String > a = new ArrayList < String > ( ) ;
final List < String > a = new ArrayList < > ( ) ;
final ClusteredScoreMap < String > scores = this . evaluationScores . getScores ( modelName ) ;
if ( scores ! = null ) {
final Iterator < String > i = scores . keys ( false ) ;
@ -1537,7 +1561,7 @@ public class ContentScraper extends AbstractScraper implements Scraper {
}
public void print ( ) {
for ( String t : this . titles ) {
for ( final String t : this . titles ) {
System . out . println ( "TITLE :" + t ) ;
}
for ( int i = 0 ; i < 4 ; i + + ) {