diff --git a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
index b1d854387..1f584547e 100644
--- a/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
+++ b/source/de/anomic/htmlFilter/htmlFilterContentScraper.java
@@ -211,7 +211,8 @@ public class htmlFilterContentScraper extends htmlFilterAbstractScraper implemen
h = cleanLine(super.stripAll(new serverByteBuffer(text)).toString());
if (h.length() > 0) headlines[3].add(h);
}
- if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024)) title = cleanLine(super.stripAll(new serverByteBuffer(text)).toString()); // TODO: bugfix needed for UTF-8
+ if ((tagname.equalsIgnoreCase("title")) && (text.length < 1024))
+ title = cleanLine(super.stripAll(new serverByteBuffer(text)).toString(this.charset));
}
private static String cleanLine(String s) {
diff --git a/source/de/anomic/plasma/plasmaParser.java b/source/de/anomic/plasma/plasmaParser.java
index 9b3ffd6ab..d65573b3b 100644
--- a/source/de/anomic/plasma/plasmaParser.java
+++ b/source/de/anomic/plasma/plasmaParser.java
@@ -725,11 +725,11 @@ public final class plasmaParser {
serverFileUtils.write(contentBytes, contentFile);
}
- if ((args.length == 4)&&(args[2].equalsIgnoreCase("-m"))) {
+ if ((args.length >= 4)&&(args[2].equalsIgnoreCase("-m"))) {
contentMimeType = args[3];
}
- if ((args.length == 6)&&(args[4].equalsIgnoreCase("-c"))) {
+ if ((args.length >= 6)&&(args[4].equalsIgnoreCase("-c"))) {
charSet = args[5];
}
@@ -747,6 +747,9 @@ public final class plasmaParser {
// printing out all parsed sentences
if (document != null) {
+ System.out.print("Document titel: ");
+ System.out.println(document.getMainLongTitle());
+
// found text
String[] sentences = document.getSentences();
if (sentences != null) {