Extract author and keywords in the .doc and .ppt parsers

pull/1/head
reger 11 years ago
parent a5707cd2eb
commit cb2c17d236

@ -86,6 +86,14 @@ public class docParser extends AbstractParser implements Parser {
if (title.length() == l) break; if (title.length() == l) break;
l = title.length(); l = title.length();
} }
// get keywords (for yacy as array)
final String keywords = extractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else {
keywlist = null;
}
Document[] docs; Document[] docs;
docs = new Document[]{new Document( docs = new Document[]{new Document(
@ -94,9 +102,9 @@ public class docParser extends AbstractParser implements Parser {
"UTF-8", "UTF-8",
this, this,
null, null,
null, keywlist,
singleList(title), singleList(title),
"", // TODO: AUTHOR extractor.getSummaryInformation().getAuthor(), // constructor can handle null
extractor.getDocSummaryInformation().getCompany(), // publisher extractor.getDocSummaryInformation().getCompany(), // publisher
null, null,
null, null,

@ -78,6 +78,12 @@ public class pptParser extends AbstractParser implements Parser {
if (title.length() == l) break; if (title.length() == l) break;
l = title.length(); l = title.length();
} }
// get keywords (for yacy as array)
final String keywords = pptExtractor.getSummaryInformation().getKeywords();
final String[] keywlist;
if (keywords != null && !keywords.isEmpty()) {
keywlist = keywords.split(",");
} else keywlist = null;
/* /*
* create the plasmaParserDocument for the database * create the plasmaParserDocument for the database
@ -89,9 +95,9 @@ public class pptParser extends AbstractParser implements Parser {
"UTF-8", "UTF-8",
this, this,
null, null,
null, keywlist,
singleList(title), singleList(title),
"", // TODO: AUTHOR pptExtractor.getSummaryInformation().getAuthor(), // may be null
pptExtractor.getDocSummaryInformation().getCompany(), pptExtractor.getDocSummaryInformation().getCompany(),
null, null,
null, null,

Loading…
Cancel
Save