fix pdfParser not closed warning from pdfbox

for encrypted PDFs, when parsing exits early due to missing permission to extract content
pull/1/head
reger 11 years ago
parent c798a9d1bb
commit 09f73b790f

@ -107,8 +107,10 @@ public class pdfParser extends AbstractParser implements Parser {
throw new Parser.Failure("Document is encrypted (3): " + e.getMessage(), location); throw new Parser.Failure("Document is encrypted (3): " + e.getMessage(), location);
} }
final AccessPermission perm = pdfDoc.getCurrentAccessPermission(); final AccessPermission perm = pdfDoc.getCurrentAccessPermission();
if (perm == null || !perm.canExtractContent()) if (perm == null || !perm.canExtractContent()) {
try {pdfDoc.close();} catch (final IOException ee) {}
throw new Parser.Failure("Document is encrypted and cannot be decrypted", location); throw new Parser.Failure("Document is encrypted and cannot be decrypted", location);
}
} }
// extracting some metadata // extracting some metadata
@ -131,16 +133,16 @@ public class pdfParser extends AbstractParser implements Parser {
if (docTitle == null || docTitle.isEmpty()) { if (docTitle == null || docTitle.isEmpty()) {
docTitle = MultiProtocolURL.unescape(location.getFileName()); docTitle = MultiProtocolURL.unescape(location.getFileName());
} }
final CharBuffer writer = new CharBuffer(odtParser.MAX_DOCSIZE); final CharBuffer writer = new CharBuffer(odtParser.MAX_DOCSIZE);
byte[] contentBytes = new byte[0]; byte[] contentBytes = new byte[0];
try { try {
// create a writer for output // create a writer for output
final PDFTextStripper stripper = new PDFTextStripper(); final PDFTextStripper stripper = new PDFTextStripper();
stripper.setEndPage(3); // get first 3 pages (always) stripper.setEndPage(3); // get first 3 pages (always)
writer.append(stripper.getText(pdfDoc)); writer.append(stripper.getText(pdfDoc));
contentBytes = writer.getBytes(); // remember text in case of interrupting thread contentBytes = writer.getBytes(); // remember text in case of interrupting thread
stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text) stripper.setStartPage(4); // continue with page 4 (terminated, resulting in no text)
stripper.setEndPage(Integer.MAX_VALUE); // set to default stripper.setEndPage(Integer.MAX_VALUE); // set to default
// we start the pdf parsing in a separate thread to ensure that it can be terminated // we start the pdf parsing in a separate thread to ensure that it can be terminated
@ -149,14 +151,14 @@ public class pdfParser extends AbstractParser implements Parser {
@Override @Override
public void run() { public void run() {
Thread.currentThread().setName("pdfParser.getText:" + location); Thread.currentThread().setName("pdfParser.getText:" + location);
try { try {
writer.append(stripper.getText(pdfDocC)); writer.append(stripper.getText(pdfDocC));
} catch (final Throwable e) {} } catch (final Throwable e) {}
} }
}; };
t.start(); t.start();
t.join(3000); t.join(3000);
if (t.isAlive()) t.interrupt(); if (t.isAlive()) t.interrupt();
pdfDoc.close(); pdfDoc.close();
contentBytes = writer.getBytes(); // get final text before closing writer contentBytes = writer.getBytes(); // get final text before closing writer
} catch (final Throwable e) { } catch (final Throwable e) {
@ -176,7 +178,7 @@ public class pdfParser extends AbstractParser implements Parser {
if (docTitle == null) { if (docTitle == null) {
docTitle = docSubject; docTitle = docSubject;
} }
// clear resources in pdfbox. they say that is resolved but it's not. see: // clear resources in pdfbox. they say that is resolved but it's not. see:
// https://issues.apache.org/jira/browse/PDFBOX-313 // https://issues.apache.org/jira/browse/PDFBOX-313
// https://issues.apache.org/jira/browse/PDFBOX-351 // https://issues.apache.org/jira/browse/PDFBOX-351

Loading…
Cancel
Save