Skip to content

Commit

Permalink
- Optimize performance
Browse files (browse the repository at this point in the history)
- Disable commons-logging in Apache PDFBox
  • Loading branch information
hwding committed Aug 27, 2017
1 parent 6ab6df4 commit fb7cf0e
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 23 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.amastigote</groupId>
<artifactId>unstamper</artifactId>
<version>0.1.0</version>
<version>0.1.1</version>
<description>text stamp remover for PDF files</description>
<name>pdf-unstamper</name>
<url>https://github.com/hwding/pdf-unstamper</url>
Expand Down
2 changes: 1 addition & 1 deletion script/install
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
user_bin=`echo ~`"/bin/"
jar_name="pdf-unstamper.jar"
exe_name="unstamp"
_version="0.1.0"
_version="0.1.1"
jar_durl="https://github.com/hwding/pdf-unstamper/releases/download/$_version/$jar_name"

function chk_f() {
Expand Down
6 changes: 6 additions & 0 deletions src/com/amastigote/unstamper/Main.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@

public class Main {

static {
/*
 * Disable logging in Apache PDFBox.
 *
 * Sets the "org.apache.commons.logging.Log" system property to the
 * commons-logging no-op implementation (NoOpLog). This lives in a static
 * initializer so the property is already set when main() starts running.
 */
System.setProperty("org.apache.commons.logging.Log",
"org.apache.commons.logging.impl.NoOpLog");
}

public static void main(String[] args) {
CommandLine commandLine = null;
try {
Expand Down
59 changes: 39 additions & 20 deletions src/com/amastigote/unstamper/core/Processor.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
Expand All @@ -37,33 +38,51 @@ public static void process(File file, String[] strings) {
/* START: loading font resources for further parsing */
PDFStreamParser pdfStreamParser = new PDFStreamParser(pdPage);
pdfStreamParser.parse();
List<Object> objects = pdfStreamParser.getTokens();
List<Object> cosNames = objects.parallelStream()
.filter(e -> e instanceof COSName)
.collect(Collectors.toList());
Set<PDFont> pdFonts = new HashSet<>();
cosNames.forEach(e -> {
try {
PDFont pdFont = pdPage.getResources().getFont(((COSName) e));
if (pdFont != null)
pdFonts.add(pdFont);
} catch (IOException ignored) {
}
});

List<Object> objects =
Collections.synchronizedList(pdfStreamParser.getTokens());

List<Object> cosNames =
objects.parallelStream()
.filter(e -> e instanceof COSName)
.collect(Collectors.toList());

Set<PDFont> pdFonts =
Collections.synchronizedSet(new HashSet<>());

cosNames.parallelStream()
.forEach(e -> {
/* Ignore Any Exception During Parallel Processing */
try {
PDFont pdFont = pdPage.getResources().getFont(((COSName) e));
if (pdFont != null)
pdFonts.add(pdFont);
} catch (Exception ignored) {
}
});
/* END */
for (Object o : objects) {
if (o instanceof COSString) {
if (TextStampRecognizer.recognize(strings, ((COSString) o).getBytes(), pdFonts))
((COSString) o).setValue(new byte[0]);
}
}
objects
.parallelStream()
.forEach(e -> {
if (e instanceof COSString) {
/* Ignore Any Exception During Parallel Processing */
try {
if (TextStampRecognizer.recognize(strings, ((COSString) e).getBytes(), pdFonts))
((COSString) e).setValue(new byte[0]);
} catch (Exception ignored) {
}
}
}
);

PDStream newContents = new PDStream(pdDocument);
OutputStream out = newContents.createOutputStream();
ContentStreamWriter writer = new ContentStreamWriter(out);
writer.writeTokens(objects);
out.close();

pdPage.setContents(newContents);
} catch (IOException e) {
} catch (Exception e) {
GeneralLogger.Processor.errorProcess(file.getName());
}
});
Expand Down
2 changes: 1 addition & 1 deletion src/com/amastigote/unstamper/log/GeneralLogger.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
public class GeneralLogger {
public static class Help {
private static final String usage =
"\nPDF-UnStamper ver. 0.1.0 by hwding@GitHub\n" +
"\nPDF-UnStamper ver. 0.1.1 by hwding@GitHub\n" +
"\nUsage: " +
"\n [OPTION] -i [INPUT PDF] -k [KEYWORDS...] (-o [OUTPUT PDF])" +
"\n [OPTION] -I [INPUT DIR] -k [KEYWORDS...] (-O [OUTPUT DIR])\n" +
Expand Down

0 comments on commit fb7cf0e

Please sign in to comment.