Skip to content

Commit

Permalink
Minor bug fixes
Browse files: browse the repository at this point in the history
* Add `#`, `'` and `"` as valid characters

* Ignore comments in content stream
  • Loading branch information
fkoyer committed Dec 6, 2023
1 parent 9d322df commit 19a00b1
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 10 deletions.
16 changes: 11 additions & 5 deletions dist/PDFInfo2.pm
Original file line number Diff line number Diff line change
Expand Up @@ -308,8 +308,9 @@ my %specials = (
my %class_map;
$class_map{$_} = CHAR_SPACE for split //, " \n\r\t\f\b";
$class_map{$_} = CHAR_NUM for split //, '0123456789.+-';
$class_map{$_} = CHAR_ALPHA for split //, 'abcdefghijklmnopqrstuvwxyz*';
$class_map{$_} = CHAR_ALPHA for split //, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
$class_map{$_} = CHAR_ALPHA for split //, 'abcdefghijklmnopqrstuvwxyz';
$class_map{$_} = CHAR_ALPHA for split //, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
$class_map{$_} = CHAR_ALPHA for split //, '*_#"\'';
$class_map{$_} = CHAR_BEGIN_NAME for split //, '/';
$class_map{$_} = CHAR_BEGIN_ARRAY for split //, '[';
$class_map{$_} = CHAR_END_ARRAY for split //, ']';
Expand Down Expand Up @@ -389,7 +390,11 @@ sub get_name {

while (defined(my $ch = getc($fh))) {
my $class = $class_map{$ch};
if ( defined($class) && ($class == CHAR_ALPHA || $class == CHAR_NUM )) {
unless (defined($class)) {
seek($fh, -1, 1);
croak "unknown char $ch at offset " . tell($fh);
}
if ( $class == CHAR_ALPHA || $class == CHAR_NUM ) {
$name .= $ch;
next;
} else {
Expand Down Expand Up @@ -848,7 +853,6 @@ sub get_startxref {
next;
}
$tok = $ch . $tok;
print "$tok\n";
}

croak "startxref not found" unless $tok eq 'startxref';
Expand Down Expand Up @@ -2063,6 +2067,7 @@ sub _parse_contents {
for my $obj ( @$contents ) {
$stream .= $self->_get_stream_data($obj);
}
debug('stream',$stream);

my $core = $self->{core}->clone(\$stream);

Expand All @@ -2071,6 +2076,7 @@ sub _parse_contents {
my ($token,$type) = $core->get_primitive();
last unless defined($token);
# print "$type: $token\n";
next if $type == Mail::SpamAssassin::PDF::Core::TYPE_COMMENT;
if ( $type != Mail::SpamAssassin::PDF::Core::TYPE_OP ) {
push(@params,$token);
next;
Expand Down Expand Up @@ -2241,7 +2247,7 @@ use re 'taint';
use Digest::MD5 qw(md5_hex);
use Data::Dumper;

my $VERSION = 0.20;
my $VERSION = 0.21;

our @ISA = qw(Mail::SpamAssassin::Plugin);

Expand Down
12 changes: 8 additions & 4 deletions lib/Mail/SpamAssassin/PDF/Core.pm
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ my %specials = (
my %class_map;
$class_map{$_} = CHAR_SPACE for split //, " \n\r\t\f\b";
$class_map{$_} = CHAR_NUM for split //, '0123456789.+-';
$class_map{$_} = CHAR_ALPHA for split //, 'abcdefghijklmnopqrstuvwxyz*';
$class_map{$_} = CHAR_ALPHA for split //, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ_';
$class_map{$_} = CHAR_ALPHA for split //, 'abcdefghijklmnopqrstuvwxyz';
$class_map{$_} = CHAR_ALPHA for split //, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
$class_map{$_} = CHAR_ALPHA for split //, '*_#"\'';
$class_map{$_} = CHAR_BEGIN_NAME for split //, '/';
$class_map{$_} = CHAR_BEGIN_ARRAY for split //, '[';
$class_map{$_} = CHAR_END_ARRAY for split //, ']';
Expand Down Expand Up @@ -134,7 +135,11 @@ sub get_name {

while (defined(my $ch = getc($fh))) {
my $class = $class_map{$ch};
if ( defined($class) && ($class == CHAR_ALPHA || $class == CHAR_NUM )) {
unless (defined($class)) {
seek($fh, -1, 1);
croak "unknown char $ch at offset " . tell($fh);
}
if ( $class == CHAR_ALPHA || $class == CHAR_NUM ) {
$name .= $ch;
next;
} else {
Expand Down Expand Up @@ -593,7 +598,6 @@ sub get_startxref {
next;
}
$tok = $ch . $tok;
print "$tok\n";
}

croak "startxref not found" unless $tok eq 'startxref';
Expand Down
2 changes: 2 additions & 0 deletions lib/Mail/SpamAssassin/PDF/Parser.pm
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,7 @@ sub _parse_contents {
for my $obj ( @$contents ) {
$stream .= $self->_get_stream_data($obj);
}
debug('stream',$stream);

my $core = $self->{core}->clone(\$stream);

Expand All @@ -446,6 +447,7 @@ sub _parse_contents {
my ($token,$type) = $core->get_primitive();
last unless defined($token);
# print "$type: $token\n";
next if $type == Mail::SpamAssassin::PDF::Core::TYPE_COMMENT;
if ( $type != Mail::SpamAssassin::PDF::Core::TYPE_OP ) {
push(@params,$token);
next;
Expand Down
2 changes: 1 addition & 1 deletion lib/Mail/SpamAssassin/Plugin/PDFInfo2.pm
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ use re 'taint';
use Digest::MD5 qw(md5_hex);
use Data::Dumper;

my $VERSION = 0.20;
my $VERSION = 0.21;

our @ISA = qw(Mail::SpamAssassin::Plugin);

Expand Down

0 comments on commit 19a00b1

Please sign in to comment.