Skip to content

Commit

Permalink
Merge pull request #1885 from ikedas/issue-1884 by ikedas
Browse files Browse the repository at this point in the history
WWSympa: Invalid UTF-8 sequences in input may trigger crashing (#1884)
  • Loading branch information
ikedas authored Sep 21, 2024
2 parents a1126e6 + 3a1bbc7 commit e8cee91
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 56 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/make-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ jobs:
--verbose --no-interactive
--with-develop
--with-feature=Data::Password --with-feature=ldap
--with-feature=safe-unicode --with-feature=smime
--with-feature=smime
--with-feature=soap --with-feature=sqlite
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- . ~/bashrc
- coverage-install
- coverage-setup
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- autoreconf -i
- ./configure
- cd src; make; cd ..
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ before_install:

install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .

before_script:
- coverage-setup
Expand Down
22 changes: 11 additions & 11 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29';
# Used to get Unix time from local time
requires 'Time::Local', '>= 1.23';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';

# Sanitizes inputs with Unicode text.
requires 'Unicode::UTF8', '>= 0.58';

# Used to create URI containing non URI-canonical characters.
# Note: '3.28' is the version included in URI-1.35.
requires 'URI::Escape', '>= 3.28';
Expand Down Expand Up @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65';
# This is required if you set "list_check_smtp" sympa.conf parameter, used to check existing aliases before mailing list creation.
recommends 'Net::SMTP';

# Normalizes file names represented by Unicode
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
recommends 'Unicode::Normalize', '>= 1.03';

recommends 'Unicode::UTF8', '>= 0.60';

### Features
##

Expand Down Expand Up @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
};

feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';
requires 'Unicode::UTF8', '>= 0.60';
# Note: These became required (>=6.2.73b).
#requires 'Unicode::Normalize', '>= 1.03';
#requires 'Unicode::UTF8', '>= 0.58';
};

on 'test' => sub {
Expand Down
38 changes: 26 additions & 12 deletions src/cgi/wwsympa.fcgi.in
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use IO::File qw();
use MIME::EncWords;
use MIME::Lite::HTML;
use POSIX qw();
use Unicode::UTF8;
use URI;
use Data::Dumper; # tentative

Expand Down Expand Up @@ -1057,6 +1058,15 @@ while ($query = Sympa::WWW::FastCGI->new) {

## Get params in a hash
%in = $query->Vars;
while (my ($k, $v) = each %in) {
next if ref $v;
next if Encode::is_utf8($v);
unless (Unicode::UTF8::valid_utf8($v)) {
$log->syslog('err', 'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
}
}

# Determin robot.
$robot = $ENV{SYMPA_DOMAIN};
Expand Down Expand Up @@ -1868,24 +1878,28 @@ sub _split_params {
}

if (@params) {
$in{'action'} = $params[0];
$in{'action'} = shift @params;
my @args = @{$action_args{$in{'action'}} // $action_args{'default'}};

my $i = 1;
foreach my $p (@args) {
my $pname;
## More than 1 param
my ($k, $v);
if ($p =~ /^\@(\w+)$/) {
$pname = $1;
$in{$pname} = join '/', @params[$i .. $#params];
$in{$pname} .= '/' if $ending_slash;
last;
$k = $1;
$v = join '/', @params;
$v .= '/' if $ending_slash;
} else {
$pname = $p;
$in{$pname} = $params[$i];
$k = $p;
$v = shift @params;
}
$in{$k} = $v;

unless (Encode::is_utf8($v) or Unicode::UTF8::valid_utf8($v)) {
$log->syslog('err',
'Parameter in invalid UTF-8 %s="%s": Ignored',
$k, sprintf("\\x%*v02X", "\\x", $v));
delete $in{$k};
}
wwslog('debug', 'Incoming parameter: %s=%s', $pname, $in{$pname});
$i++;
last if 0 == index $p, '@';
}
}
}
Expand Down
24 changes: 7 additions & 17 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
BEGIN { eval 'use Unicode::Normalize qw()'; }
BEGIN { eval 'use Unicode::UTF8 qw()'; }
use Unicode::Normalize qw();
use Unicode::UTF8;

use Sympa::Language;
use Sympa::Regexps;
Expand Down Expand Up @@ -141,15 +141,11 @@ sub canonic_text {
my $utext;
if (Encode::is_utf8($text)) {
$utext = $text;
} elsif ($Unicode::UTF8::VERSION) {
} else {
no warnings 'utf8';
$utext = Unicode::UTF8::decode_utf8($text);
} else {
$utext = Encode::decode_utf8($text);
}
if ($Unicode::Normalize::VERSION) {
$utext = Unicode::Normalize::normalize('NFC', $utext);
}
$utext = Unicode::Normalize::normalize('NFC', $utext);

# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
# and EIMS:
Expand Down Expand Up @@ -313,13 +309,8 @@ sub guessed_to_utf8 {
and length $text
and $text =~ /[^\x00-\x7F]/;

my $utf8;
if ($Unicode::UTF8::VERSION) {
$utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
} else {
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
}
my $utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
unless (defined $utf8) {
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
$utf8 =
Expand All @@ -332,8 +323,7 @@ sub guessed_to_utf8 {
}

# Apply NFC: e.g. for modified-NFD by Mac OS X.
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
if $Unicode::Normalize::VERSION;
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);

return Encode::encode_utf8($utf8);
}
Expand Down
21 changes: 8 additions & 13 deletions t/Tools_Text.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
# ToDo: foldcase()
# ToDo: wrap_text()

SKIP: {
skip 'Unicode::Normalize and Unicode::UTF8 required.'
unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;

# Noncharacters: U+D800, U+10FFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';
}
# Noncharacters: U+D800, U+10FFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';

done_testing();

0 comments on commit e8cee91

Please sign in to comment.