Skip to content

Commit

Permalink
cpanfile: Unicode::UTF8 & Unicode::Normalize are mandatory now
Browse files Browse the repository at this point in the history
  • Loading branch information
ikedas committed Sep 1, 2024
1 parent e4ed811 commit 3a1bbc7
Show file tree
Hide file tree
Showing 6 changed files with 29 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/make-check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
--verbose --no-interactive
--with-develop
--with-feature=Data::Password --with-feature=ldap
--with-feature=safe-unicode --with-feature=smime
--with-feature=smime
--with-feature=soap --with-feature=sqlite
${{ startsWith(matrix.os, 'macos') && '--with-feature=macos' || '' }}
- name: Run tests
Expand Down
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
- . ~/bashrc
- coverage-install
- coverage-setup
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --quiet --notest --installdeps --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .
- autoreconf -i
- ./configure
- cd src; make; cd ..
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ before_install:

install:
- cpan-install --coverage
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=safe-unicode --with-feature=smime --with-feature=soap --with-feature=sqlite .
- cpanm --installdeps --notest --with-develop --with-feature=Data::Password --with-feature=ldap --with-feature=smime --with-feature=soap --with-feature=sqlite .

before_script:
- coverage-setup
Expand Down
22 changes: 11 additions & 11 deletions cpanfile
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,14 @@ requires 'Time::HiRes', '>= 1.29';
# Used to get Unix time from local time
requires 'Time::Local', '>= 1.23';

# Normalizes file names represented by Unicode.
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';

# Sanitizes inputs with Unicode text.
requires 'Unicode::UTF8', '>= 0.58';

# Used to create URI containing non URI-canonical characters.
# Note: '3.28' is the version included in URI-1.35.
requires 'URI::Escape', '>= 3.28';
Expand Down Expand Up @@ -190,13 +198,6 @@ recommends 'Net::DNS', '>= 0.65';
# This is required if you set the "list_check_smtp" parameter in sympa.conf, which is used to check for existing aliases before mailing list creation.
recommends 'Net::SMTP';

# Normalizes file names represented by Unicode
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
recommends 'Unicode::Normalize', '>= 1.03';

recommends 'Unicode::UTF8', '>= 0.60';

### Features
##

Expand Down Expand Up @@ -324,10 +325,9 @@ feature 'soap', 'Required if you want to run the Sympa SOAP server that provides
};

feature 'safe-unicode', 'Sanitizes inputs with Unicode text.' => sub {
# Note: Perl 5.8.1 bundles version 0.23.
# Note: Perl 5.10.1 bundles 1.03 (per Unicode 5.1.0).
requires 'Unicode::Normalize', '>= 1.03';
requires 'Unicode::UTF8', '>= 0.60';
# Note: These became required (>=6.2.73b).
#requires 'Unicode::Normalize', '>= 1.03';
#requires 'Unicode::UTF8', '>= 0.58';
};

on 'test' => sub {
Expand Down
24 changes: 7 additions & 17 deletions src/lib/Sympa/Tools/Text.pm
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ use MIME::EncWords;
use Text::LineFold;
use Unicode::GCString;
use URI::Escape qw();
BEGIN { eval 'use Unicode::Normalize qw()'; }
BEGIN { eval 'use Unicode::UTF8 qw()'; }
use Unicode::Normalize qw();
use Unicode::UTF8;

use Sympa::Language;
use Sympa::Regexps;
Expand Down Expand Up @@ -141,15 +141,11 @@ sub canonic_text {
my $utext;
if (Encode::is_utf8($text)) {
$utext = $text;
} elsif ($Unicode::UTF8::VERSION) {
} else {
no warnings 'utf8';
$utext = Unicode::UTF8::decode_utf8($text);
} else {
$utext = Encode::decode_utf8($text);
}
if ($Unicode::Normalize::VERSION) {
$utext = Unicode::Normalize::normalize('NFC', $utext);
}
$utext = Unicode::Normalize::normalize('NFC', $utext);

# Remove DOS linefeeds (^M) that cause problems with Outlook 98, AOL,
# and EIMS:
Expand Down Expand Up @@ -313,13 +309,8 @@ sub guessed_to_utf8 {
and length $text
and $text =~ /[^\x00-\x7F]/;

my $utf8;
if ($Unicode::UTF8::VERSION) {
$utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
} else {
$utf8 = eval { Encode::decode_utf8($text, Encode::FB_CROAK()) };
}
my $utf8 = Unicode::UTF8::decode_utf8($text)
if Unicode::UTF8::valid_utf8($text);
unless (defined $utf8) {
foreach my $charset (map { $_ ? @$_ : () } @legacy_charsets{@langs}) {
$utf8 =
Expand All @@ -332,8 +323,7 @@ sub guessed_to_utf8 {
}

# Apply NFC: e.g. for modified-NFD by Mac OS X.
$utf8 = Unicode::Normalize::normalize('NFC', $utf8)
if $Unicode::Normalize::VERSION;
$utf8 = Unicode::Normalize::normalize('NFC', $utf8);

return Encode::encode_utf8($utf8);
}
Expand Down
21 changes: 8 additions & 13 deletions t/Tools_Text.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,13 @@ is $dec, $unicode_email, 'decode_filesystem_safe, Unicode';
# ToDo: foldcase()
# ToDo: wrap_text()

SKIP: {
skip 'Unicode::Normalize and Unicode::UTF8 required.'
unless $Unicode::Normalize::VERSION and $Unicode::UTF8::VERSION;

# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';
}
# Noncharacters: U+D800, U+10FFFE, U+110000, U+200000
is Sympa::Tools::Text::canonic_text(
"\xED\xA0\x80\n\xF4\x8F\xBF\xBE\n\xF4\x90\x80\x80\n\xF8\x88\x80\x80\x80\n"
),
Encode::encode_utf8(
"\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\x{FFFD}\n"
),
'canonic_text';

done_testing();

0 comments on commit 3a1bbc7

Please sign in to comment.