From 3f93db90876689f3b3db9fe3233046feb2501c56 Mon Sep 17 00:00:00 2001
From: Michael Mikonos <127171689+mknos@users.noreply.github.com>
Date: Tue, 13 Feb 2024 01:23:14 +0800
Subject: [PATCH] wc: unexpected counts for -w flag (#457)

* As noted earlier, word count output in wc did not match GNU version
* A word is one or more non-space characters, surrounded by whitespace (including newline)
* The old code seemed to count the spaces in between words, but I couldn't follow it
* To test this I built the OpenBSD version of wc on my linux system, then compared total word count for a set of files between OpenBSD, GNU and perl

%$wc_obsd -w s*
wc: s: Is a directory
0 s
524 seq
330 shar
319 shar2
225 sleep
4098 sort
1789 spell
895 split
wc: sssss: Is a directory
0 sssss
444 strings
1116 sum
9740 total

%perl wc -w s*
wc: 's' is a directory
524 seq
330 shar
319 shar2
225 sleep
4098 sort
1789 spell
895 split
wc: 'sssss' is a directory
444 strings
1116 sum
9740 total

%/usr/bin/wc -w s*
/usr/bin/wc: s: Is a directory
0 s
524 seq
330 shar
319 shar2
225 sleep
4098 sort
1789 spell
895 split
/usr/bin/wc: sssss: Is a directory
0 sssss
444 strings
1116 sum
9740 total
---
 bin/wc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/bin/wc b/bin/wc
index fd9114f3..32934c55 100755
--- a/bin/wc
+++ b/bin/wc
@@ -169,8 +169,9 @@ sub wc_fh {
             }
         }
         if ($opt{'w'}) {
-            @words = split(/\w+/,$_);
-            $words += $#words;
+            s/\A\s+//;
+            @words = split /\s+/;
+            $words += scalar @words;
         }
         if ($opt{'m'}) {
             @chars = m/$encoding/gox;
@@ -181,7 +182,6 @@ sub wc_fh {
         }
     }
     if ($paras > 1) { $paras--; }
-    if ($words > 1) { $words--; }
     $total_paras += $paras; $total_lines += $lines;
     $total_words += $words; $total_chars += $chars;
     $total_bytes += $bytes;