From 3b71dbc2310b338f5dd1d8e842d835fc615d849d Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Tue, 24 Sep 2024 17:06:48 +0100 Subject: [PATCH] Replace home-brew string end searching with memchr. With long aux tags the trivial while loop can be surprisingly slow. "while (s < end && *s) ++s;" isn't well vectorised or turned into word-by-word processing by either gcc or clang, but these tricks are used by the system memchr implementation. An alternative could be this (used in my WIP VCF parser), which is more optimised for relatively short strings. Included here just for potential future reference on systems with noddy memchr implementations. #define haszero(x) (((x)-0x0101010101010101UL)&~(x)&0x8080808080808080UL) static inline char *memchr8(char *s, char sym, size_t len) { const uint64_t sym8 = sym * 0x0101010101010101UL; uint64_t *s8 = (uint64_t *)s; uint64_t *s8_end = (uint64_t *)(s+(len&~7)); while (s8 < s8_end && !haszero(*s8 ^ sym8)) s8++; // Precise identification char *s_end = s + len; s = (char *)s8; while (s < s_end && *s != sym) { s++; } return s < s_end ? s : NULL; } --- sam.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sam.c b/sam.c index 7e58da6e7..a96a6120d 100644 --- a/sam.c +++ b/sam.c @@ -4856,9 +4856,9 @@ static inline uint8_t *skip_aux(uint8_t *s, uint8_t *end) switch (size) { case 'Z': case 'H': - while (s < end && *s) ++s; - return s < end ? s + 1 : end; - case 'B': + s = memchr(s, 0, end-s); + return s ? s+1 : end; + case 'B': if (end - s < 5) return NULL; size = aux_type2size(*s); ++s; n = le_to_u32(s);