From f6bec84e74ea1f78001fa6f7e1f3ec11724fb28b Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Thu, 18 Jul 2024 11:58:02 +0100 Subject: [PATCH 1/3] Remove undefined behavior in expression language. The bit-wise AND, OR and XOR logic works on the overloaded double (res->d), but casting this to int64_t for NAN is undefined behaviour. Infact the current cast doesn't even yield the correct bit-wise layout for NAN, as (long)d and *(long *)&d give different results for d==NAN. However the result wasn't used anyway as it's promptly cleared again in the subsequent `if (undef) hts_expr_val_undef(res)` code. Hence undefined or otherwise, it made no difference. --- hts_expr.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hts_expr.c b/hts_expr.c index 5e5a132ea..0fdb3bc8b 100644 --- a/hts_expr.c +++ b/hts_expr.c @@ -527,8 +527,10 @@ static int bitand_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, } else if (res->is_str || val.is_str) { hts_expr_val_free(&val); return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0; } - res->is_true = (res->d = ((int64_t)res->d & (int64_t)val.d)) != 0; } else { break; } @@ -560,8 +562,10 @@ static int bitxor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, } else if (res->is_str || val.is_str) { hts_expr_val_free(&val); return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0; } - res->is_true = (res->d = ((int64_t)res->d ^ (int64_t)val.d)) != 0; } else { break; } @@ -593,8 +597,10 @@ static int bitor_expr(hts_filter_t *filt, void *data, hts_expr_sym_func *fn, } else if (res->is_str || val.is_str) { hts_expr_val_free(&val); return -1; + } else { + res->is_true = + (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0; } - res->is_true = (res->d = ((int64_t)res->d | (int64_t)val.d)) != 0; } else { break; } From c19fed3f7594323ca65d3627b8ddb7541dfe630c Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Thu, 18 Jul 2024 12:12:34 +0100 Subject: [PATCH 2/3] Fix signed overflow for hts_parse_decimal. Obviously there can still be overflows if we attempt to parse numbers which are too big, but for the legally accepted range of this, parsing "-9,223,372,036,854,775,808" as tested in test/sam.c triggered a problem as the positive version doesn't fit in "long long". We parse as unsigned and only switch to signed via the implicit return type conversion (and probably exploiting twos-complement, but that's a fair assumption). --- hts.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hts.c b/hts.c index caf85e64a..42be3f21f 100644 --- a/hts.c +++ b/hts.c @@ -3826,7 +3826,7 @@ void hts_itr_destroy(hts_itr_t *iter) } } -static inline long long push_digit(long long i, char c) +static inline unsigned long long push_digit(unsigned long long i, char c) { // ensure subtraction occurs first, avoiding overflow for >= MAX-48 or so int digit = c - '0'; @@ -3835,7 +3835,7 @@ static inline long long push_digit(long long i, char c) long long hts_parse_decimal(const char *str, char **strend, int flags) { - long long n = 0; + unsigned long long n = 0; int digits = 0, decimals = 0, e = 0, lost = 0; char sign = '+', esign = '+'; const char *s, *str_orig = str; From 63ad6c1d03ea727657ffdcf43def85381d277579 Mon Sep 17 00:00:00 2001 From: James Bonfield Date: Thu, 18 Jul 2024 12:28:40 +0100 Subject: [PATCH 3/3] Fix an undefined addition to a NULL pointer in vcf_format. The pointer was never used, but the NULL+0 still triggers clang's ubsan. --- vcf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vcf.c b/vcf.c index 53f2b7a92..daedad34d 100644 --- a/vcf.c +++ b/vcf.c @@ -4020,7 +4020,10 @@ int vcf_format(const bcf_hdr_t *h, const bcf1_t *v, kstring_t *s) kputc_('\t', s); // INFO if (v->n_info) { - uint8_t *ptr = (uint8_t *)v->shared.s + v->unpack_size[0] + v->unpack_size[1] + v->unpack_size[2]; + uint8_t *ptr = v->shared.s + ? (uint8_t *)v->shared.s + v->unpack_size[0] + + v->unpack_size[1] + v->unpack_size[2] + : NULL; int first = 1; bcf_info_t *info = v->d.info;