Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Speed up kputll. #1805

Merged
merged 1 commit into from
Jul 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 57 additions & 11 deletions htslib/kstring.h
Original file line number Diff line number Diff line change
Expand Up @@ -375,17 +375,63 @@ static inline int kputw(int c, kstring_t *s)

// Append the decimal text of the long long value c to the kstring s.
// Returns 0 on success and EOF when ks_resize() reports failure; on the
// small-value path of the newer body the result of kputuw() is returned.
//
// NOTE(review): this span is a rendered diff hunk whose +/- markers were
// stripped. Per the hunk summary (57 additions & 11 deletions) the first
// body below is the removed implementation and the second is the one that
// replaces it; they are not meant to coexist in one function.
static inline int kputll(long long c, kstring_t *s)
{
// ---- Removed implementation (11 lines) ----
// Digits are produced least-significant first into a stack buffer and then
// copied into s in reverse order.
char buf[32];
int i, l = 0;
unsigned long long x = c;
// x is unsigned, so the negation wraps and yields the magnitude of c.
if (c < 0) x = -x;
do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
if (c < 0) buf[l++] = '-';
// Room for the l characters in buf plus the terminating NUL.
if (ks_resize(s, s->l + l + 2) < 0)
return EOF;
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
s->s[s->l] = 0;
return 0;
// ---- Added implementation (57 lines) ----
// Worst case expansion. One check reduces function size
// and aids inlining chance. Memory overhead is minimal.
// (Sign, at most 20 digits and the NUL all fit within the 23 requested.)
if (ks_resize(s, s->l + 23) < 0)
return EOF;

unsigned long long x = c;
if (c < 0) {
// x is unsigned, so the negation wraps and yields the magnitude of c.
x = -x;
// The sign is written straight away; only the magnitude remains.
s->s[s->l++] = '-';
}

// Magnitudes that fit in 32 bits are handed to kputuw()
// (presumably the unsigned 32-bit writer; defined outside this hunk).
if (x <= UINT32_MAX)
return kputuw(x, s);

// Two-character decimal text for every value 00..99, indexed by 2*value.
static const char kputull_dig2r[] =
"00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
unsigned int l, j;
char *cp;

// Find out how long the number is (could consider clzll)
// m walks up through powers of ten until it exceeds x; l counts the steps.
uint64_t m = 1;
l = 0;
if (sizeof(long long)==sizeof(uint64_t) && x >= 10000000000000000000ULL) {
// avoids overflow below
// (a 20-digit x would require m to reach 10^20, which uint64_t cannot hold)
l = 20;
} else {
do {
l++;
m *= 10;
} while (x >= m);
}

// Add digits two at a time
// j starts one past the last digit slot and moves left by two per pair,
// so the number is filled in from its least-significant end.
j = l;
cp = s->s + s->l;
while (x >= 10) {
const char *d = &kputull_dig2r[2*(x%100)];
x /= 100;
memcpy(&cp[j-=2], d, 2);
}

// Last one (if necessary). We know that x < 10 by now.
// j == 1 only when l is odd, leaving a single leading digit to place.
if (j == 1)
cp[0] = x + '0';

// Account for the l digits just written and NUL-terminate.
s->l += l;
s->s[s->l] = 0;
return 0;
}

static inline int kputl(long c, kstring_t *s) {
Expand Down
81 changes: 81 additions & 0 deletions test/test_kstring.c
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,84 @@ static int test_kputw(int64_t start, int64_t end) {
return 0;
}

/*
 * Check kputll() for every value in the inclusive range [s, e].
 *
 * For each value the string is emptied, the buffer is filled with 0xff so
 * that a missing terminator is detectable, kputll() is called, and the
 * text is parsed back with strtoll() and compared with the input.
 * If s > e only the value s is checked.
 *
 * Returns 0 when every value round-trips, -1 on the first failure.
 */
static int test_kputll_from_to(kstring_t *str, long long s, long long e) {
    long long value;

    /* The exit test sits before the increment so that value never steps
     * past e, which matters when e is the largest long long. */
    for (value = s; ; value++) {
        /* Start from an empty, poisoned buffer. */
        str->l = 0;
        memset(str->s, 0xff, str->m);

        if (kputll(value, str) < 0 || str->s == NULL) {
            perror("kputll");
            return -1;
        }

        /* The terminator must lie inside the allocation and be a NUL. */
        if (str->l >= str->m || str->s[str->l] != '\0') {
            fprintf(stderr, "No NUL termination on string from kputll\n");
            return -1;
        }

        /* Round-trip the text through the C library parser. */
        if (strtoll(str->s, NULL, 10) != value) {
            fprintf(stderr,
                    "kputll wrote the wrong value, expected %lld, got %s\n",
                    value, str->s);
            return -1;
        }

        if (value >= e)
            break;
    }

    return 0;
}

/*
 * Exercise kputll() around the values where its output changes length or
 * sign, at the extremes of the 64-bit range, and over the caller-supplied
 * range [start, end].
 *
 * Returns 0 if every check passes, -1 on allocation failure or on the
 * first value that kputll() formats incorrectly.
 */
static int test_kputll(long long start, long long end) {
    kstring_t str = { 0, 0, NULL };
    unsigned long long val;
    int64_t start2, end2;
    int ret = -1;

    str.s = malloc(2);
    if (!str.s) {
        perror("malloc");
        return -1;
    }
    str.m = 2;

    /* Positive powers of ten: the five values below each power and the
     * power itself, so both sides of every digit-count change are seen. */
    for (val = 1; val < INT64_MAX-5; val *= 10) {
        if (test_kputll_from_to(&str, val >= 5 ? val - 5 : val, val) < 0)
            goto out;
    }

    /* Negative powers of ten, mirrored: -10^k up to -10^k + 5.
     * val never exceeds 10^18 here, so it converts to long long exactly
     * before being negated and valm + 5 cannot overflow. */
    for (val = 1; val < INT64_MAX-5; val *= 10) {
        long long valm = -(long long) val;
        if (test_kputll_from_to(&str, valm, valm + 5) < 0)
            goto out;
    }

    /* Both ends of the 64-bit range. */
    if (test_kputll_from_to(&str, INT64_MAX - 5, INT64_MAX) < 0)
        goto out;

    if (test_kputll_from_to(&str, INT64_MIN, INT64_MIN + 5) < 0)
        goto out;

    str.m = 1; // Force a resize
    start2 = (int64_t)start; // no larger on our platforms
    end2 = (int64_t)end;
    clamp(&start2, INT64_MIN, INT64_MAX);
    clamp(&end2, INT64_MIN, INT64_MAX);

    /* Run the caller's range using the clamped bounds computed above. */
    if (test_kputll_from_to(&str, start2, end2) < 0)
        goto out;

    ret = 0;

out:
    /* Single release point for the string buffer on every path. */
    free(ks_release(&str));
    return ret;
}

// callback used by test_kgetline
static char *mock_fgets(char *str, int num, void *p) {
int *mock_state = (int*)p;
Expand Down Expand Up @@ -413,6 +491,9 @@ int main(int argc, char **argv) {
if (!test || strcmp(test, "kputw") == 0)
if (test_kputw(start, end) != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kputll") == 0)
if (test_kputll(start, end) != 0) res = EXIT_FAILURE;

if (!test || strcmp(test, "kgetline") == 0)
if (test_kgetline() != 0) res = EXIT_FAILURE;

Expand Down