From 8cdc477eebf2e1cf65642d81a841fa32e279369e Mon Sep 17 00:00:00 2001 From: Hasindu Gamaarachchi Date: Sun, 4 Aug 2024 20:33:25 +1000 Subject: [PATCH] update slow5lib and workflows --- .github/workflows/c-cpp.yml | 38 +++- .github/workflows/release-simulation.yml | 3 + .github/workflows/release.yml | 3 + slow5lib/Makefile | 4 +- slow5lib/include/slow5/slow5.h | 4 + slow5lib/include/slow5/slow5_defs.h | 2 +- slow5lib/include/slow5/slow5_mt.h | 2 - slow5lib/src/slow5.c | 94 ++++++---- slow5lib/src/slow5_byte.h | 229 +++++++++++++++++++++++ slow5lib/src/slow5_idx.c | 28 ++- slow5lib/src/slow5_idx.h | 4 +- slow5lib/src/slow5_press.c | 10 + 12 files changed, 371 insertions(+), 50 deletions(-) create mode 100644 slow5lib/src/slow5_byte.h diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml index 766a49f..4129595 100644 --- a/.github/workflows/c-cpp.yml +++ b/.github/workflows/c-cpp.yml @@ -6,6 +6,9 @@ on: pull_request: branches: [ '*' ] +env: + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + jobs: ubuntu_14: name: ubuntu_14 @@ -98,9 +101,38 @@ jobs: run: make CC=icc -j8 - name: test run: make test - os_x_11: - name: OSX 11 - runs-on: macos-11 + ubuntu_24: + name: Ubuntu 24 + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v2 + - name: install packages + run: sudo apt-get update && sudo apt-get install zlib1g-dev + - name: build + run: make -j8 + - name: test + run: make test + os_x_12: + name: OSX 12 + runs-on: macos-12 + steps: + - uses: actions/checkout@v2 + - name: build + run: make -j8 + - name: test + run: make test + os_x_13: + name: OSX 13 + runs-on: macos-13 + steps: + - uses: actions/checkout@v2 + - name: build + run: make -j8 + - name: test + run: make test + os_x_14: + name: OSX 14 + runs-on: macos-14 steps: - uses: actions/checkout@v2 - name: build diff --git a/.github/workflows/release-simulation.yml b/.github/workflows/release-simulation.yml index 7d0c6b1..1d7ec28 100644 --- a/.github/workflows/release-simulation.yml +++ b/.github/workflows/release-simulation.yml @@ -7,6 +7,9 @@ on: pull_request: branches: [ dev ] +env: + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + jobs: ubuntu_14: name: ubuntu_14 diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 16db86f..4c6e74e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,6 +6,9 @@ on: tags: - "v*" +env: + ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true + jobs: ubuntu_14: name: ubuntu_14 diff --git a/slow5lib/Makefile b/slow5lib/Makefile index c1913b5..7e7d95f 100644 --- a/slow5lib/Makefile +++ b/slow5lib/Makefile @@ -55,10 +55,10 @@ $(SHAREDLIB): $(OBJ) $(SVBLIB) $(SVBLIB): make -C $(SVB) no_simd=$(no_simd) libstreamvbyte.a -$(BUILD_DIR)/slow5.o: src/slow5.c src/slow5_extra.h src/slow5_idx.h src/slow5_misc.h src/klib/ksort.h $(SLOW5_H) +$(BUILD_DIR)/slow5.o: src/slow5.c src/slow5_extra.h src/slow5_idx.h src/slow5_misc.h src/klib/ksort.h src/slow5_byte.h $(SLOW5_H) $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ -$(BUILD_DIR)/slow5_idx.o: src/slow5_idx.c src/slow5_idx.h src/slow5_extra.h src/slow5_misc.h $(SLOW5_H) +$(BUILD_DIR)/slow5_idx.o: src/slow5_idx.c src/slow5_idx.h src/slow5_extra.h src/slow5_misc.h src/slow5_byte.h $(SLOW5_H) $(CC) $(CFLAGS) $(CPPFLAGS) $< -c -fpic -o $@ $(BUILD_DIR)/slow5_misc.o: src/slow5_misc.c src/slow5_misc.h include/slow5/slow5_error.h diff --git a/slow5lib/include/slow5/slow5.h b/slow5lib/include/slow5/slow5.h index b6ee89e..dc94486 100755 --- a/slow5lib/include/slow5/slow5.h +++ b/slow5lib/include/slow5/slow5.h @@ -624,6 +624,10 @@ void slow5_set_log_level(enum slow5_log_level_opt log_level); //sets a global variable, so not thread safe void slow5_set_exit_condition(enum slow5_exit_condition_opt exit_condition); +//experimental +/* no error messages printed and not exit when a requested read ID is not found in index*/ +// being tested, do not use until added to documentation +void slow5_set_skip_rid(); //get the list of hdr data keys in sorted order (only the returned pointer must be freed, not the ones inside - subjet to change) //len is the numberof elements diff --git a/slow5lib/include/slow5/slow5_defs.h b/slow5lib/include/slow5/slow5_defs.h index 845df54..8fb27e6 100644 --- a/slow5lib/include/slow5/slow5_defs.h +++ b/slow5lib/include/slow5/slow5_defs.h @@ -44,7 +44,7 @@ The API documentation is available at https://hasindu2008.github.io/slow5tools/ */ // library version -#define SLOW5_LIB_VERSION "1.2.0-beta" +#define SLOW5_LIB_VERSION "1.2.0" // maximum file version supported by this library - independent of slow5 library version above // if updating change all 4 below diff --git a/slow5lib/include/slow5/slow5_mt.h b/slow5lib/include/slow5/slow5_mt.h index 51c66be..4fc37df 100644 --- a/slow5lib/include/slow5/slow5_mt.h +++ b/slow5lib/include/slow5/slow5_mt.h @@ -12,8 +12,6 @@ extern "C" { *** Easy Multi-thread API ************************************************************************* **************************************************************************************************/ -/**************** This API is still in beta stage, there could be bugs *************************/ - /* This is a easy multi-thread API that can fetch a batch of slow5 records using multiple threads in parallel. This API uses a fork-join thread model. It is not meant to be used by a programmer who has the expertise to write multi-threaded code and use the slow5 low-level API directly. diff --git a/slow5lib/src/slow5.c b/slow5lib/src/slow5.c index 6a57948..79bfcea 100644 --- a/slow5lib/src/slow5.c +++ b/slow5lib/src/slow5.c @@ -41,9 +41,9 @@ SOFTWARE. #include "slow5_extra.h" #include "slow5_idx.h" #include "slow5_misc.h" +#include "slow5_byte.h" #include "klib/ksort.h" - /* IMPORTANT: The comments in this are NOT the API documentation The API documentation is available at https://hasindu2008.github.io/slow5tools/ The comments here are for internal use and do not rely on them. Open a GitHub issue for any questions. @@ -86,6 +86,8 @@ static inline slow5_file_t *slow5_open_append(const char *filename, enum slow5_ enum slow5_log_level_opt slow5_log_level = SLOW5_LOG_INFO; enum slow5_exit_condition_opt slow5_exit_condition = SLOW5_EXIT_OFF; +int8_t slow5_bigend = 0; +int8_t slow5_skip_rid = 0; __thread int slow5_errno_intern = 0; @@ -207,7 +209,6 @@ struct slow5_file *slow5_init(FILE *fp, const char *pathname, enum slow5_fmt for * allocate memory for header, SLOW5 file structure and populate file number and offset * * errors - * SLOW5_ERR_OTH big endian machine * SLOW5_ERR_ARG fp is NULL * SLOW5_ERR_UNK format could not be determined from extension * SLOW5_ERR_MEM memory allocation failed @@ -216,9 +217,10 @@ struct slow5_file *slow5_init(FILE *fp, const char *pathname, enum slow5_fmt for struct slow5_file *slow5_init_empty(FILE *fp, const char *pathname, enum slow5_fmt format) { if (slow5_is_big_endian()) { - SLOW5_ERROR("%s","Big endian machine detected. slow5lib only support little endian at this time. Please open a github issue stating your machine spec ."); - slow5_errno = SLOW5_ERR_OTH; - return NULL; + SLOW5_WARNING("%s","Big endian machine detected and the support is experimental. Report issues at ."); + slow5_bigend = 1; + //slow5_errno = SLOW5_ERR_OTH; + //return NULL; } // pathname allowed to be NULL at this point if (!fp) { @@ -304,7 +306,6 @@ struct slow5_file *slow5_open(const char *pathname, const char *mode) { * slow5_close() should be called when finished with the structure. * * On error, NULL is returned and slow5_errno is set to indicate the error. - * SLOW5_ERR_OTH Big endian is not supported. * SLOW5_ERR_ARG The pathname or mode provided was NULL. * SLOW5_ERR_IO The file could not be opened. See errno for details. * slow5_init errors @@ -318,9 +319,10 @@ struct slow5_file *slow5_open(const char *pathname, const char *mode) { */ struct slow5_file *slow5_open_with(const char *pathname, const char *mode, enum slow5_fmt format) { if (slow5_is_big_endian()) { - SLOW5_ERROR_EXIT("%s", "Big endian machine detected. slow5lib only supports little endian at this time. Please open a github issue stating your machine spec ."); - slow5_errno = SLOW5_ERR_OTH; - return NULL; + SLOW5_WARNING("%s","Big endian machine detected and the support is experimental. Report issues at ."); + slow5_bigend = 1; + //slow5_errno = SLOW5_ERR_OTH; + //return NULL; } if (!pathname || !mode) { if (!pathname) { @@ -818,7 +820,7 @@ struct slow5_hdr *slow5_hdr_init(FILE *fp, enum slow5_fmt format, slow5_press_me } else if (fread(&record_method, sizeof record_method, 1, fp) != 1) { SLOW5_ERROR("Malformed blow5 header. Failed to read the record compression method.%s", feof(fp) ? " EOF reached." : ""); goto err_fread; - } else if (fread(&header->num_read_groups, sizeof header->num_read_groups, 1, fp) != 1) { + } else if (SLOW5_FREAD(&header->num_read_groups, sizeof header->num_read_groups, 1, fp) != 1) { SLOW5_ERROR("Malformed blow5 header. Failed to read the number of read groups.%s", feof(fp) ? " EOF reached." : ""); goto err_fread; } else if (slow5_signal_press_version_cmp(header->version) >= 0 && fread(&signal_method, sizeof signal_method, 1, fp) != 1) { @@ -829,7 +831,7 @@ struct slow5_hdr *slow5_hdr_init(FILE *fp, enum slow5_fmt format, slow5_press_me free(header); slow5_errno = SLOW5_ERR_IO; return NULL; - } else if (fread(&header_size, sizeof header_size, 1, fp) != 1) { + } else if (SLOW5_FREAD(&header_size, sizeof header_size, 1, fp) != 1) { SLOW5_ERROR("Malformed blow5 header. Failed to read the ascii header size.%s", feof(fp) ? " EOF reached." : ""); goto err_fread; } @@ -1000,7 +1002,7 @@ void *slow5_hdr_to_mem(struct slow5_hdr *header, enum slow5_fmt format, slow5_pr len += sizeof version->patch; memcpy(mem + len, &record_comp, sizeof record_comp); len += sizeof record_comp; - memcpy(mem + len, &header->num_read_groups, sizeof header->num_read_groups); + SLOW5_MEMCPY(mem + len, &header->num_read_groups, sizeof header->num_read_groups); len += sizeof header->num_read_groups; memcpy(mem + len, &signal_comp, sizeof signal_comp); len += sizeof signal_comp; @@ -1144,7 +1146,7 @@ void *slow5_hdr_to_mem(struct slow5_hdr *header, enum slow5_fmt format, slow5_pr mem[len] = '\0'; } else if (format == SLOW5_FORMAT_BINARY) { //write the header size in bytes (which was skipped previously) header_size = len - (SLOW5_BINARY_HDR_SIZE_OFFSET + sizeof header_size); - memcpy(mem + SLOW5_BINARY_HDR_SIZE_OFFSET, &header_size, sizeof header_size); + SLOW5_MEMCPY(mem + SLOW5_BINARY_HDR_SIZE_OFFSET, &header_size, sizeof header_size); } if (n != NULL) { @@ -2522,6 +2524,9 @@ int slow5_get(const char *read_id, struct slow5_rec **read, struct slow5_file *s size_t bytes; char *mem; if (!(mem = (char *)slow5_get_mem(read_id, &bytes, s5p))) { + if(slow5_errno == SLOW5_ERR_NOTFOUND && slow5_skip_rid == 1){ + return slow5_errno; + } SLOW5_EXIT_IF_ON_ERR(); return slow5_errno; } @@ -2816,7 +2821,7 @@ int slow5_rec_parse(char *read_mem, size_t read_size, const char *read_id, struc case COL_read_id: size = sizeof read->read_id_len; - memcpy(&read->read_id_len, read_mem + offset, size); + SLOW5_MEMCPY(&read->read_id_len, read_mem + offset, size); offset += size; size = read->read_id_len * sizeof *read->read_id; @@ -2842,37 +2847,37 @@ int slow5_rec_parse(char *read_mem, size_t read_size, const char *read_id, struc case COL_read_group: size = sizeof read->read_group; - memcpy(&read->read_group, read_mem + offset, size); + SLOW5_MEMCPY(&read->read_group, read_mem + offset, size); offset += size; break; case COL_digitisation: size = sizeof read->digitisation; - memcpy(&read->digitisation, read_mem + offset, size); + SLOW5_MEMCPY(&read->digitisation, read_mem + offset, size); offset += size; break; case COL_offset: size = sizeof read->offset; - memcpy(&read->offset, read_mem + offset, size); + SLOW5_MEMCPY(&read->offset, read_mem + offset, size); offset += size; break; case COL_range: size = sizeof read->range; - memcpy(&read->range, read_mem + offset, size); + SLOW5_MEMCPY(&read->range, read_mem + offset, size); offset += size; break; case COL_sampling_rate: size = sizeof read->sampling_rate; - memcpy(&read->sampling_rate, read_mem + offset, size); + SLOW5_MEMCPY(&read->sampling_rate, read_mem + offset, size); offset += size; break; case COL_len_raw_signal: size = sizeof read->len_raw_signal; - memcpy(&read->len_raw_signal, read_mem + offset, size); + SLOW5_MEMCPY(&read->len_raw_signal, read_mem + offset, size); offset += size; break; @@ -2902,6 +2907,7 @@ int slow5_rec_parse(char *read_mem, size_t read_size, const char *read_id, struc } memcpy(read->raw_signal, read_mem + offset, size); + SLOW5_BYTE_SWAP_ARRAY(read->raw_signal, read->len_raw_signal); offset += size; if (signal_method != SLOW5_COMPRESS_NONE) { @@ -3094,7 +3100,7 @@ static int slow5_rec_aux_parse(char *tok, char *read_mem, uint64_t offset, size_ if (SLOW5_IS_PTR(aux_meta->types[i])) { /* Type is an array */ size = sizeof len; - memcpy(&len, read_mem + offset, size); + SLOW5_MEMCPY(&len, read_mem + offset, size); offset += size; } @@ -3121,6 +3127,15 @@ static int slow5_rec_aux_parse(char *tok, char *read_mem, uint64_t offset, size_ return -1; } memcpy(data, read_mem + offset, bytes); + if(slow5_bigend){ + if(len == 1){ + SLOW5_BYTE_SWAP_VOID(data,bytes); + } else if (len > 1 && bytes/len==2){ + SLOW5_BYTE_SWAP_ARRAY_VOID(data, 2, len); + } else if (len > 1 && bytes/len>2){ + SLOW5_ENDIAN_ERROR("multi-byte array"); + } + } offset += bytes; } @@ -3243,6 +3258,7 @@ void *slow5_get_next_mem(size_t *n, const struct slow5_file *s5p) { } goto err; } + SLOW5_BYTE_SWAP(&bytes_tmp); bytes = bytes_tmp; mem = (char *) malloc(bytes); @@ -3936,19 +3952,19 @@ void *slow5_rec_to_mem(struct slow5_rec *read, struct slow5_aux_meta *aux_meta, mem = (char *) malloc(cap * sizeof *mem); SLOW5_MALLOC_CHK(mem); - memcpy(mem + curr_len, &read->read_id_len, sizeof read->read_id_len); + SLOW5_MEMCPY(mem + curr_len, &read->read_id_len, sizeof read->read_id_len); curr_len += sizeof read->read_id_len; memcpy(mem + curr_len, read->read_id, read->read_id_len * sizeof *read->read_id); curr_len += read->read_id_len * sizeof *read->read_id; - memcpy(mem + curr_len, &read->read_group, sizeof read->read_group); + SLOW5_MEMCPY(mem + curr_len, &read->read_group, sizeof read->read_group); curr_len += sizeof read->read_group; - memcpy(mem + curr_len, &read->digitisation, sizeof read->digitisation); + SLOW5_MEMCPY(mem + curr_len, &read->digitisation, sizeof read->digitisation); curr_len += sizeof read->digitisation; - memcpy(mem + curr_len, &read->offset, sizeof read->offset); + SLOW5_MEMCPY(mem + curr_len, &read->offset, sizeof read->offset); curr_len += sizeof read->offset; - memcpy(mem + curr_len, &read->range, sizeof read->range); + SLOW5_MEMCPY(mem + curr_len, &read->range, sizeof read->range); curr_len += sizeof read->range; - memcpy(mem + curr_len, &read->sampling_rate, sizeof read->sampling_rate); + SLOW5_MEMCPY(mem + curr_len, &read->sampling_rate, sizeof read->sampling_rate); curr_len += sizeof read->sampling_rate; size_t bytes_raw_sig = read->len_raw_signal * sizeof *read->raw_signal; @@ -3968,9 +3984,9 @@ void *slow5_rec_to_mem(struct slow5_rec *read, struct slow5_aux_meta *aux_meta, read->raw_signal = (int16_t *) raw_sig_svb; } - memcpy(mem + curr_len, &read->len_raw_signal, sizeof read->len_raw_signal); + SLOW5_MEMCPY(mem + curr_len, &read->len_raw_signal, sizeof read->len_raw_signal); curr_len += sizeof read->len_raw_signal; - memcpy(mem + curr_len, read->raw_signal, bytes_raw_sig); + memcpy(mem + curr_len, read->raw_signal, bytes_raw_sig); SLOW5_BYTE_SWAP_ARRAY_VOID(mem + curr_len, sizeof *read->raw_signal, read->len_raw_signal); curr_len += bytes_raw_sig; // Auxiliary fields @@ -3998,8 +4014,7 @@ void *slow5_rec_to_mem(struct slow5_rec *read, struct slow5_aux_meta *aux_meta, mem = (char *) realloc(mem, cap); SLOW5_MALLOC_CHK(mem); } - - memcpy(mem + curr_len, &aux_data.len, sizeof aux_data.len); + SLOW5_MEMCPY(mem + curr_len, &aux_data.len, sizeof aux_data.len); curr_len += sizeof aux_data.len; } else if (curr_len + aux_data.bytes >= cap) { // Realloc if necessary @@ -4010,6 +4025,15 @@ void *slow5_rec_to_mem(struct slow5_rec *read, struct slow5_aux_meta *aux_meta, if (aux_data.len != 0) { memcpy(mem + curr_len, aux_data.data, aux_data.bytes); + if(slow5_bigend){ + if(aux_data.len==1){ + SLOW5_BYTE_SWAP_VOID(mem + curr_len, aux_data.bytes); + } else if (aux_data.len > 1 && aux_data.bytes/aux_data.len==2) { + SLOW5_BYTE_SWAP_ARRAY_VOID(mem + curr_len, 2, aux_data.len); + } else if (aux_data.len > 1 && aux_data.bytes/aux_data.len>2) { + SLOW5_ENDIAN_ERROR("multi-byte array"); + } + } } curr_len += aux_data.bytes; @@ -4031,7 +4055,7 @@ void *slow5_rec_to_mem(struct slow5_rec *read, struct slow5_aux_meta *aux_meta, uint8_t *comp_mem_full = (uint8_t *) malloc(sizeof record_size + record_size); SLOW5_MALLOC_CHK(comp_mem_full); // Copy size of compressed record - memcpy(comp_mem_full, &record_size, sizeof record_size); + SLOW5_MEMCPY(comp_mem_full, &record_size, sizeof record_size); // Copy compressed record memcpy(comp_mem_full + sizeof record_size, comp_mem, record_size); free(comp_mem); @@ -4604,6 +4628,12 @@ void slow5_set_exit_condition(enum slow5_exit_condition_opt exit_condition) { slow5_exit_condition = exit_condition; } +/* no error messages printed and not exit when a requested read ID is not found in index*/ +// being tested, do not use until added to documentation +void slow5_set_skip_rid(){ + slow5_skip_rid = 1; +} + /* * is slow5 file at end? seek back, read and find out * return 0 if not at end and file pointer left unchanged diff --git a/slow5lib/src/slow5_byte.h b/slow5lib/src/slow5_byte.h new file mode 100644 index 0000000..624fdde --- /dev/null +++ b/slow5lib/src/slow5_byte.h @@ -0,0 +1,229 @@ +/** + * @file slow5_byte.h + * @brief SLOW5 byte handling functions + * @author Hasindu Gamaarachchi (hasindu@garvan.org.au) + * @date 05/05/2024 + */ + +/* +MIT License + +Copyright (c) 2020 Hasindu Gamaarachchi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +*/ + + +#ifndef SLOW5_BYTE_H +#define SLOW5_BYTE_H + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif /* _cplusplus */ + +/* This is for internal use only - do not use any of the following directly*/ + + +static inline void slow5_byte_swap(void *dest, const void *src, size_t size){ + if(size==2){ + + // uint16_t tmp = ((uint16_t*)src)[0]; + // tmp = (tmp << 8) | (tmp >> 8); + // ((uint16_t*)dest)[0] = tmp; + + char tmp0 = ((char*)src)[0]; + char tmp1 = ((char*)src)[1]; + ((char*)dest)[0] = tmp1; + ((char*)dest)[1] = tmp0; + + } else if(size==4){ + + // uint32_t tmp = ((uint32_t*)src)[0]; + // tmp = (tmp >> 24) | ((tmp << 8) & 0x00FF0000) | ((tmp >> 8) & 0x0000FF00) | (tmp << 24); + // ((uint32_t*)dest)[0] = tmp; + + char tmp0 = ((char*)src)[0]; + char tmp1 = ((char*)src)[1]; + char tmp2 = ((char*)src)[2]; + char tmp3 = ((char*)src)[3]; + ((char*)dest)[0] = tmp3; + ((char*)dest)[1] = tmp2; + ((char*)dest)[2] = tmp1; + ((char*)dest)[3] = tmp0; + + } else if(size==8){ + + // uint64_t tmp = ((uint64_t*)src)[0]; + // tmp = (tmp >> 56) | ((tmp << 40) & 0x00FF000000000000) | ((tmp << 24) & 0x0000FF0000000000) | + // ((tmp << 8) & 0x000000FF00000000) | ((tmp >> 8) & 0x00000000FF000000) | + // ((tmp >> 24) & 0x0000000000FF0000) | ((tmp >> 40) & 0x000000000000FF00) | (tmp << 56); + // ((uint64_t*)dest)[0] = tmp; + + char tmp0 = ((char*)src)[0]; + char tmp1 = ((char*)src)[1]; + char tmp2 = ((char*)src)[2]; + char tmp3 = ((char*)src)[3]; + char tmp4 = ((char*)src)[4]; + char tmp5 = ((char*)src)[5]; + char tmp6 = ((char*)src)[6]; + char tmp7 = ((char*)src)[7]; + ((char*)dest)[0] = tmp7; + ((char*)dest)[1] = tmp6; + ((char*)dest)[2] = tmp5; + ((char*)dest)[3] = tmp4; + ((char*)dest)[4] = tmp3; + ((char*)dest)[5] = tmp2; + ((char*)dest)[6] = tmp1; + ((char*)dest)[7] = tmp0; + + } +} + + +static inline int slow5_fwrite_bigend(const void *ptr, size_t size, size_t nitems, FILE *stream) { + + if(!(size==1 || size==2 || size==4 || size==8)){ + fprintf(stderr,"ERROR: slow5_fwrite_bigend is only implemented for data types of size %zu\n",size); + return -1; + } + if(nitems!=1){ + fprintf(stderr,"ERROR: slow5_fwrite_bigend is only implemented for nitems=1\n"); + return -1; + } + + if(size == 1){ + return fwrite(ptr, size, nitems, stream); + } else { + void *ptr_copy = malloc(size*nitems); + if(ptr_copy==NULL){ + fprintf(stderr,"[%s::ERROR]\033[1;31m malloc failled\033[0m\n", __func__); + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); + return -1; + } + slow5_byte_swap(ptr_copy, ptr, size); + int ret = fwrite(ptr_copy, size, nitems, stream); + free(ptr_copy); + return ret; + } +} + +static inline int slow5_fread_bigend(void *ptr, size_t size, size_t nitems, FILE *stream) { + + if(!(size==1 || size==2 || size==4 || size==8)){ + fprintf(stderr,"[%s::ERROR]\033[1;31m slow5_fwrite_bigend is only implemented for data types of size %zu\033[0m\n",__func__,size); + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); + return -1; + } + if(nitems!=1){ + fprintf(stderr,"[%s::ERROR]\033[1;31m slow5_fwrite_bigend is only implemented for nitems=1\033[0m\n", __func__); + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); + return -1; + } + + if(size == 1){ + return fread(ptr, size, nitems, stream); + } else { + void *ptr_copy = malloc(size*nitems); + if(ptr_copy==NULL){ + fprintf(stderr,"[%s::ERROR]\033[1;31m malloc failled\033[0m\n", __func__); + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); + return -1; + } + size_t ret = fread(ptr_copy, size, nitems, stream); + slow5_byte_swap(ptr, ptr_copy, size); + free(ptr_copy); + return ret; + } +} + +#define SLOW5_FWRITE(ptr, size, nitems, stream) \ + ( (slow5_bigend) ? (slow5_fwrite_bigend((ptr), (size), (nitems), (stream))) : (fwrite((ptr), (size), (nitems), (stream))) ) + +#define SLOW5_FREAD(ptr, size, nitems, stream) \ + ( (slow5_bigend) ? (slow5_fread_bigend((ptr), (size), (nitems), (stream))) : (fread((ptr), (size), (nitems), (stream))) ) + +static inline void *slow5_memcpy_bigend(void *dest, const void * src, size_t size) { + if(!(size==1 || size==2 || size==4 || size==8)){ + fprintf(stderr,"[%s::ERROR]\033[1;31m slow5_fwrite_bigend is only implemented for data types of size %zu\033[0m\n",__func__,size); + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); + exit(EXIT_FAILURE); + } + slow5_byte_swap((dest), (src), (size)); + return (dest); +} + +#define SLOW5_MEMCPY(dest, src, size) \ + ( (slow5_bigend) ? (slow5_memcpy_bigend((dest), (src), (size))) : (memcpy((dest), (src), (size))) ) + + +#define SLOW5_BYTE_SWAP(ptr) \ + if(slow5_bigend){ \ + int size = sizeof(*(ptr)); \ + slow5_byte_swap((ptr), (ptr), size); \ + } + +#define SLOW5_BYTE_SWAP_VOID(ptr,size) \ + if(slow5_bigend){ \ + slow5_byte_swap((ptr), (ptr), size); \ + } + +#define SLOW5_BYTE_SWAP_ARRAY(ptr, nitems) \ + if(slow5_bigend){ \ + int size = sizeof(*(ptr)); \ + for(size_t i=0; i<(nitems); i++){ \ + slow5_byte_swap((ptr)+i, (ptr)+i, size); \ + } \ + } + +#define SLOW5_BYTE_SWAP_ARRAY_VOID(ptr, size, nitems) \ + if(slow5_bigend){ \ + if((size)!=2){ \ + fprintf(stderr,"[%s::ERROR]\033[1;31m Big Endian is not supported for the feature. Open an issue please.\033[0m\n", __func__); \ + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); \ + exit(EXIT_FAILURE); \ + } \ + for(int64_t i=0; i<(nitems); i++){ \ + char *thisptr = ((char *)(ptr))+i*(size); \ + slow5_byte_swap(thisptr, thisptr, (size)); \ + } \ + } + + +#define SLOW5_ENDIAN_ERROR(MSG) \ + if(slow5_bigend){ \ + fprintf(stderr,"[%s::ERROR]\033[1;31m Big Endian is not supported for this" MSG "feature. Open an issue please.\033[0m\n", __func__); \ + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__-2); \ + exit(EXIT_FAILURE); \ + }\ + +#define SLOW5_ENDIAN_WARN \ + if(slow5_bigend){ \ + fprintf(stderr,"[%s::ERROR]\033[1;31m Big Endian is not supported for this multi-byte array feature. Don't trust any results. Open an issue please.\033[0m\n", __func__); \ + fprintf(stderr,"At %s:%d\n", __FILE__, __LINE__); \ + }\ + + +#ifdef __cplusplus +} +#endif /* _cplusplus */ + +#endif /* slow5_byte.h */ diff --git a/slow5lib/src/slow5_idx.c b/slow5lib/src/slow5_idx.c index d9aa270..be389e2 100644 --- a/slow5lib/src/slow5_idx.c +++ b/slow5lib/src/slow5_idx.c @@ -6,10 +6,13 @@ //#include "slow5.h" #include "slow5_extra.h" #include "slow5_misc.h" +#include "slow5_byte.h" //#include "slow5_error.h" extern enum slow5_log_level_opt slow5_log_level; extern enum slow5_exit_condition_opt slow5_exit_condition; +extern int8_t slow5_bigend; +extern int8_t slow5_skip_rid; #define BUF_INIT_CAP (20*1024*1024) #define SLOW5_INDEX_BUF_INIT_CAP (64) // 2^6 TODO is this too little? @@ -270,6 +273,7 @@ static int slow5_idx_build(struct slow5_idx *index, struct slow5_file *s5p) { } return slow5_errno; } + SLOW5_BYTE_SWAP(&record_size); //if bigendian size = sizeof record_size + record_size; @@ -388,10 +392,16 @@ int slow5_idx_write(struct slow5_idx *index, struct slow5_version version) { struct slow5_rec_idx read_index = kh_value(index->hash, pos); slow5_rid_len_t read_id_len = strlen(index->ids[i]); - if (fwrite(&read_id_len, sizeof read_id_len, 1, index->fp) != 1 || - fwrite(index->ids[i], sizeof *index->ids[i], read_id_len, index->fp) != read_id_len || - fwrite(&read_index.offset, sizeof read_index.offset, 1, index->fp) != 1 || - fwrite(&read_index.size, sizeof read_index.size, 1, index->fp) != 1) { + if (SLOW5_FWRITE(&read_id_len, sizeof read_id_len, 1, index->fp) != 1){ + return SLOW5_ERR_IO; + } + if (fwrite(index->ids[i], sizeof *index->ids[i], read_id_len, index->fp) != read_id_len){ + return SLOW5_ERR_IO; + } + if (SLOW5_FWRITE(&read_index.offset, sizeof read_index.offset, 1, index->fp) != 1 ){ + return SLOW5_ERR_IO; + } + if (SLOW5_FWRITE(&read_index.size, sizeof read_index.size, 1, index->fp) != 1) { return SLOW5_ERR_IO; } } @@ -434,7 +444,7 @@ int slow5_idx_read(struct slow5_idx *index) { while (1) { slow5_rid_len_t read_id_len; - if (fread(&read_id_len, sizeof read_id_len, 1, index->fp) != 1) { + if (SLOW5_FREAD(&read_id_len, sizeof read_id_len, 1, index->fp) != 1) { SLOW5_ERROR("Malformed slow5 index. Failed to read the read ID length.%s", feof(index->fp) ? " Missing index end of file marker." : ""); if (feof(index->fp)) { slow5_errno = SLOW5_ERR_TRUNC; @@ -472,8 +482,8 @@ int slow5_idx_read(struct slow5_idx *index) { uint64_t offset; uint64_t size; - if (fread(&offset, sizeof offset, 1, index->fp) != 1 || - fread(&size, sizeof size, 1, index->fp) != 1) { + if (SLOW5_FREAD(&offset, sizeof offset, 1, index->fp) != 1 || + SLOW5_FREAD(&size, sizeof size, 1, index->fp) != 1) { return SLOW5_ERR_IO; } @@ -526,7 +536,9 @@ int slow5_idx_get(struct slow5_idx *index, const char *read_id, struct slow5_rec khint_t pos = kh_get(slow5_s2i, index->hash, read_id); if (pos == kh_end(index->hash)) { - SLOW5_ERROR("Read ID '%s' was not found.", read_id) + if (slow5_skip_rid == 0) { + SLOW5_ERROR("Read ID '%s' was not found.", read_id) + } ret = -1; } else if (read_index) { *read_index = kh_value(index->hash, pos); diff --git a/slow5lib/src/slow5_idx.h b/slow5lib/src/slow5_idx.h index 05a328e..77d9d6a 100644 --- a/slow5lib/src/slow5_idx.h +++ b/slow5lib/src/slow5_idx.h @@ -11,8 +11,8 @@ extern "C" { #endif /* -IMPORTANT: The low-level API is not yet finalised or documented and is only for internal use. -If anyone is interested, please open a GitHub issue, rather than trying to figure out from the code. +IMPORTANT: These are functions for internal use. +If anyone is interested in getting any of these functions exposed, please open a GitHub issue. Function prototypes can be changed without notice or completely removed. So do NOT use these functions in your code. these functions are used by slow5tools and pyslow5 - so any change to a function here means slow5tools and pyslow5 must be fixed. */ diff --git a/slow5lib/src/slow5_press.c b/slow5lib/src/slow5_press.c index adef868..7f5d200 100644 --- a/slow5lib/src/slow5_press.c +++ b/slow5lib/src/slow5_press.c @@ -17,6 +17,7 @@ extern enum slow5_log_level_opt slow5_log_level; extern enum slow5_exit_condition_opt slow5_exit_condition; +extern int8_t slow5_bigend; /* zlib */ static int zlib_init_deflate(z_stream *strm); @@ -1052,6 +1053,11 @@ static uint8_t *ptr_compress_svb(const uint32_t *ptr, size_t count, size_t *n) { /* return NULL on malloc error, n cannot be NULL */ static uint8_t *ptr_compress_svb_zd(const int16_t *ptr, size_t count, size_t *n) { + + if(slow5_bigend){ + SLOW5_ERROR_EXIT("%s","Compression of SVB-ZD on big-endian architectures is not supported yet."); + } + uint32_t length = count / sizeof *ptr; int32_t *in = (int32_t *) malloc(length * sizeof *in); if (!in) { @@ -1109,6 +1115,10 @@ static uint32_t *ptr_depress_svb(const uint8_t *ptr, size_t count, size_t *n) { /* return NULL on malloc error, n cannot be NULL */ static int16_t *ptr_depress_svb_zd(const uint8_t *ptr, size_t count, size_t *n) { + if(slow5_bigend){ + SLOW5_ERROR_EXIT("%s","Decompression of SVB-ZD on big-endian architectures is not supported yet."); + } + uint32_t *diff = ptr_depress_svb(ptr, count, n); if (!diff) { return NULL;