From f8d212d7799aba029269dd72113cec8f667139ad Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Fri, 3 Nov 2023 14:50:47 +0000 Subject: [PATCH 1/7] Statically compute jump length --- yt/frontends/ramses/io_utils.pyx | 56 +++++++++++++++++++------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/yt/frontends/ramses/io_utils.pyx b/yt/frontends/ramses/io_utils.pyx index 9dc749e1400..751f8bcf096 100644 --- a/yt/frontends/ramses/io_utils.pyx +++ b/yt/frontends/ramses/io_utils.pyx @@ -18,6 +18,11 @@ cdef int INT32_SIZE = sizeof(np.int32_t) cdef int INT64_SIZE = sizeof(np.int64_t) cdef int DOUBLE_SIZE = sizeof(np.float64_t) + +cdef inline int skip_len(int Nskip, int record_len) noexcept nogil: + return Nskip * (record_len * DOUBLE_SIZE + INT64_SIZE) + + @cython.cpow(True) @cython.boundscheck(False) @cython.wraparound(False) @@ -29,10 +34,13 @@ def read_amr(FortranFile f, dict headers, cdef INT64_t ncpu, nboundary, max_level, nlevelmax, ncpu_and_bound cdef DOUBLE_t nx, ny, nz - cdef INT64_t ilevel, icpu, n, ndim, skip_len - cdef INT32_t ng, buffer_size + cdef INT64_t ilevel, icpu, n, ndim, jump_len + cdef INT32_t ng cdef np.ndarray[np.int32_t, ndim=2] numbl cdef np.ndarray[np.float64_t, ndim=2] pos + cdef int i + + cdef np.float64_t[::1, :] pos_view ndim = headers['ndim'] numbl = headers['numbl'] @@ -43,10 +51,12 @@ def read_amr(FortranFile f, dict headers, ncpu_and_bound = nboundary + ncpu - pos = np.empty((0, 3), dtype=np.float64) - buffer_size = 0 + # Allocate more memory if required + pos = np.empty((max(numbl.max(), ngridbound.max()), 3), dtype="d", order="F") + pos_view = pos + # Compute number of fields to skip. This should be 31 in 3 dimensions - skip_len = (1 # father index + jump_len = (1 # father index + 2*ndim # neighbor index + 2**ndim # son index + 2**ndim # cpu map @@ -54,6 +64,8 @@ def read_amr(FortranFile f, dict headers, ) # Initialize values max_level = 0 + cdef int record_len + for ilevel in range(nlevelmax): for icpu in range(ncpu_and_bound): if icpu < ncpu: @@ -63,21 +75,25 @@ def read_amr(FortranFile f, dict headers, if ng == 0: continue - # Skip grid index, 'next' and 'prev' arrays (they are used - # to build the linked list in RAMSES) - f.skip(3) - - # Allocate more memory if required - if ng > buffer_size: - pos = np.empty((ng, 3), dtype="d") - buffer_size = ng - - pos[:ng, 0] = f.read_vector("d") - nx - pos[:ng, 1] = f.read_vector("d") - ny - pos[:ng, 2] = f.read_vector("d") - nz + # Skip grid index, 'next' and 'prev' arrays and possibly + # records from previous iterations + record_len = (ng * INT32_SIZE + INT64_SIZE) + f.seek(record_len * 3, 1) + + f.read_vector_inplace("d", &pos_view[0, 0]) + f.read_vector_inplace("d", &pos_view[0, 1]) + f.read_vector_inplace("d", &pos_view[0, 2]) + + for i in range(ng): + pos_view[i, 0] -= nx + for i in range(ng): + pos_view[i, 1] -= ny + for i in range(ng): + pos_view[i, 2] -= nz # Skip father, neighbor, son, cpu map and refinement map - f.skip(skip_len) + f.seek(record_len * jump_len, 1) + # Note that we're adding *grids*, not individual cells. if ilevel >= min_level: n = oct_handler.add(icpu + 1, ilevel - min_level, pos[:ng, :], @@ -87,10 +103,6 @@ def read_amr(FortranFile f, dict headers, return max_level - -cdef inline int skip_len(int Nskip, int record_len) noexcept nogil: - return Nskip * (record_len * DOUBLE_SIZE + INT64_SIZE) - @cython.cpow(True) @cython.boundscheck(False) @cython.wraparound(False) From c87b49fad1f3097b277113f588b7a24bf5f397f0 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Tue, 7 Nov 2023 22:50:04 +0100 Subject: [PATCH 2/7] Mask out levels that we won't read --- yt/frontends/ramses/data_structures.py | 14 ++++++------- yt/frontends/ramses/io_utils.pyx | 21 ++++++++++++++----- yt/geometry/oct_container.pxd | 14 ++++++------- yt/geometry/oct_container.pyx | 28 +++++++++++++------------- 4 files changed, 44 insertions(+), 33 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 3c44ecfa6d7..e0a7dd75f90 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -375,7 +375,7 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): data = {} cell_count = selector.count_oct_cells(self.oct_handler, self.domain_id) - levels, cell_inds, file_inds = self.oct_handler.file_index_octs( + level_inds, cell_inds, file_inds = self.oct_handler.file_index_octs( selector, self.domain_id, cell_count ) @@ -389,7 +389,7 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): file_handler.offset, file_handler.level_count, cpu_list, - levels, + level_inds, cell_inds, file_inds, ndim, @@ -418,13 +418,13 @@ def _fill_with_ghostzones( gz_cache = getattr(self, "_ghost_zone_cache", None) if gz_cache: - levels, cell_inds, file_inds, domains = gz_cache + level_inds, cell_inds, file_inds, domain_inds = gz_cache else: gz_cache = ( - levels, + level_inds, cell_inds, file_inds, - domains, + domain_inds, ) = self.oct_handler.file_index_octs_with_ghost_zones( selector, self.domain_id, cell_count, self._num_ghost_zones ) @@ -439,7 +439,7 @@ def _fill_with_ghostzones( file_handler.offset, file_handler.level_count, cpu_list, - levels, + level_inds, cell_inds, file_inds, ndim, @@ -447,7 +447,7 @@ def _fill_with_ghostzones( fields, tr, oct_handler, - domains=domains, + domains=domain_inds, ) return tr diff --git a/yt/frontends/ramses/io_utils.pyx b/yt/frontends/ramses/io_utils.pyx index 751f8bcf096..32f84628caf 100644 --- a/yt/frontends/ramses/io_utils.pyx +++ b/yt/frontends/ramses/io_utils.pyx @@ -163,13 +163,13 @@ def fill_hydro(FortranFile f, np.ndarray[np.int64_t, ndim=2] offsets, np.ndarray[np.int64_t, ndim=2] level_count, list cpu_enumerator, - np.ndarray[np.uint8_t, ndim=1] levels, + np.ndarray[np.uint8_t, ndim=1] level_inds, np.ndarray[np.uint8_t, ndim=1] cell_inds, np.ndarray[np.int64_t, ndim=1] file_inds, INT64_t ndim, list all_fields, list fields, dict tr, RAMSESOctreeContainer oct_handler, - np.ndarray[np.int32_t, ndim=1] domains=np.array([], dtype='int32')): + np.ndarray[np.int32_t, ndim=1] domain_inds=np.array([], dtype='int32')): cdef INT64_t offset cdef dict tmp cdef str field @@ -186,8 +186,9 @@ def fill_hydro(FortranFile f, cdef np.int64_t[::1] cpu_list = np.asarray(cpu_enumerator, dtype=np.int64) cdef np.int64_t[::1] jumps = np.zeros(nfields_selected + 1, dtype=np.int64) - cdef int jump_len + cdef int jump_len, Ncells cdef np.ndarray[np.float64_t, ndim=3] buffer + cdef np.uint8_t[::1] mask_level = np.zeros(nlevels, dtype=np.uint8) jump_len = 0 j = 0 @@ -202,8 +203,18 @@ def fill_hydro(FortranFile f, cdef int first_field_index = jumps[0] buffer = np.empty((level_count.max(), twotondim, nfields_selected), dtype="float64", order='F') + + Ncells = len(level_inds) + for i in range(Ncells): + mask_level[level_inds[i]] |= 1 + + # print("Reading hydro fields, unique levels=%s", np.asarray(levels_to_read)) + # selected_levels = range(levels.min(), levels.max()+1) + # selected_cpus = range(cpu_list.min(), cpu_list.max()+1) # Loop over levels for ilevel in range(nlevels): + if mask_level[ilevel] == 0: + continue # Loop over cpu domains for ii in range(ncpu_selected): icpu = cpu_list[ii] @@ -242,7 +253,7 @@ def fill_hydro(FortranFile f, if ncpu_selected > 1: oct_handler.fill_level_with_domain( - ilevel, levels, cell_inds, file_inds, domains, tr, tmp, domain=icpu+1) + ilevel, level_inds, cell_inds, file_inds, domain_inds, tr, tmp, domain=icpu+1) else: oct_handler.fill_level( - ilevel, levels, cell_inds, file_inds, tr, tmp) + ilevel, level_inds, cell_inds, file_inds, tr, tmp) diff --git a/yt/geometry/oct_container.pxd b/yt/geometry/oct_container.pxd index 3891f8fdab0..40e189f81b1 100644 --- a/yt/geometry/oct_container.pxd +++ b/yt/geometry/oct_container.pxd @@ -85,9 +85,9 @@ cdef class OctreeContainer: cpdef void fill_level( self, const int level, - const np.uint8_t[:] levels, - const np.uint8_t[:] cell_inds, - const np.int64_t[:] file_inds, + const np.uint8_t[::1] level_inds, + const np.uint8_t[::1] cell_inds, + const np.int64_t[::1] file_inds, dict dest_fields, dict source_fields, np.int64_t offset = ? @@ -95,10 +95,10 @@ cdef class OctreeContainer: cpdef int fill_level_with_domain( self, const int level, - const np.uint8_t[:] levels, - const np.uint8_t[:] cell_inds, - const np.int64_t[:] file_inds, - const np.int32_t[:] domains, + const np.uint8_t[::1] level_inds, + const np.uint8_t[::1] cell_inds, + const np.int64_t[::1] file_inds, + const np.int32_t[::1] domain_inds, dict dest_fields, dict source_fields, const np.int32_t domain, diff --git a/yt/geometry/oct_container.pyx b/yt/geometry/oct_container.pyx index 4b09d360e51..806099b056a 100644 --- a/yt/geometry/oct_container.pyx +++ b/yt/geometry/oct_container.pyx @@ -745,15 +745,15 @@ cdef class OctreeContainer: cpdef void fill_level( self, const int level, - const np.uint8_t[:] levels, - const np.uint8_t[:] cell_inds, - const np.int64_t[:] file_inds, + const np.uint8_t[::1] levels, + const np.uint8_t[::1] cell_inds, + const np.int64_t[::1] file_inds, dict dest_fields, dict source_fields, np.int64_t offset = 0 ): - cdef np.ndarray[np.float64_t, ndim=2] source - cdef np.ndarray[np.float64_t, ndim=1] dest + cdef np.float64_t[:, :] source + cdef np.float64_t[::1] dest cdef int i, lvl for key in dest_fields: @@ -831,10 +831,10 @@ cdef class OctreeContainer: cpdef int fill_level_with_domain( self, const int level, - const np.uint8_t[:] levels, - const np.uint8_t[:] cell_inds, - const np.int64_t[:] file_inds, - const np.int32_t[:] domains, + const np.uint8_t[::1] level_inds, + const np.uint8_t[::1] cell_inds, + const np.int64_t[::1] file_inds, + const np.int32_t[::1] domain_inds, dict dest_fields, dict source_fields, const np.int32_t domain, @@ -846,8 +846,8 @@ cdef class OctreeContainer: These buffer oct cells have a different domain than the local one and are usually not read, but one has to read them e.g. to compute ghost zones. """ - cdef np.ndarray[np.float64_t, ndim=2] source - cdef np.ndarray[np.float64_t, ndim=1] dest + cdef np.float64_t[:, :] source + cdef np.float64_t[::1] dest cdef int i, count, lev cdef np.int32_t dom @@ -855,9 +855,9 @@ cdef class OctreeContainer: dest = dest_fields[key] source = source_fields[key] count = 0 - for i in range(levels.shape[0]): - lev = levels[i] - dom = domains[i] + for i in range(level_inds.shape[0]): + lev = level_inds[i] + dom = domain_inds[i] if lev != level or dom != domain: continue count += 1 if file_inds[i] < 0: From 2255adb3563111bebae5606e7af7445bad1ab2bc Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Tue, 7 Nov 2023 23:44:48 +0100 Subject: [PATCH 3/7] Early exit if cell count is 0 --- yt/frontends/ramses/data_structures.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index e0a7dd75f90..4071772ce3b 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -383,6 +383,9 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): for field in fields: data[field] = np.zeros(cell_count, "float64") + if cell_count == 0: + return data + cpu_list = [self.domain_id - 1] fill_hydro( fd, @@ -433,6 +436,10 @@ def _fill_with_ghostzones( # Initializing data container for field in fields: tr[field] = np.zeros(cell_count, "float64") + + if cell_count == 0: + return tr + cpu_list = list(range(ncpu)) fill_hydro( fd, From 3a90e45c5aeea7be545df0c1c2cbd97563d77988 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 00:04:27 +0100 Subject: [PATCH 4/7] Only compute index in file etc. *if* we have cells matching --- yt/frontends/ramses/data_structures.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 4071772ce3b..2918bd14e42 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -375,10 +375,6 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): data = {} cell_count = selector.count_oct_cells(self.oct_handler, self.domain_id) - level_inds, cell_inds, file_inds = self.oct_handler.file_index_octs( - selector, self.domain_id, cell_count - ) - # Initializing data container for field in fields: data[field] = np.zeros(cell_count, "float64") @@ -386,6 +382,10 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): if cell_count == 0: return data + level_inds, cell_inds, file_inds = self.oct_handler.file_index_octs( + selector, self.domain_id, cell_count + ) + cpu_list = [self.domain_id - 1] fill_hydro( fd, @@ -419,6 +419,13 @@ def _fill_with_ghostzones( selector.count_octs(self.oct_handler, self.domain_id) * self.nz**ndim ) + # Initializing data container + for field in fields: + tr[field] = np.zeros(cell_count, "float64") + + if cell_count == 0: + return tr + gz_cache = getattr(self, "_ghost_zone_cache", None) if gz_cache: level_inds, cell_inds, file_inds, domain_inds = gz_cache @@ -433,13 +440,6 @@ def _fill_with_ghostzones( ) self._ghost_zone_cache = gz_cache - # Initializing data container - for field in fields: - tr[field] = np.zeros(cell_count, "float64") - - if cell_count == 0: - return tr - cpu_list = list(range(ncpu)) fill_hydro( fd, From 921d84f29a237a897b539d86b2b56442443265c4 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 00:04:57 +0100 Subject: [PATCH 5/7] Fix buggy call to 'fill_hydro' Was missing a refactoring step --- yt/frontends/ramses/data_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 2918bd14e42..84595177c57 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -454,7 +454,7 @@ def _fill_with_ghostzones( fields, tr, oct_handler, - domains=domain_inds, + domain_inds=domain_inds, ) return tr From 5e898df6087eb8427170312b2684f925fffdbce4 Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Wed, 8 Nov 2023 22:53:55 +0100 Subject: [PATCH 6/7] Add useful comments --- yt/frontends/ramses/data_structures.py | 2 ++ yt/frontends/ramses/io_utils.pyx | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/yt/frontends/ramses/data_structures.py b/yt/frontends/ramses/data_structures.py index 84595177c57..a25a08644c7 100644 --- a/yt/frontends/ramses/data_structures.py +++ b/yt/frontends/ramses/data_structures.py @@ -379,6 +379,7 @@ def _fill_no_ghostzones(self, fd, fields, selector, file_handler): for field in fields: data[field] = np.zeros(cell_count, "float64") + # Do an early exit if the cell count is null if cell_count == 0: return data @@ -423,6 +424,7 @@ def _fill_with_ghostzones( for field in fields: tr[field] = np.zeros(cell_count, "float64") + # Do an early exit if the cell count is null if cell_count == 0: return tr diff --git a/yt/frontends/ramses/io_utils.pyx b/yt/frontends/ramses/io_utils.pyx index 32f84628caf..fc99a8c23f7 100644 --- a/yt/frontends/ramses/io_utils.pyx +++ b/yt/frontends/ramses/io_utils.pyx @@ -40,6 +40,8 @@ def read_amr(FortranFile f, dict headers, cdef np.ndarray[np.float64_t, ndim=2] pos cdef int i + # The ordering is very important here, as we'll write directly into the memory + # address the content of the files. cdef np.float64_t[::1, :] pos_view ndim = headers['ndim'] @@ -75,8 +77,7 @@ def read_amr(FortranFile f, dict headers, if ng == 0: continue - # Skip grid index, 'next' and 'prev' arrays and possibly - # records from previous iterations + # Skip grid index, 'next' and 'prev' arrays record_len = (ng * INT32_SIZE + INT64_SIZE) f.seek(record_len * 3, 1) @@ -187,9 +188,12 @@ def fill_hydro(FortranFile f, cdef np.int64_t[::1] jumps = np.zeros(nfields_selected + 1, dtype=np.int64) cdef int jump_len, Ncells - cdef np.ndarray[np.float64_t, ndim=3] buffer cdef np.uint8_t[::1] mask_level = np.zeros(nlevels, dtype=np.uint8) + # The ordering is very important here, as we'll write directly into the memory + # address the content of the files. + cdef np.float64_t[::1, :, :] buffer + jump_len = 0 j = 0 for i, field in enumerate(all_fields): @@ -204,18 +208,16 @@ def fill_hydro(FortranFile f, buffer = np.empty((level_count.max(), twotondim, nfields_selected), dtype="float64", order='F') + # Precompute which levels we need to read Ncells = len(level_inds) for i in range(Ncells): mask_level[level_inds[i]] |= 1 - # print("Reading hydro fields, unique levels=%s", np.asarray(levels_to_read)) - # selected_levels = range(levels.min(), levels.max()+1) - # selected_cpus = range(cpu_list.min(), cpu_list.max()+1) # Loop over levels for ilevel in range(nlevels): if mask_level[ilevel] == 0: continue - # Loop over cpu domains + # Loop over cpu domains. In most cases, we'll only read the CPU corresponding to the domain. for ii in range(ncpu_selected): icpu = cpu_list[ii] nc = level_count[icpu, ilevel] From ed47ea5b2e753e51388deb8683b14587841d3d3a Mon Sep 17 00:00:00 2001 From: Corentin Cadiou Date: Fri, 10 Nov 2023 12:15:06 +0100 Subject: [PATCH 7/7] Use SEEK_SET/SEEK_CUR to seek --- yt/frontends/ramses/io_utils.pyx | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/yt/frontends/ramses/io_utils.pyx b/yt/frontends/ramses/io_utils.pyx index fc99a8c23f7..69368b086ce 100644 --- a/yt/frontends/ramses/io_utils.pyx +++ b/yt/frontends/ramses/io_utils.pyx @@ -1,6 +1,7 @@ # distutils: libraries = STD_LIBS # distutils: include_dirs = LIB_DIR cimport cython +from libc.stdio cimport SEEK_CUR, SEEK_SET cimport numpy as np import numpy as np @@ -10,6 +11,7 @@ from yt.utilities.cython_fortran_utils cimport FortranFile from yt.utilities.exceptions import YTIllDefinedAMRData + ctypedef np.int32_t INT32_t ctypedef np.int64_t INT64_t ctypedef np.float64_t DOUBLE_t @@ -79,7 +81,7 @@ def read_amr(FortranFile f, dict headers, continue # Skip grid index, 'next' and 'prev' arrays record_len = (ng * INT32_SIZE + INT64_SIZE) - f.seek(record_len * 3, 1) + f.seek(record_len * 3, SEEK_CUR) f.read_vector_inplace("d", &pos_view[0, 0]) f.read_vector_inplace("d", &pos_view[0, 1]) @@ -93,7 +95,7 @@ def read_amr(FortranFile f, dict headers, pos_view[i, 2] -= nz # Skip father, neighbor, son, cpu map and refinement map - f.seek(record_len * jump_len, 1) + f.seek(record_len * jump_len, SEEK_CUR) # Note that we're adding *grids*, not individual cells. if ilevel >= min_level: @@ -151,7 +153,7 @@ cpdef read_offset(FortranFile f, INT64_t min_level, INT64_t domain_id, INT64_t n if ilevel >= min_level: offset_view[icpu, ilevel - min_level] = f.tell() level_count_view[icpu, ilevel - min_level] = file_ncache - f.seek(skip_len(Nskip, file_ncache), 1) + f.seek(skip_len(Nskip, file_ncache), SEEK_CUR) return offset, level_count @@ -226,7 +228,7 @@ def fill_hydro(FortranFile f, offset = offsets[icpu, ilevel] if offset == -1: continue - f.seek(offset + skip_len(first_field_index, nc)) + f.seek(offset + skip_len(first_field_index, nc), SEEK_SET) # We have already skipped the first fields (if any) # so we "rewind" (this will cancel the first seek) @@ -236,7 +238,7 @@ def fill_hydro(FortranFile f, for j in range(nfields_selected): jump_len += jumps[j] if jump_len > 0: - f.seek(skip_len(jump_len, nc), 1) + f.seek(skip_len(jump_len, nc), SEEK_CUR) jump_len = 0 f.read_vector_inplace('d', &buffer[0, i, j]) @@ -246,7 +248,7 @@ def fill_hydro(FortranFile f, # but since we're doing an absolute seek at the beginning of # the loop on CPUs, we can spare one seek here ## if jump_len > 0: - ## f.seek(skip_len(jump_len, nc), 1) + ## f.seek(skip_len(jump_len, nc), SEEK_CUR) # Alias buffer into dictionary tmp = {}