diff --git a/publication/joss/paper.bib b/publication/joss/paper.bib index df4979e4..45f4286d 100644 --- a/publication/joss/paper.bib +++ b/publication/joss/paper.bib @@ -1,19 +1,19 @@ @article{Sugiyama:2019, author = {Sugiyama, Naonori S. and Saito, Shun and Beutler, Florian and Seo, Hee-Jong}, - title = {A complete {FFT}-based decomposition formalism for the redshift-space bispectrum}, - journal = {Mon.~Not.~R.~Astron.~Soc.}, - year = {2019}, - volume = {484}, - number = {1}, - pages = {364--384}, - doi = {10.1093/mnras/sty3249}, + title = {A complete {FFT}-based decomposition formalism for the redshift-space bispectrum}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2019}, + volume = {484}, + number = {1}, + pages = {364--384}, + doi = {10.1093/mnras/sty3249}, } @article{Sugiyama:2018, author = {Sugiyama, Naonori S. and Shiraishi, Maresuke and Okumura, Teppei}, title = {{Limits on statistical anisotropy from {BOSS DR12} galaxies using bipolar spherical harmonics}}, - journal = {Mon.~Not.~R.~Astron.~Soc.}, - year = {2018}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2018}, volume = {473}, number = {2}, pages = {2737--2752}, @@ -65,13 +65,13 @@ @article{Amendola:2018 @article{Bernardeau:2002, author = {Bernardeau, F. and Colombi, S. and Gazta{\~n}aga, E. and Scoccimarro, R.}, - title = {Large-scale structure of the Universe and cosmological perturbation theory}, - journal = {Phys.~Rep.}, - year = {2002}, - volume = {367}, - issues = {1--3}, - pages = {1--248}, - doi = {10.1016/s0370-1573(02)00135-7}, + title = {Large-scale structure of the Universe and cosmological perturbation theory}, + journal = {Phys.~Rep.}, + year = {2002}, + volume = {367}, + issues = {1--3}, + pages = {1--248}, + doi = {10.1016/s0370-1573(02)00135-7}, } @article{Sefusatti:2006, @@ -92,11 +92,11 @@ @article{BOSS:2017 {SDSS-III Baryon Oscillation Spectroscopic Survey}: cosmological analysis of the {DR}12 galaxy sample }, - journal = {Mon.~Not.~R.~Astron.~Soc.}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, year = {2017}, - volume = {470}, - number = {3}, - pages = {2617--2652}, + volume = {470}, + number = {3}, + pages = {2617--2652}, doi = {10.1093/mnras/stx721}, } @@ -107,7 +107,7 @@ @article{eBOSS:2021 {Cosmological} implications from two decades of spectroscopic surveys at the {Apache Point Observatory} }, - journal = {Phys.~Rev.~D}, + journal = {Phys.~Rev.~D}, year = {2021}, volume = {103}, issue = {8}, @@ -117,13 +117,43 @@ @article{eBOSS:2021 @article{Scoccimarro:1999, author = {Scoccimarro, Rom\'an and Couchman, H. M. P. and Frieman, Joshua A.}, - title = {{The Bispectrum as a Signature of Gravitational Instability in Redshift Space}}, - journal = {Astrophys.~J.}, - year = {1999}, - volume = {517}, - number = {2}, - pages = {531--540}, - doi = {10.1086/307220}, + title = {{The Bispectrum as a Signature of Gravitational Instability in Redshift Space}}, + journal = {Astrophys.~J.}, + year = {1999}, + volume = {517}, + number = {2}, + pages = {531--540}, + doi = {10.1086/307220}, +} + +@article{Yamamoto:2006, + author = {Yamamoto, Kazuhiro and Nakamichi, Masashi and Kamino, Akinari and Bassett, Bruce A. and Nishioka, Hiroaki}, + title = {{A Measurement of the Quadrupole Power Spectrum in the Clustering of the 2dF QSO Survey}}, + journal = {Publ. Astron. Soc. Jpn.}, + year = {2006}, + volume = {58}, + number = {1}, + pages = {93-102}, + doi = {10.1093/pasj/58.1.93}, +} + +@article{Wilson:2016, + author = {Wilson, M. J. and Peacock, J. A. and Taylor, A. N. 
and {de la Torre}, S.}, + title = {Rapid modelling of the redshift-space power spectrum multipoles for a masked density field}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2016}, + volume = {464}, + number = {3}, + pages = {3121--3130}, + doi = {10.1093/mnras/stw2576}, +} + +@misc{VillaescusaNavarro:2018, + author = {{Villaescusa-Navarro}, Francisco}, + title = {Pylians: {Python} libraries for the analysis of numerical simulations}, + howpublished = {{Astrophysics Source Code Library} [ascl:1811.008]}, + year = {2018}, + eid = {ascl:1811.008}, } @article{Scoccimarro:2015, @@ -137,37 +167,48 @@ @article{Scoccimarro:2015 doi = {10.1103/PhysRevD.92.083532}, } +@article{Slepian:2015, + author = {Slepian, Zachary and Eisenstein, Daniel J.}, + title = {Computing the three-point correlation function of galaxies in $\mathcal{O}(N^2)$ time}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2015}, + volume = {454}, + number = {4}, + pages = {4142--4158}, + doi = {10.1093/mnras/stv2119}, +} + @article{Slepian:2018, author = {Slepian, Zachary and Eisenstein, Daniel J.}, - title = {A practical computational method for the anisotropic redshift-space three-point correlation function}, - journal = {Mon.~Not.~R.~Astron.~Soc.}, - year = {2018}, - volume = {478}, - number = {2}, - pages = {1468--1483}, - doi = {10.1093/mnras/sty1063}, + title = {A practical computational method for the anisotropic redshift-space three-point correlation function}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2018}, + volume = {478}, + number = {2}, + pages = {1468--1483}, + doi = {10.1093/mnras/sty1063}, } -@article{Yamamoto:2006, - author = {Yamamoto, Kazuhiro and Nakamichi, Masashi and Kamino, Akinari and Bassett, Bruce A. and Nishioka, Hiroaki}, - title = {{A Measurement of the Quadrupole Power Spectrum in the Clustering of the 2dF QSO Survey}}, - journal = {Publ. Astron. Soc. Jpn.}, - year = {2006}, - volume = {58}, +@article{Slepian:2016, + author = {Slepian, Zachary and Eisenstein, Daniel J.}, + title = {Accelerating the two-point and three-point galaxy correlation functions using Fourier transforms}, + journal = {Mon.~Not.~R.~Astron.~Soc.}, + year = {2016}, + volume = {455}, number = {1}, - pages = {93-102}, - doi = {10.1093/pasj/58.1.93}, + pages = {L31--L35}, + doi = {10.1093/mnrasl/slv133}, } -@article{Wilson:2016, - author = {Wilson, M. J. and Peacock, J. A. and Taylor, A. N. and {de la Torre}, S.}, - title = {Rapid modelling of the redshift-space power spectrum multipoles for a masked density field}, - journal = {Mon.~Not.~R.~Astron.~Soc.}, - year = {2016}, - volume = {464}, - number = {3}, - pages = {3121--3130}, - doi = {10.1093/mnras/stw2576}, +@article{Philcox:2021, + author = {Philcox, Oliver H. E.}, + title = {Cosmology without window functions. {II}. Cubic estimators for the galaxy bispectrum}, + journal = {Phys.~Rev.~D}, + year = {2021}, + volume = {104}, + number = {12}, + pages = {123529}, + doi = {10.1103/physrevd.104.123529}, } @book{Hockney:1988, diff --git a/publication/joss/paper.md b/publication/joss/paper.md index 12d6e162..67945ffc 100644 --- a/publication/joss/paper.md +++ b/publication/joss/paper.md @@ -51,7 +51,7 @@ complementarity, methods for measuring two-point clustering statistics are also included in the package. 
-[^1]: [github.com/naonori/hitomi/](https://github.com/naonori/hitomi/)
+[^1]: [github.com/naonori/hitomi](https://github.com/naonori/hitomi)
 
 # Statement of need
 
@@ -98,20 +98,61 @@ can compute:
 models derived in Fourier space through the Hankel transform
 [@Wilson:2016;@Sugiyama:2019].
 
+For the global plane-parallel estimators, the simulation box is placed at
+spatial infinity (or, equivalently, the observer is), so that the lines of
+sight to all particles can be treated as parallel and taken to be along
+the $z$-axis. For the local plane-parallel estimators, the observer is
+placed at the origin of the survey coordinates, and the line of sight is
+chosen to point towards one of the particles in each triplet or pair for
+three- or two-point clustering measurements respectively.
+
+The geometry of the survey leaves an imprint on the clustering statistics:
+in Fourier space, the effect is a convolution with the survey window
+function. This convolution mixes different multipoles of the underlying
+clustering statistics and of the survey window, and the convolution
+formula (i.e. the number of multipoles to include in modelling) needed to
+achieve a given level of convergence depends on the precise survey
+geometry, including any sample weights applied. Therefore the
+functionality to measure the window function is an integral part of
+this program.
+
 These functionalities are essential to cosmological inference pipelines,
 and can help validate any analytical covariance matrix predictions against
 sample estimates. Since precise covariance matrix estimates usually
 require clustering measurements repeated over a large number of simulated
 mock catalogues, computational efficiency is an important objective.
-Finally, `Triumvirate` also enables comparison studies between
-alternative compressed statistics of three-point clustering
-[e.g. @Scoccimarro:2015; @Slepian:2018], which may have different
-constraining power on different cosmological parameters.
+
+Finally, `Triumvirate` also enables comparison studies between
+alternative compressed statistics of three-point clustering, which may have
+different constraining power on different cosmological parameters. There
+are existing software packages for some of these alternative approaches:
+
+ * `pylians`[^4] [@VillaescusaNavarro:2018] computes the bispectrum with
+   the Scoccimarro estimator [@Scoccimarro:2015] for triangle
+   configurations parametrised by two wavenumbers and the angle between
+   the corresponding wavevectors;
+
+ * `nbodykit`[^5] [@Hand:2018] computes the isotropised 3PCF with a
+   pair-counting algorithm [@Slepian:2015], although in principle
+   this can be generalised to the anisotropic 3PCF [@Slepian:2018], or
+   implemented using FFTs [@Slepian:2016];
+
+ * @Philcox:2021[^6] advocates a windowless cubic estimator for the
+   bispectrum in the Scoccimarro decomposition, which can be evaluated
+   using FFTs. However, this approach requires the inversion of a Fisher
+   matrix obtained from a suite of Monte Carlo realisations.
+
+As these programs each use a different decomposition of three-point
+clustering statistics and focus on either configuration- or Fourier-space
+statistics only, `Triumvirate` fulfils complementary needs in current
+galaxy clustering analyses.
 
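To make the two line-of-sight conventions described above concrete, the following minimal NumPy sketch contrasts the per-particle line of sight of the local plane-parallel estimators with the fixed $z$-axis line of sight of the global plane-parallel ones. This is an illustration only, not `Triumvirate`'s actual interface; the array names, random positions and example wavevector are hypothetical.

```python
import numpy as np

# Hypothetical (N, 3) particle positions in survey coordinates,
# with the observer at the origin.
rng = np.random.default_rng(42)
positions = rng.uniform(-500., 500., size=(1000, 3))

# Local plane-parallel: each particle carries its own unit line-of-sight
# vector pointing from the observer towards that particle.
los_local = positions / np.linalg.norm(positions, axis=1, keepdims=True)

# Global plane-parallel: a single fixed line of sight (the z-axis) is
# shared by all particles, as if the observer were at spatial infinity.
los_global = np.broadcast_to([0., 0., 1.], positions.shape)

# Example orientation weight: the Legendre polynomial L_2(mu), where mu is
# the cosine of the angle between a wavevector k and the line of sight.
k_hat = np.array([1., 0., 1.]) / np.sqrt(2.)
mu_local = los_local @ k_hat
mu_global = los_global @ k_hat
weight_local = (3. * mu_local**2 - 1.) / 2.    # varies from particle to particle
weight_global = (3. * mu_global**2 - 1.) / 2.  # identical for all particles
```

In `Triumvirate` itself, such orientation weights are generalised to spherical harmonics of the required degrees and orders and, as noted in the Implementation section, applied to individual particles.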
 [^2]: [desi.lbl.gov](https://www.desi.lbl.gov)
 [^3]: [sci.esa.int/euclid](https://sci.esa.int/web/euclid/), [euclid-ec.org](https://www.euclid-ec.org)
+[^4]: [pylians3.readthedocs.io](https://pylians3.readthedocs.io)
+[^5]: [nbodykit.readthedocs.io](https://nbodykit.readthedocs.io)
+[^6]: [github.com/oliverphilcox/Spectra-Without-Windows](https://github.com/oliverphilcox/Spectra-Without-Windows)
 
 # Implementation
 
@@ -195,52 +236,53 @@ $\mathcal{O}\left({N_\mathrm{bin}^2 N_\mathrm{mesh} \ln N_\mathrm{mesh}}\right)$
 where $N_\mathrm{bin}$ is the number of coordinate bins.
 
 It is worth noting that in `Triumvirate`, the spherical harmonic weights
-are applied to individual particles rather than the mesh grids, in
-contrast to other packages such as `nbodykit`. This should result in
-more accurate results at the expense of memory usage, as multiple meshes
-need to be stored for spherical harmonics of different degrees and orders.
-We estimate the minimum memory usage for bispectrum measurements to be
-$11 M$ and $9 M$ respectively for local and global plane-parallel
-estimators, where $M = 16 N_\mathrm{mesh}$ bytes (roughly
-$1.5\times10^{-8} N_\mathrm{mesh}$ gibibytes[^4]); for local and global
+are applied to individual particles rather than the mesh grids. This should
+yield more accurate results at the expense of memory usage, as multiple
+meshes need to be stored for spherical harmonics of different degrees
+and orders. We estimate the minimum memory usage for bispectrum
+measurements to be $11 M$ and $9 M$ respectively for local and global
+plane-parallel estimators, where $M = 16 N_\mathrm{mesh}$ bytes (roughly
+$1.5\times10^{-8} N_\mathrm{mesh}$ gibibytes[^7]); for local and global
 plane-parallel 3PCF estimators, the figures are $10 M$ and $9 M$
 respectively.
 
 In the table below, we show the wall time and peak memory usage for
-bispectrum and three-point correlation function measurements of a few
-select multipoles and grid numbers with $N_\mathrm{bin} = 20$, using a
-single core on one AMD EPYC 7H12 processor with base frequency 2.60 GHz.
-With multithreading enabled, the run time is reduced (see the last column
-in the table). Here 'lpp' and 'gpp' denote local and global plane-parallel
-approximations respectively. For the global plane-parallel estimates,
-the catalogue used is a cubic box containing
-$N_\mathrm{part} = 8 \times 10^6$ particles; for the local plane-parallel
-estimates, the data and random catalogues contain
-$N_\mathrm{part} = 6.6 \times 10^5$ and $1.3 \times 10^7$ particles
-respectively.
+bispectrum and 3PCF measurements of a few select multipoles and
+grid numbers with $N_\mathrm{bin} = 20$, using a single core on one
+AMD EPYC 7H12 processor with base frequency 2.60 GHz. With multithreading
+enabled, the run time is reduced (see the last column in the table). Here
+'lpp' and 'gpp' denote local and global plane-parallel approximations
+respectively. For the global plane-parallel estimates, the catalogue used
+is a cubic box containing $N_\mathrm{part} = 8 \times 10^6$ particles;
+for the local plane-parallel estimates, the data and random catalogues
+contain $N_\mathrm{part} = 6.6 \times 10^5$ and $1.3 \times 10^7$ particles
+respectively. Since both the bispectrum and 3PCF are computed with FFTs,
+their computation time and memory usage are roughly the same, with
+minor differences due to the slightly different number of mesh grids needed
+for evaluation.
 
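As a back-of-the-envelope check of these memory estimates, the quoted scalings can be evaluated directly and compared against the peak usage reported in the table that follows. This is a plain-Python sketch; the $512^3$ grid size is simply the largest value appearing in the table.

```python
# Minimum memory estimate following the scalings quoted above:
# one mesh takes M = 16 * N_mesh bytes, and the bispectrum estimators need
# at least 11 M (local plane-parallel) or 9 M (global plane-parallel) meshes;
# the 3PCF estimators need 10 M and 9 M respectively.
n_mesh = 512**3            # example mesh size, the largest used in the table
mesh_bytes = 16 * n_mesh   # M in bytes
gib = 2**30                # 1 GiB = 2^30 bytes

print(f"one mesh (M):           {mesh_bytes / gib:.0f} GiB")       # ~2 GiB
print(f"bispectrum, lpp (11 M): {11 * mesh_bytes / gib:.0f} GiB")  # ~22 GiB
print(f"bispectrum, gpp (9 M):  {9 * mesh_bytes / gib:.0f} GiB")   # ~18 GiB
print(f"3PCF, lpp (10 M):       {10 * mesh_bytes / gib:.0f} GiB")  # ~20 GiB
print(f"3PCF, gpp (9 M):        {9 * mesh_bytes / gib:.0f} GiB")   # ~18 GiB
```

These minimum estimates (22, 18, 20 and 18 GiB) sit a few gibibytes below the measured peak usage at $N_\mathrm{mesh} = 512^3$ in the table (25, 21, 21 and 19 GiB), presumably because the peak also includes the particle catalogues and other overheads.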
----------------------------------------------------------------------------------------------------------------------------
-Multipole/$N_\mathrm{mesh}$    $128^3$                $256^3$                $512^3$                $512^3$ (32 threads)
------------------------------- ---------------------- ---------------------- ---------------------- ----------------------
-$B_{000}^{\mathrm{(lpp)}}$     96 s, 1.8 GiB          215 s, 4.4 GiB         1247 s, 25 GiB         85 s, 25 GiB
+------------------------------------------------------------------------------------------------------------------------------------------
+Multipole/$N_\mathrm{mesh}$    $128^3$                   $256^3$                   $512^3$                   $512^3$ (32 threads)
+------------------------------ ------------------------- ------------------------- ------------------------- -----------------------------
+$B_{000}^{\mathrm{(lpp)}}$     96 s, 1.8 GiB             215 s, 4.4 GiB            1247 s, 25 GiB            85 s, 25 GiB
 
-$B_{000}^{\mathrm{(gpp)}}$     42 s, 0.7 GiB          172 s, 2.9 GiB         1185 s, 21 GiB         59 s, 21 GiB
+$B_{000}^{\mathrm{(gpp)}}$     42 s, 0.7 GiB             172 s, 2.9 GiB            1185 s, 21 GiB            59 s, 21 GiB
 
-$B_{202}^{\mathrm{(lpp)}}$     320 s, 1.8 GiB         1030 s, 4.4 GiB        6449 s, 25 GiB         267 s, 25 GiB
+$B_{202}^{\mathrm{(lpp)}}$     320 s, 1.8 GiB            1030 s, 4.4 GiB           6449 s, 25 GiB            267 s, 25 GiB
 
-$B_{202}^{\mathrm{(gpp)}}$     42 s, 0.7 GiB          176 s, 2.9 GiB         1187 s, 21 GiB         60 s, 21 GiB
+$B_{202}^{\mathrm{(gpp)}}$     42 s, 0.7 GiB             176 s, 2.9 GiB            1187 s, 21 GiB            60 s, 21 GiB
 
-$\zeta_{000}^{\mathrm{(lpp)}}$ 90 s, 1.8 GiB          211 s, 4.4 GiB         1403 s, 21 GiB         83 s, 21 GiB
+$\zeta_{000}^{\mathrm{(lpp)}}$ 90 s, 1.8 GiB             211 s, 4.4 GiB            1403 s, 21 GiB            83 s, 21 GiB
 
-$\zeta_{000}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB          178 s, 2.9 GiB         1226 s, 19 GiB         55 s, 19 GiB
+$\zeta_{000}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB             178 s, 2.9 GiB            1226 s, 19 GiB            55 s, 19 GiB
 
-$\zeta_{202}^{\mathrm{(lpp)}}$ 267 s, 1.8 GiB         964 s, 4.4 GiB         6377 s, 21 GiB         266 s, 21 GiB
+$\zeta_{202}^{\mathrm{(lpp)}}$ 267 s, 1.8 GiB            964 s, 4.4 GiB            6377 s, 21 GiB            266 s, 21 GiB
 
-$\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB          177 s, 2.9 GiB         1241 s, 19 GiB         57 s, 19 GiB
----------------------------------------------------------------------------------------------------------------------------
+$\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB             177 s, 2.9 GiB            1241 s, 19 GiB            57 s, 19 GiB
+------------------------------------------------------------------------------------------------------------------------------------------
 
-[^4]: Note that 1 gibibytes (GiB) is $2^{30}$ bytes, as opposed to
-1 gigabytes (GB) which is $10^9$ bytes. GiB is the preferred unit by
-job schedulers such as Slurm for computer clusters.
+[^7]: Note that 1 gibibyte (GiB) is $2^{30}$ bytes, as opposed to
+1 gigabyte (GB), which is $10^9$ bytes. GiB is the unit preferred by
+job schedulers such as Slurm on computer clusters.
@@ -251,6 +293,10 @@ $\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB             177 s, 2.9 GiB
 user feedback. One extension of interest is the inclusion of other
 three-point clustering estimators with different coordinate systems and
 compression choices, and the functionality to transform between them.
+The ability to measure clustering statistics from a density field already
+sampled on a mesh grid may also be useful. In addition, porting the code
+to graphics processing units (GPUs) can bring further parallelisation and
+thus enhance the performance of the code.
 
 # Acknowledgements