Skip to content

Commit

Permalink
Update JOSS manuscript
Browse files Browse the repository at this point in the history
  • Loading branch information
MikeSWang committed Oct 5, 2023
1 parent 2358a78 commit fc0deb4
Show file tree
Hide file tree
Showing 2 changed files with 177 additions and 89 deletions.
146 changes: 94 additions & 52 deletions publication/joss/paper.bib
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
@article{Sugiyama:2019,
  author  = {Sugiyama, Naonori S. and Saito, Shun and Beutler, Florian and Seo, Hee-Jong},
  title   = {A complete {FFT}-based decomposition formalism for the redshift-space bispectrum},
  journal = {Mon.~Not.~R.~Astron.~Soc.},
  year    = {2019},
  volume  = {484},
  number  = {1},
  pages   = {364--384},
  doi     = {10.1093/mnras/sty3249},
}

@article{Sugiyama:2018,
author = {Sugiyama, Naonori S. and Shiraishi, Maresuke and Okumura, Teppei},
title = {{Limits on statistical anisotropy from {BOSS DR12} galaxies using bipolar spherical harmonics}},
journal = {Mon.~Not.~R.~Astron.~Soc.},
year = {2018},
journal = {Mon.~Not.~R.~Astron.~Soc.},
year = {2018},
volume = {473},
number = {2},
pages = {2737--2752},
Expand Down Expand Up @@ -65,13 +65,13 @@ @article{Amendola:2018

@article{Bernardeau:2002,
  author  = {Bernardeau, F. and Colombi, S. and Gazta{\~n}aga, E. and Scoccimarro, R.},
  title   = {Large-scale structure of the {Universe} and cosmological perturbation theory},
  journal = {Phys.~Rep.},
  year    = {2002},
  volume  = {367},
  number  = {1--3},
  pages   = {1--248},
  doi     = {10.1016/s0370-1573(02)00135-7},
}

@article{Sefusatti:2006,
Expand All @@ -92,11 +92,11 @@ @article{BOSS:2017
{SDSS-III Baryon Oscillation Spectroscopic Survey}:
cosmological analysis of the {DR}12 galaxy sample
},
journal = {Mon.~Not.~R.~Astron.~Soc.},
journal = {Mon.~Not.~R.~Astron.~Soc.},
year = {2017},
volume = {470},
number = {3},
pages = {2617--2652},
volume = {470},
number = {3},
pages = {2617--2652},
doi = {10.1093/mnras/stx721},
}

Expand All @@ -107,7 +107,7 @@ @article{eBOSS:2021
{Cosmological} implications from two decades of spectroscopic surveys
at the {Apache Point Observatory}
},
journal = {Phys.~Rev.~D},
journal = {Phys.~Rev.~D},
year = {2021},
volume = {103},
issue = {8},
Expand All @@ -117,13 +117,44 @@ @article{eBOSS:2021

@article{Scoccimarro:1999,
  author  = {Scoccimarro, Rom{\'a}n and Couchman, H. M. P. and Frieman, Joshua A.},
  title   = {{The Bispectrum as a Signature of Gravitational Instability in Redshift Space}},
  journal = {Astrophys.~J.},
  year    = {1999},
  volume  = {517},
  number  = {2},
  pages   = {531--540},
  doi     = {10.1086/307220},
}

@article{Yamamoto:2006,
  author  = {Yamamoto, Kazuhiro and Nakamichi, Masashi and Kamino, Akinari and Bassett, Bruce A. and Nishioka, Hiroaki},
  title   = {{A Measurement of the Quadrupole Power Spectrum in the Clustering of the 2dF QSO Survey}},
  journal = {Publ.~Astron.~Soc.~Jpn.},
  year    = {2006},
  volume  = {58},
  number  = {1},
  pages   = {93--102},
  doi     = {10.1093/pasj/58.1.93},
}

@article{Wilson:2016,
  author  = {Wilson, M. J. and Peacock, J. A. and Taylor, A. N. and {de la Torre}, S.},
  title   = {Rapid modelling of the redshift-space power spectrum multipoles for a masked density field},
  journal = {Mon.~Not.~R.~Astron.~Soc.},
  volume  = {464},
  number  = {3},
  pages   = {3121--3130},
  year    = {2016},
  doi     = {10.1093/mnras/stw2576},
}

@misc{VillaescusaNavarro:2018,
  author        = {{Villaescusa-Navarro}, Francisco},
  title         = {Pylians: {Python} libraries for the analysis of numerical simulations},
  howpublished  = {{Astrophysics Source Code Library}},
  year          = {2018},
  archiveprefix = {ascl},
  eprint        = {1811.008},
  eid           = {ascl:1811.008},
}

@article{Scoccimarro:2015,
Expand All @@ -137,37 +168,48 @@ @article{Scoccimarro:2015
doi = {10.1103/PhysRevD.92.083532},
}

@article{Slepian:2015,
  author  = {Slepian, Zachary and Eisenstein, Daniel J.},
  title   = {Computing the three-point correlation function of galaxies in {$\mathcal{O}(N^2)$} time},
  journal = {Mon.~Not.~R.~Astron.~Soc.},
  year    = {2015},
  volume  = {454},
  number  = {4},
  pages   = {4142--4158},
  doi     = {10.1093/mnras/stv2119},
}

@article{Slepian:2018,
  author  = {Slepian, Zachary and Eisenstein, Daniel J.},
  title   = {A practical computational method for the anisotropic redshift-space three-point correlation function},
  journal = {Mon.~Not.~R.~Astron.~Soc.},
  year    = {2018},
  volume  = {478},
  number  = {2},
  pages   = {1468--1483},
  doi     = {10.1093/mnras/sty1063},
}

@article{Yamamoto:2006,
author = {Yamamoto, Kazuhiro and Nakamichi, Masashi and Kamino, Akinari and Bassett, Bruce A. and Nishioka, Hiroaki},
title = {{A Measurement of the Quadrupole Power Spectrum in the Clustering of the 2dF QSO Survey}},
journal = {Publ. Astron. Soc. Jpn.},
year = {2006},
volume = {58},
@article{Slepian:2016,
author = {Slepian, Zachary and Eisenstein, Daniel J.},
title = {Accelerating the two-point and three-point galaxy correlation functions using Fourier transforms},
journal = {Mon.~Not.~R.~Astron.~Soc.},
year = {2016},
volume = {455},
number = {1},
pages = {93-102},
doi = {10.1093/pasj/58.1.93},
pages = {L31--L35},
doi = {10.1093/mnrasl/slv133},
}

@article{Wilson:2016,
author = {Wilson, M. J. and Peacock, J. A. and Taylor, A. N. and {de la Torre}, S.},
title = {Rapid modelling of the redshift-space power spectrum multipoles for a masked density field},
journal = {Mon.~Not.~R.~Astron.~Soc.},
year = {2016},
volume = {464},
number = {3},
pages = {3121--3130},
doi = {10.1093/mnras/stw2576},
@article{Philcox:2021,
  author  = {Philcox, Oliver H. E.},
  title   = {Cosmology without window functions. {II}. {Cubic} estimators for the galaxy bispectrum},
  journal = {Phys.~Rev.~D},
  year    = {2021},
  volume  = {104},
  number  = {12},
  pages   = {123529},
  doi     = {10.1103/PhysRevD.104.123529},
}

@book{Hockney:1988,
Expand Down
120 changes: 83 additions & 37 deletions publication/joss/paper.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ complementarity, methods for measuring two-point clustering statistics are
also included in the package.


[^1]: [github.com/naonori/hitomi/](https://github.com/naonori/hitomi/)
[^1]: [github.com/naonori/hitomi](https://github.com/naonori/hitomi)


# Statement of need
Expand Down Expand Up @@ -98,20 +98,61 @@ can compute:
models derived in Fourier space through the Hankel transform
[@Wilson:2016;@Sugiyama:2019].

For the global plane-parallel estimators, the simulation box is placed at
spatial infinity (or equivalently, the observer is), so that the
line of sight to each particle can be treated as the same and taken to be
along the $z$-axis. For the local plane-parallel estimators, the observer
is placed at the origin in the survey coordinates, and the line of sight
is chosen to point towards one of the particles in a triplet or pair for
three- or two-point clustering measurements respectively.

The geometry of the survey leaves an imprint on the clustering statistics,
where in Fourier space the effect is a convolution with the survey window
function. This convolution mixes different multipoles of the underlying
clustering statistics and the survey window, and the precise convolution
formula (i.e. the number of multipoles to include in modelling) needed to
achieve a given level of convergence depends on the precise survey geometry
including any sample weights applied. Therefore the functionality to
measure the window function is an integral part of this program.

These functionalities are essential to cosmological inference pipelines,
and can help validate any analytical covariance matrix predictions against
sample estimates. Since precise covariance matrix estimates usually
require clustering measurements repeated over a large number of simulated
mock catalogues, computational efficiency is an important objective.
Finally, `Triumvirate` also enables comparison studies between
alternative compressed statistics of three-point clustering
[e.g. @Scoccimarro:2015; @Slepian:2018], which may have different
constraining power on different cosmological parameters.

Finally, `Triumvirate` also enables comparison studies between
alternative compressed statistics of three-point clustering, which may have
different constraining power on different cosmological parameters. There
are existing software packages for some of these alternative approaches:

* `pylians`[^4] [@VillaescusaNavarro:2018] computes the bispectrum with
the Scoccimarro estimator [@Scoccimarro:2015] for triangle
configurations parametrised by two wavenumbers and the angle between
the corresponding wavevectors;

* `nbodykit`[^5] [@Hand:2018] computes the isotropised 3PCF with a
pair-counting algorithm [@Slepian:2015], although in principle
this can be generalised to anisotropic 3PCF [@Slepian:2018], or be
implemented using FFTs [@Slepian:2016];

* @Philcox:2021[^6] advocates a windowless cubic estimator for the
bispectrum in the Scoccimarro decomposition, which can be evaluated
using FFTs. However, this approach requires the inversion of a Fisher
matrix obtained from a suite of Monte Carlo realisations.

As these programs use a different decomposition of three-point clustering
statistics and focus on either configuration- or Fourier-space statistics
only, `Triumvirate` fulfills complementary needs in current galaxy
clustering analyses.


[^2]: [desi.lbl.gov](https://www.desi.lbl.gov)
[^3]: [sci.esa.int/euclid](https://sci.esa.int/web/euclid/),
[euclid-ec.org](https://www.euclid-ec.org)
[^4]: [pylians3.readthedocs.io](https://pylians3.readthedocs.io)
[^5]: [nbodykit.readthedocs.io](https://nbodykit.readthedocs.io)
[^6]: [github.com/oliverphilcox/Spectra-Without-Windows](https://github.com/oliverphilcox/Spectra-Without-Windows)


# Implementation
Expand Down Expand Up @@ -195,52 +236,53 @@ $\mathcal{O}\left({N_\mathrm{bin}^2 N_\mathrm{mesh} \ln N_\mathrm{mesh}}\right)$
where $N_\mathrm{bin}$ is the number of coordinate bins.

It is worth noting that in `Triumvirate`, the spherical harmonic weights
are applied to individual particles rather than the mesh grids, in
contrast to other packages such as `nbodykit`. This should result in
more accurate results at the expense of memory usage, as multiple meshes
need to be stored for spherical harmonics of different degrees and orders.
We estimate the minimum memory usage for bispectrum measurements to be
$11 M$ and $9 M$ respectively for local and global plane-parallel
estimators, where $M = 16 N_\mathrm{mesh}$ bytes (roughly
$1.5\times10^{-8} N_\mathrm{mesh}$ gibibytes[^4]); for local and global
are applied to individual particles rather than the mesh grids. This should
result in more accurate results at the expense of memory usage, as multiple
meshes need to be stored for spherical harmonics of different degrees
and orders. We estimate the minimum memory usage for bispectrum
measurements to be $11 M$ and $9 M$ respectively for local and global
plane-parallel estimators, where $M = 16 N_\mathrm{mesh}$ bytes (roughly
$1.5\times10^{-8} N_\mathrm{mesh}$ gibibytes[^7]); for local and global
plane-parallel 3PCF estimators, the figures are $10 M$ and $9 M$
respectively.

In the table below, we show the wall time and peak memory usage for
bispectrum and three-point correlation function measurements of a few
select multipoles and grid numbers with $N_\mathrm{bin} = 20$, using a
single core on one AMD EPYC 7H12 processor with base frequency 2.60 GHz.
With multithreading enabled, the run time is reduced (see the last column
in the table). Here 'lpp' and 'gpp' denote local and global plane-parallel
approximations respectively. For the global plane-parallel estimates,
the catalogue used is a cubic box containing
$N_\mathrm{part} = 8 \times 10^6$ particles; for the local plane-parallel
estimates, the data and random catalogues contain
$N_\mathrm{part} = 6.6 \times 10^5$ and $1.3 \times 10^7$ particles
respectively.
bispectrum and 3PCF measurements of a few select multipoles and
grid numbers with $N_\mathrm{bin} = 20$, using a single core on one
AMD EPYC 7H12 processor with base frequency 2.60 GHz. With multithreading
enabled, the run time is reduced (see the last column in the table). Here
'lpp' and 'gpp' denote local and global plane-parallel approximations
respectively. For the global plane-parallel estimates, the catalogue used
is a cubic box containing $N_\mathrm{part} = 8 \times 10^6$ particles;
for the local plane-parallel estimates, the data and random catalogues
contain $N_\mathrm{part} = 6.6 \times 10^5$ and $1.3 \times 10^7$ particles
respectively. Since both the bispectrum and 3PCF are computed with FFTs,
the computation time and memory usage for them are roughly the same, with
minor differences due to the slightly different number of mesh grids needed
for evaluation.

--------------------------------------------------------------------------------------------------------------------------
Multipole/$N_\mathrm{mesh}$ $128^3$ $256^3$ $512^3$ $512^3$ (32 threads)
------------------------------ ---------------------- ---------------------- ---------------------- ----------------------
$B_{000}^{\mathrm{(lpp)}}$ 96 s, 1.8 GiB 215 s, 4.4 GiB 1247 s, 25 GiB 85 s, 25 GiB
--------------------------------------------------------------------------------------------------------------------------------
Multipole/$N_\mathrm{mesh}$ $128^3$ $256^3$ $512^3$ $512^3$ (32 threads)
------------------------------ ----------------------- ----------------------- ----------------------- -------------------------
$B_{000}^{\mathrm{(lpp)}}$ 96 s, 1.8 GiB 215 s, 4.4 GiB 1247 s, 25 GiB 85 s, 25 GiB

$B_{000}^{\mathrm{(gpp)}}$ 42 s, 0.7 GiB 172 s, 2.9 GiB 1185 s, 21 GiB 59 s, 21 GiB
$B_{000}^{\mathrm{(gpp)}}$ 42 s, 0.7 GiB 172 s, 2.9 GiB 1185 s, 21 GiB 59 s, 21 GiB

$B_{202}^{\mathrm{(lpp)}}$ 320 s, 1.8 GiB 1030 s, 4.4 GiB 6449 s, 25 GiB 267 s, 25 GiB
$B_{202}^{\mathrm{(lpp)}}$ 320 s, 1.8 GiB 1030 s, 4.4 GiB 6449 s, 25 GiB 267 s, 25 GiB

$B_{202}^{\mathrm{(gpp)}}$ 42 s, 0.7 GiB 176 s, 2.9 GiB 1187 s, 21 GiB 60 s, 21 GiB
$B_{202}^{\mathrm{(gpp)}}$ 42 s, 0.7 GiB 176 s, 2.9 GiB 1187 s, 21 GiB 60 s, 21 GiB

$\zeta_{000}^{\mathrm{(lpp)}}$ 90 s, 1.8 GiB 211 s, 4.4 GiB 1403 s, 21 GiB 83 s, 21 GiB
$\zeta_{000}^{\mathrm{(lpp)}}$ 90 s, 1.8 GiB 211 s, 4.4 GiB 1403 s, 21 GiB 83 s, 21 GiB

$\zeta_{000}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB 178 s, 2.9 GiB 1226 s, 19 GiB 55 s, 19 GiB
$\zeta_{000}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB 178 s, 2.9 GiB 1226 s, 19 GiB 55 s, 19 GiB

$\zeta_{202}^{\mathrm{(lpp)}}$ 267 s, 1.8 GiB 964 s, 4.4 GiB 6377 s, 21 GiB 266 s, 21 GiB
$\zeta_{202}^{\mathrm{(lpp)}}$ 267 s, 1.8 GiB 964 s, 4.4 GiB 6377 s, 21 GiB 266 s, 21 GiB

$\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB 177 s, 2.9 GiB 1241 s, 19 GiB 57 s, 19 GiB
--------------------------------------------------------------------------------------------------------------------------
$\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB 177 s, 2.9 GiB 1241 s, 19 GiB 57 s, 19 GiB
--------------------------------------------------------------------------------------------------------------------------------


[^4]: Note that 1 gibibytes (GiB) is $2^{30}$ bytes, as opposed to
[^7]: Note that 1 gibibyte (GiB) is $2^{30}$ bytes, as opposed to
1 gigabyte (GB), which is $10^9$ bytes. GiB is the preferred unit
by job schedulers such as Slurm for computer clusters.

Expand All @@ -251,6 +293,10 @@ $\zeta_{202}^{\mathrm{(gpp)}}$ 43 s, 0.7 GiB 177 s, 2.9 GiB
user feedback. One extension of interest is the inclusion of other
three-point clustering estimators with different coordinate systems and
compression choices, and the functionality to transform between them.
The ability to measure clustering statistics from a density field already
sampled on a mesh grid may also be useful. In addition, porting the code
to graphics processing units (GPUs) can bring further parallelisation that
can enhance the performance of the code.


# Acknowledgements
Expand Down

0 comments on commit fc0deb4

Please sign in to comment.