diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index cbe5b636..c6737166 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -24,7 +24,7 @@ jobs:
       matrix:
         # First all python versions in basic linux
         os: [ ubuntu-latest ]
-        py: [ 3.7, 3.8, 3.9, "3.10", "pypy-3.9" ]
+        py: [ 3.7, 3.8, 3.9, '3.10', 3.11, 3.12, 'pypy-3.10' ]
         CC: [ gcc ]
         CXX: [ g++ ]

@@ -32,25 +32,25 @@ jobs:
        include:
          # One in MacOS
          - os: macos-latest
-            py: 3.9
+            py: 3.11
            CC: cc
            CXX: c++

          # Check one with clang compiler
          - os: ubuntu-latest
-            py: 3.8
+            py: 3.11
            CC: clang
            CXX: clang++

          # Check one with gcc-11
          - os: ubuntu-latest
-            py: 3.9
+            py: 3.11
            CC: gcc-11
            CXX: g++-11

          # Check one on Windows
          - os: windows-latest
-            py: 3.9
+            py: 3.11
            CC: gcc
            CXX: g++

@@ -109,7 +109,7 @@ jobs:
        pip install -U -r requirements.txt

        # Extra packages needed for testing
-        pip install -U nose mpi4py coverage mockmpi pytest
+        pip install -U coverage mockmpi pytest

        # Note: I'd rather include h5py here, but I can't get it to install properly
        # on GHA for pypy3.  So only do that for regular py3.
@@ -118,7 +118,14 @@ jobs:
      # They are slow to install on pypy, where some are installed from scratch.
      if: matrix.py > 3.0
      run: |
-        pip install -U matplotlib nbval ipykernel scipy pandas guppy3 h5py pyarrow
+        pip install -U matplotlib nbval ipykernel scipy pandas guppy3 h5py pyarrow mpi4py
+
+    - name: Install halotools
+      # halotools is currently (3/2024) broken on 3.7, 3.8, 3.12.
+      # Just run on the ones where we know it works.
+      if: ((matrix.py == '3.9') || (matrix.py == '3.10') || (matrix.py == '3.11')) && (matrix.os != 'windows-latest')
+      run: |
+        pip install -U halotools

    - name: Install fitsio everywhere but Windows
      if: matrix.os != 'windows-latest'
@@ -152,7 +159,8 @@ jobs:
    - name: Test MPI
      # The code is already mostly checked in the main tests with mock_mpi.
      # These just check that the code works when run in a real mpi session.
-      if: matrix.os != 'windows-latest'
+      # Skip windows and pypy for this.
+      if: (matrix.os != 'windows-latest') && (matrix.py > 3.0)
      run: |
        cd tests
        which -a mpiexec
        cd ..

@@ -162,7 +170,7 @@
    - name: Test Tutorial notebook
-      if: matrix.py == 3.7
+      if: matrix.py == '3.10'
      run: |
        cd tests
        pytest --nbval Tutorial.ipynb --sanitize-with sanitize.cfg --current-env
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 1d888859..949741c6 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -38,7 +38,7 @@ API Changes
 - Changed estimate_cov with method='shot' to only return the diagonal, rather than
   gratuitously making a full, mostly empty diagonal matrix. (#166)
 - Changed name of Catalog.write kwarg from cat_precision to just precision. (#169)
-- Added additionaly information in the header of output files to enable ``from_file``. (#172)
+- Added additional information in the header of output files to enable `Corr2.from_file`. (#172)

 Performance improvements

@@ -85,10 +85,12 @@ New features
   multipole algorithm or the old triangle algorithm. (#171)
 - Added serialization of rr, dr, etc. when writing with write_patch_results=True option, so
   you no longer have to separately write files for them to recover the covariance. (#172)
-- Added :ref:`from_file ` class methods to construct a Correlation
-  object from a file without needing to know the correct configuration parameters. (#172)
+- Added `Corr2.from_file` class methods to construct a Correlation object from a file without
+  needing to know the correct configuration parameters. (#172)
 - Added ``write_cov`` option to write functions to include the covariance in the output file.
(#172) +- Added complex, spin-0 correlations using the letter Z, including `NZCorrelation`, + `KZCorrelation`, and `ZZCorrelation`. (#174) Bug fixes diff --git a/TreeCorr_LICENSE b/TreeCorr_LICENSE index 4ca6fac2..518a7672 100644 --- a/TreeCorr_LICENSE +++ b/TreeCorr_LICENSE @@ -1,4 +1,4 @@ -Copyright (c) 2003-2019, Mike Jarvis +Copyright (c) 2003-2024, Mike Jarvis All rights reserved. Redistribution and use in source and binary forms, with or without diff --git a/devel/mock_comm_with_fork.py b/devel/mock_comm_with_fork.py deleted file mode 100644 index d7c5b534..00000000 --- a/devel/mock_comm_with_fork.py +++ /dev/null @@ -1,168 +0,0 @@ -# Copyright (c) 2003-2019 by Mike Jarvis -# -# TreeCorr is free software: redistribution and use in source and binary forms, -# with or without modification, are permitted provided that the following -# conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions, and the disclaimer given in the accompanying LICENSE -# file. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions, and the disclaimer given in the documentation -# and/or other materials provided with the distribution. - -from __future__ import print_function -import os -import time - -# This is an attempt to use fork to mock up an MPI session. -# It doesn't work. And I've given up trying to figure out how to make it work. - -class MockMPI(object): - """A context manager that mocks up an MPI session using fork, so it can be run - in normal unit testing. - - It makes no attempt to be efficient, so it is really only useful for unit testing - functions that are intended to use an mpi4py Comm object. - - It can also only communicate between rank=0 and other ranks. So it won't work for - use cases that need communication among all the different ranks. - - TODO: So far it only implements send and recv communication, not the more complicated - bcast, scatter, etc. - - Sample usage: - - >>> with MockMPI(size=4) as comm: - ... rank = comm.Get_rank() - ... size = comm.Get_size() - ... print('rank, size = ',rank,size,flush=True) - """ - def __init__(self, size=2): - self.size = size - self.rank = 0 - self.write_pipes = {} - self.read_pipes = {} - - def Get_rank(self): - return self.rank - - def Get_size(self): - return self.size - - def send(self, msg, dest): - print(self.rank,'sending to ',dest,msg,flush=True) - if dest == self.rank: - self.self_msg = msg - else: - print(self.rank,'writing on ',self.write_pipes[dest],flush=True) - self.write_pipes[dest].write(msg) - self.write_pipes[dest].flush() - #print('fno = ',self.write_pipes[dest].name, self.write_pipes[dest].fileno(),flush=True) - #fno = self.write_pipes[dest].fileno() - #self.write_pipes[dest].close() - #self.write_pipes[dest] = os.fdopen(fno,'w') - #print(self.rank,'reopened ',self.write_pipes[dest],flush=True) - print(self.rank,'sent to ',dest,flush=True) - - def recv(self, source): - print(self.rank,'receiving from ',source,flush=True) - if source == self.rank: - msg = self.self_msg - else: - print(self.rank,'reading from ',self.read_pipes[source],flush=True) - msg = self.read_pipes[source].read() - print(self.rank,'received from ',source,msg,flush=True) - return msg - - def Barrier(self): - print(self.rank,'staring Barrier',flush=True) - # Sync up by checking in with everyone - # 0 -> all, then reply back to 0. 
- if self.rank == 0: - for p in range(1,self.size): - self.send('check',p) - for p in range(1,self.size): - self.recv(p) - else: - self.recv(0) - self.send('ready',0) - - def __enter__(self): - size = self.size - next_rank = 1 - while size > 1: - r1,w1 = os.pipe() # communication from 0 to rank - r2,w2 = os.pipe() # communication from rank to 0 - print('pipes for',next_rank,'are',r1,w1,r2,w2) - pid = os.fork() - if pid: - # Parent - os.close(r1) - os.close(w2) - self.read_pipes[next_rank] = r2 - self.write_pipes[next_rank] = w1 - next_rank += 1 - size -= 1 - else: - # Child - self.rank = next_rank - size = 0 # Don't do further forks from non-parent. - os.close(r2) - os.close(w1) - # Clear these, since it gets copies of the rank 0 ones, which we don't want - self.read_pipes.clear() - self.write_pipes.clear() - self.read_pipes[0] = r1 - self.write_pipes[0] = w2 - if self.rank == 0: - # Let rank 0 read/write to itself. - r,w = os.pipe() - self.read_pipes[0] = r - self.write_pipes[0] = w - for p in self.read_pipes: - self.read_pipes[p] = os.fdopen(self.read_pipes[p]) - for p in self.write_pipes: - os.set_blocking(self.write_pipes[p],False) - self.write_pipes[p] = os.fdopen(self.write_pipes[p],'w') - return self - - def __exit__(self, type, value, traceback): - print(rank, 'is exiting.') - #self.Barrier() - # I can't figure out how to make a Barrier work right. - for p in self.write_pipes: - self.write_pipes[p].close() - print(rank, 'closed all writes') - # Without this it fails - time.sleep(3) - for p in self.read_pipes: - self.read_pipes[p].close() - print(rank, 'closed all reads') - if self.rank > 0: - os._exit(0) - print(rank, 'exited') - -msg = 'default_msg' - -with MockMPI(2) as comm: - rank = comm.Get_rank() - size = comm.Get_size() - print('rank, size = ',rank,size,flush=True) - print(rank, 'can read from ',list(comm.read_pipes.keys()),flush=True) - print(rank, 'can write to ',list(comm.write_pipes.keys()),flush=True) - - comm.send('my rank is %d'%rank, dest=0) - - if rank == 0: - print('Final section',flush=True) - for p in range(size): - print('Try to read from ',p,flush=True) - msg = comm.recv(source=p) - print('rank 0 received message: ',msg,flush=True) - comm.send('done', dest=p) - - print(rank,'done',flush=True) - # This next line causes it to freeze. - #final_msg = comm.recv(0) - #print(rank,'final message = ',final_msg,flush=True) diff --git a/devel/mpi_example.py b/devel/mpi_example.py index 6394afc4..2b3fce80 100644 --- a/devel/mpi_example.py +++ b/devel/mpi_example.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/docs/catalog.rst b/docs/catalog.rst index edbc22c4..7d5c0bb2 100644 --- a/docs/catalog.rst +++ b/docs/catalog.rst @@ -15,12 +15,18 @@ Other utilities related to catalogs .. autofunction:: treecorr.calculateVarK .. autofunction:: - treecorr.calculateVarG + treecorr.calculateVarZ .. autofunction:: treecorr.calculateVarV +.. autofunction:: + treecorr.calculateVarG +.. autofunction:: + treecorr.calculateVarT +.. autofunction:: + treecorr.calculateVarQ .. 
automodule:: treecorr.catalog
   :members:
-   :exclude-members: Catalog, read_catalogs, calculateVarG, calculateVarK, calculateVarV
+   :exclude-members: Catalog, read_catalogs, calculateVarK, calculateVarZ, calculateVarV, calculateVarG, calculateVarT, calculateVarQ

File Readers
------------
diff --git a/docs/correlation2.rst b/docs/correlation2.rst
index 99791109..e4e181f1 100644
--- a/docs/correlation2.rst
+++ b/docs/correlation2.rst
@@ -10,6 +10,9 @@ correlation functions:
     nn
     nk
     kk
+    nz
+    kz
+    zz
     nv
     kv
     vv
@@ -18,10 +21,10 @@ correlation functions:
     gg
     nt
     kt
-    gt
+    tt
     nq
     kq
-    gq
+    qq

 Each of the above classes is a sub-class of the base class Corr2, so they have a number of
 features in common about how they are constructed.  The common features are documented here.
diff --git a/docs/correlation3.rst b/docs/correlation3.rst
index 52c97ec0..d8ac2b78 100644
--- a/docs/correlation3.rst
+++ b/docs/correlation3.rst
@@ -11,17 +11,6 @@ auto-correlation functions:
     ggg
     kkk

-.. note::
-
-    There are classes that can handle cross-correlations of the same type:
-
-    * `treecorr.NNNCrossCorrelation`
-    * `treecorr.GGGCrossCorrelation`
-    * `treecorr.KKKCrossCorrelation`
-
-    However, we do not yet have the ability to compute 3-point cross-correlations across
-    different types (such as NNG or KGG, etc.)
-
 Each of the above classes is a sub-class of the base class Corr3, so they have a number of
 features in common about how they are constructed.  The common features are documented here.
diff --git a/docs/field.rst b/docs/field.rst
index 3025719e..9f8d914b 100644
--- a/docs/field.rst
+++ b/docs/field.rst
@@ -15,10 +15,19 @@ There are several kinds of `Field` classes.
   - `KField` holds both counts of objects and the mean "kappa" of those objects.
     It is used for correlations with a K in the name, including
     `KKCorrelation`, `NKCorrelation`, `KGCorrelation`, and `KKKCorrelation`.
+  - `ZField` holds both counts of objects and the mean complex spin-0 field of those objects.
+    It is used for correlations with a Z in the name, including
+    `ZZCorrelation`, `NZCorrelation`, and `KZCorrelation`.
+  - `VField` holds both counts of objects and the mean vector field of those objects.
+    It is used for correlations with a V in the name, including
+    `VVCorrelation`, `NVCorrelation`, and `KVCorrelation`.
   - `GField` holds both counts of objects and the mean shear of those objects.
     It is used for correlations with a G in the name, including
     `GGCorrelation`, `NGCorrelation`, `KGCorrelation`, and `GGGCorrelation`.
-  - `VField` holds both counts of objects and the mean velocity of those objects.
+  - `TField` holds both counts of objects and the mean spin-3 field of those objects.
+    It is used for correlations with a T in the name, including
+    `TTCorrelation`, `NTCorrelation`, and `KTCorrelation`.
+  - `QField` holds both counts of objects and the mean spin-4 field of those objects.
+    It is used for correlations with a Q in the name, including
+    `QQCorrelation`, `NQCorrelation`, and `KQCorrelation`.
-    It is used for correlations with a V in the name, including
-    `VVCorrelation`, `NVCorrelation`, and `KVCorrelation`.
@@ -36,8 +45,17 @@ command do so for you.
 .. autoclass:: treecorr.KField
    :members:

-.. autoclass:: treecorr.GField
+.. autoclass:: treecorr.ZField
    :members:

 .. autoclass:: treecorr.VField
    :members:
+
+.. autoclass:: treecorr.GField
+   :members:
+
+.. autoclass:: treecorr.TField
+   :members:
+
+.. 
autoclass:: treecorr.QField + :members: diff --git a/docs/ggg.rst b/docs/ggg.rst index 0f665b30..7bdea89d 100644 --- a/docs/ggg.rst +++ b/docs/ggg.rst @@ -6,9 +6,3 @@ GGGCorrelation: Shear-shear-shear correlations :members: :special-members: :show-inheritance: - -.. autoclass:: treecorr.GGGCrossCorrelation - :members: - :special-members: - :show-inheritance: - diff --git a/docs/guide.rst b/docs/guide.rst index e01650f3..ffb753cf 100644 --- a/docs/guide.rst +++ b/docs/guide.rst @@ -226,7 +226,7 @@ These correlations do not suffer as much from masking effects, so the compensation is not as necessary. However, it does produce a slightly better estimate of the correlation function if you are able to use a random catalog. -Furthermore, the `process ` functions can take lists of Catalogs if desired, +Furthermore, the `process ` functions can take lists of Catalogs if desired, in which case it will do all the possible combinations. This is especially relevant for doing randoms, since the statistics get better if you generate several randoms and do all the correlations to beat down the noise:: @@ -244,7 +244,7 @@ Manually accumulating the correlation function ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ For even more control over the calculation, you can break up the steps in the -`process ` functions. There are typically three steps: +`process ` functions. There are typically three steps: 1. Calculate the variance of the field as needed (i.e. for anything but NN correlations). 2. Accumulate the correlations into the bins for each auto-correlation and cross-correlation desired. @@ -261,9 +261,9 @@ function, you could write the following:: ng.process_cross(c1,c2) ng.finalize(varg) -In addition to `process_cross `, +In addition to `process_cross `, classes that allow auto-correlations have a -`process_auto ` method for manually processing +`process_auto ` method for manually processing auto-correlations. See the doc strings for these methods for more information. Breaking up the calculation manually like this is probably not often necessary anymore. diff --git a/docs/kkk.rst b/docs/kkk.rst index aca624dc..42c65f72 100644 --- a/docs/kkk.rst +++ b/docs/kkk.rst @@ -6,9 +6,3 @@ KKKCorrelation: Scalar-scalar-scalar correlations :members: :special-members: :show-inheritance: - -.. autoclass:: treecorr.KKKCrossCorrelation - :members: - :special-members: - :show-inheritance: - diff --git a/docs/kz.rst b/docs/kz.rst new file mode 100644 index 00000000..0e332795 --- /dev/null +++ b/docs/kz.rst @@ -0,0 +1,13 @@ + +KZCorrelation: Scalar-spin-0 correlations +----------------------------------------- + +.. autoclass:: treecorr.KZCorrelation + :members: + :special-members: + :show-inheritance: + +.. autoclass:: treecorr.BaseKZCorrelation + :members: + :special-members: + :show-inheritance: diff --git a/docs/nnn.rst b/docs/nnn.rst index 63aa2b51..42c9750e 100644 --- a/docs/nnn.rst +++ b/docs/nnn.rst @@ -6,9 +6,3 @@ NNNCorrelation: Count-count-count correlations :members: :special-members: :show-inheritance: - -.. autoclass:: treecorr.NNNCrossCorrelation - :members: - :special-members: - :show-inheritance: - diff --git a/docs/nz.rst b/docs/nz.rst new file mode 100644 index 00000000..175e51d9 --- /dev/null +++ b/docs/nz.rst @@ -0,0 +1,13 @@ + +NZCorrelation: Count-spin-0 correlations +---------------------------------------- + +.. autoclass:: treecorr.NZCorrelation + :members: + :special-members: + :show-inheritance: + +.. 
autoclass:: treecorr.BaseNZCorrelation
+   :members:
+   :special-members:
+   :show-inheritance:
diff --git a/docs/params.rst b/docs/params.rst
index 0f3b3ffa..b8843adc 100644
--- a/docs/params.rst
+++ b/docs/params.rst
@@ -237,13 +237,14 @@ Parameters about the input file(s)
             need to flip the sign of g1 or g2, you may do that with ``flip_g1`` or ``flip_g2``
             (or both).

+:flip_z1:   (bool, default=False) Whether to flip the sign of z1.
+:flip_z2:   (bool, default=False) Whether to flip the sign of z2.
 :flip_v1:   (bool, default=False) Whether to flip the sign of v1.
 :flip_v2:   (bool, default=False) Whether to flip the sign of v2.
-
-            Sometimes there are issues with the sign conventions of gamma.  If you
-            need to flip the sign of v1 or v2, you may do that with ``flip_v1`` or ``flip_v2``
-            (or both).
-
+:flip_t1:   (bool, default=False) Whether to flip the sign of t1.
+:flip_t2:   (bool, default=False) Whether to flip the sign of t2.
+:flip_q1:   (bool, default=False) Whether to flip the sign of q1.
+:flip_q2:   (bool, default=False) Whether to flip the sign of q2.
 :vark:      (float) Variance of the scalar field to use. (Default is to calculate it directly.)
 :varg:      (float) Variance of the shear field to use. (Default is to calculate it directly.)
 :varv:      (float) Variance of the velocity field to use. (Default is to calculate it directly.)
@@ -379,7 +380,7 @@ about the output columns.
    - ``DR`` (if ``nn_statistic=compensated``) = The cross terms between data and random.
    - ``RD`` (if ``nn_statistic=compensated`` cross-correlation) = The cross term between random and data, which for a cross-correlation is not equivalent to ``DR``.

-:nn_statistic: (str, default='compensated') Which statistic to use for xi as the estimator of the NN correlation function.
+:nn_statistic: (str, default='compensated') Which statistic to use for the estimator of the NN correlation function.

    Options are (D = data catalog, R = random catalog)
@@ -401,7 +402,7 @@ about the output columns.
    - ``weight`` = The total weight of the pairs in each bin.
    - ``npairs`` = The total number of pairs in each bin.

-:nk_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the mean shear as the estimator of the NK correlation function.
+:nk_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NK correlation function.

    Options are:
@@ -427,6 +428,135 @@ about the output columns.
    - ``weight`` = The total weight of the pairs in each bin.
    - ``npairs`` = The total number of pairs in each bin.

+:nz_file_name: (str) The output filename for count-spin-0 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``z_real`` = The mean real component of z.
+    - ``z_imag`` = The mean imaginary component of z.
+    - ``sigma`` = The 1-sigma error bar for ``z_real`` and ``z_imag``.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:nz_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NZ correlation function.
+
+    Options are:
+
+    - 'compensated' is similar to the Landy-Szalay statistic:
+      Define:
+
+        - NZ = Sum(z around data points)
+        - RZ = Sum(z around random points), scaled to be equivalent in effective number as the number of pairs in NZ.
+        - npairs = number of pairs in NZ.
+
+      Then this statistic is <z> = (NZ-RZ)/npairs
+    - 'simple' is the normal version: <z> = NZ/npairs
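For reference, the 'compensated' NZ estimator above can be exercised from Python roughly as follows. This is a minimal sketch, not code from this patch: the catalog contents are invented, and the ``rz`` keyword of ``calculateXi`` is assumed by analogy with ``NGCorrelation.calculateXi(rg=...)``.

```python
# Sketch of the 'compensated' NZ estimator (assumed API, per lead-in above).
import numpy as np
import treecorr

rng = np.random.default_rng(1234)
x, y = rng.uniform(0, 100, (2, 1000))      # data (count) positions, arcmin
sx, sy = rng.uniform(0, 100, (2, 5000))    # spin-0 catalog positions
z1, z2 = rng.normal(0, 0.1, (2, 5000))     # complex spin-0 field components
rx, ry = rng.uniform(0, 100, (2, 5000))    # random positions

data = treecorr.Catalog(x=x, y=y, x_units='arcmin', y_units='arcmin')
src = treecorr.Catalog(x=sx, y=sy, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
rand = treecorr.Catalog(x=rx, y=ry, x_units='arcmin', y_units='arcmin')

nz = treecorr.NZCorrelation(min_sep=1., max_sep=20., bin_size=0.1, sep_units='arcmin')
rz = treecorr.NZCorrelation(min_sep=1., max_sep=20., bin_size=0.1, sep_units='arcmin')
nz.process(data, src)    # NZ = Sum(z around data points)
rz.process(rand, src)    # RZ = Sum(z around random points)
xi, xi_im, varxi = nz.calculateXi(rz=rz)   # compensated: (NZ-RZ)/npairs
```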
+
+:kz_file_name: (str) The output filename for scalar-spin-0 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xi`` = The real component of the correlation function, xi.
+    - ``xi_im`` = The imaginary component of the correlation function.
+    - ``sigma`` = The 1-sigma error bar for each component of xi.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:zz_file_name: (str) The output filename for spin-0-spin-0 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xip`` = <Re(z1 conj(z2))> or Re(<z1 conj(z2)>)
+    - ``xim`` = <Re(z1 z2)> or Re(<z1 z2>)
+    - ``xip_im`` = <Im(z1 conj(z2))> or Im(<z1 conj(z2)>)
+
+        This should normally be consistent with zero, especially for an
+        auto-correlation, because if every pair were counted twice to
+        get each galaxy in both positions, then this would come out
+        exactly zero.
+
+    - ``xim_im`` = <Im(z1 z2)> or Im(<z1 z2>)
+    - ``sigma_xip`` = The 1-sigma error bar for each component of xi+.
+    - ``sigma_xim`` = The 1-sigma error bar for each component of xi-.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
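Since z is spin-0, no projection is applied, so the zz columns above are just binned averages of z1 conj(z2) and z1 z2. A brute-force cross-check of that reading (a sketch under stated assumptions: unit weights, ``bin_slop=0``, and the log binning described elsewhere in this file):

```python
# Brute-force check of the ZZ xip/xim column definitions (illustrative).
import numpy as np
import treecorr

rng = np.random.default_rng(42)
n = 500
x, y = rng.uniform(0, 30, (2, n))
z = rng.normal(0, 0.05, n) + 1j * rng.normal(0, 0.05, n)

cat = treecorr.Catalog(x=x, y=y, z1=z.real, z2=z.imag,
                       x_units='arcmin', y_units='arcmin')
zz = treecorr.ZZCorrelation(min_sep=1., max_sep=20., nbins=8,
                            sep_units='arcmin', bin_slop=0)
zz.process(cat)

i, j = np.triu_indices(n, k=1)                      # each pair once
r = np.hypot(x[i] - x[j], y[i] - y[j])
k = np.floor(np.log(r / zz.min_sep) / zz.bin_size).astype(int)
sel = (k >= 0) & (k < zz.nbins)
xip = np.zeros(zz.nbins, dtype=complex)             # sums of z1 conj(z2)
xim = np.zeros(zz.nbins, dtype=complex)             # sums of z1 z2
np.add.at(xip, k[sel], z[i[sel]] * np.conj(z[j[sel]]))
np.add.at(xim, k[sel], z[i[sel]] * z[j[sel]])
npairs = np.bincount(k[sel], minlength=zz.nbins)
np.testing.assert_allclose(zz.xip, xip.real / npairs, rtol=1e-6, atol=1e-8)
np.testing.assert_allclose(zz.xim, xim.real / npairs, rtol=1e-6, atol=1e-8)
```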
+
+:nv_file_name: (str) The output filename for count-vector correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``vR`` = The mean radial vector with respect to the point in question.
+    - ``vT`` = The mean counter-clockwise tangential vector with respect to the point in question.
+    - ``sigma`` = The 1-sigma error bar for ``vR`` and ``vT``.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:nv_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NV correlation function.
+
+    Options are:
+
+    - 'compensated' is similar to the Landy-Szalay statistic:
+      Define:
+
+        - NV = Sum(v around data points)
+        - RV = Sum(v around random points), scaled to be equivalent in effective number as the number of pairs in NV.
+        - npairs = number of pairs in NV.
+
+      Then this statistic is vR = (NV-RV)/npairs
+    - 'simple' is the normal version: vR = NV/npairs
+
+:kv_file_name: (str) The output filename for scalar-vector correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xi`` = The real component of the correlation function, xi(R).
+    - ``xi_im`` = The imaginary component of the correlation function, xi(R).
+    - ``sigma`` = The 1-sigma error bar for each component of xi.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:vv_file_name: (str) The output filename for vector-vector correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xip`` = <v1 conj(v2)> where v1 and v2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
+    - ``xim`` = <v1 v2> where v1 and v2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
+    - ``xip_im`` = Im(<v1 conj(v2)>).
+
+        In the formulation of xi+ using complex numbers, this is the imaginary component.
+        It should normally be consistent with zero, especially for an
+        auto-correlation, because if every pair were counted twice to
+        get each galaxy in both positions, then this would come out
+        exactly zero.
+
+    - ``xim_im`` = Im(<v1 v2>).
+
+        In the formulation of xi- using complex numbers, this is the imaginary component.
+        It should be consistent with zero for parity invariant vector fields.
+
+    - ``sigma_xip`` = The 1-sigma error bar for xi+.
+    - ``sigma_xim`` = The 1-sigma error bar for xi-.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.

 :ng_file_name: (str) The output filename for count-shear correlation function.

     This is the count-shear correlation function, often called galaxy-galaxy
@@ -443,7 +573,7 @@ about the output columns.
    - ``weight`` = The total weight of the pairs in each bin.
    - ``npairs`` = The total number of pairs in each bin.

-:ng_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the mean shear as the estimator of the NG correlation function.
+:ng_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NG correlation function.

    Options are:
@@ -503,56 +633,56 @@ about the output columns.
    - ``weight`` = The total weight of the pairs in each bin.
    - ``npairs`` = The total number of pairs in each bin.

-:nv_file_name: (str) The output filename for count-vector correlation function.
+:nt_file_name: (str) The output filename for count-spin-3 correlation function.

     The output columns are:

     - ``R_nom`` = The center of the bin
     - ``meanR`` = The mean separation of the points that went into the bin.
     - ``meanlogR`` = The mean log(R) of the points that went into the bin.
-    - ``vR`` = The mean radial vector with respect to the point in question.
-    - ``vT`` = The mean counter-clockwise tangential vector with respect to the point in questin.
-    - ``sigma`` = The 1-sigma error bar for ``vR`` and ``vT``.
+    - ``tR`` = The mean real component of the spin-3 field relative to the center points.
+    - ``tR_im`` = The mean imaginary component of the spin-3 field relative to the center points.
+    - ``sigma`` = The 1-sigma error bar for ``tR`` and ``tR_im``.
     - ``weight`` = The total weight of the pairs in each bin.
     - ``npairs`` = The total number of pairs in each bin.

-:nv_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the mean vector as the estimator of the NV correlation function.
+:nt_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NT correlation function.

     Options are:

     - 'compensated' is simiar to the Landy-Szalay statistic:
       Define:

-        - NV = Sum(gamma around data points)
-        - RV = Sum(gamma around random points), scaled to be equivalent in effective number as the number of pairs in NG.
-        - npairs = number of pairs in NV.
+        - NT = Sum(t around data points)
+        - RT = Sum(t around random points), scaled to be equivalent in effective number as the number of pairs in NT.
+        - npairs = number of pairs in NT.

-      Then this statistic is vR = (NV-RV)/npairs
-    - 'simple' is the normal version: vR = NV/npairs
+      Then this statistic is tR = (NT-RT)/npairs
+    - 'simple' is the normal version: tR = NT/npairs
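All of these per-type parameters are consumed by the ``corr2`` driver, which is also callable from Python as ``treecorr.corr2(config)``. A sketch of driving the new NZ output that way, mirroring ``tests/configs/nz.yaml`` added later in this patch (file paths are illustrative):

```python
# Sketch: config-driven corr2 run producing the nz_file_name output.
import treecorr

config = {
    'file_name': 'data/nz_lens.fits',        # positions of the count catalog
    'file_name2': 'data/nz_source.fits',     # positions + z1/z2 of the spin-0 catalog
    'rand_file_name': 'data/nz_rand.fits',   # randoms => nz_statistic defaults to 'compensated'
    'x_col': 'x', 'y_col': 'y',
    'x_units': 'arcmin', 'y_units': 'arcmin',
    'z1_col': 'z1', 'z2_col': 'z2',
    'min_sep': 1., 'max_sep': 20., 'bin_size': 0.1,
    'sep_units': 'arcmin',
    'nz_file_name': 'output/nz.out',
}
treecorr.corr2(config)
```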
-:kv_file_name: (str) The output filename for scalar-vector correlation function.
+:kt_file_name: (str) The output filename for scalar-spin-3 correlation function.

     The output columns are:

     - ``R_nom`` = The center of the bin
     - ``meanR`` = The mean separation of the points that went into the bin.
     - ``meanlogR`` = The mean log(R) of the points that went into the bin.
-    - ``kvR`` = The kappa-scaled mean radial vector.
-    - ``kvT`` = The kappa-scaled mean counter-clockwise tangential vector.
-    - ``sigma`` = The 1-sigma error bar for ``kvR`` and ``kvT``.
+    - ``xi`` = The real component of the correlation function, xi.
+    - ``xi_im`` = The imaginary component of the correlation function, xi.
+    - ``sigma`` = The 1-sigma error bar for each component of xi.
     - ``weight`` = The total weight of the pairs in each bin.
     - ``npairs`` = The total number of pairs in each bin.

-:vv_file_name: (str) The output filename for vector-vector correlation function.
+:tt_file_name: (str) The output filename for spin-3-spin-3 correlation function.

     The output columns are:

     - ``R_nom`` = The center of the bin
     - ``meanR`` = The mean separation of the points that went into the bin.
     - ``meanlogR`` = The mean log(R) of the points that went into the bin.
-    - ``xip`` = <v1 conj(v2)> where v1 and v2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
-    - ``xim`` = <v1 v2> where v1 and v2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
+    - ``xip`` = <t1 conj(t2)> where t1 and t2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
+    - ``xim`` = <t1 t2> where t1 and t2 are measured with respect to the line joining the two points, where p1 is on the left and p2 is on the right.
-    - ``xip_im`` = Im(<v1 conj(v2)>).
+    - ``xip_im`` = Im(<t1 conj(t2)>).

        In the formulation of xi+ using complex numbers, this is the imaginary component.
        It should normally be consistent with zero, especially for an
@@ -560,10 +690,77 @@ about the output columns.
        get each galaxy in both positions, then this would come out
        exactly zero.

-    - ``xim_im`` = Im(<v1 v2>).
+    - ``xim_im`` = Im(<t1 t2>).

        In the formulation of xi- using complex numbers, this is the imaginary component.
-       It should be consistent with zero for parity invariant vector fields.
+       It should be consistent with zero for parity invariant fields.
+
+    - ``sigma_xip`` = The 1-sigma error bar for xi+.
+    - ``sigma_xim`` = The 1-sigma error bar for xi-.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:nq_file_name: (str) The output filename for count-spin-4 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``qR`` = The mean real component of the spin-4 field relative to the center points.
+    - ``qR_im`` = The mean imaginary component of the spin-4 field relative to the center points.
+    - ``sigma`` = The 1-sigma error bar for ``qR`` and ``qR_im``.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:nq_statistic: (str, default='compensated' if ``rand_files`` is given, otherwise 'simple') Which statistic to use for the estimator of the NQ correlation function.
+
+    Options are:
+
+    - 'compensated' is similar to the Landy-Szalay statistic:
+      Define:
+
+        - NQ = Sum(q around data points)
+        - RQ = Sum(q around random points), scaled to be equivalent in effective number as the number of pairs in NQ.
+        - npairs = number of pairs in NQ.
+
+      Then this statistic is qR = (NQ-RQ)/npairs
+    - 'simple' is the normal version: qR = NQ/npairs
+
+:kq_file_name: (str) The output filename for scalar-spin-4 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xi`` = The real component of the correlation function, xi.
+    - ``xi_im`` = The imaginary component of the correlation function, xi.
+    - ``sigma`` = The 1-sigma error bar for each component of xi.
+    - ``weight`` = The total weight of the pairs in each bin.
+    - ``npairs`` = The total number of pairs in each bin.
+
+:qq_file_name: (str) The output filename for spin-4-spin-4 correlation function.
+
+    The output columns are:
+
+    - ``R_nom`` = The center of the bin
+    - ``meanR`` = The mean separation of the points that went into the bin.
+    - ``meanlogR`` = The mean log(R) of the points that went into the bin.
+    - ``xip`` = <q1 conj(q2)> where q1 and q2 are measured with respect to the line joining the two points.
+    - ``xim`` = <q1 q2> where q1 and q2 are measured with respect to the line joining the two points.
+    - ``xip_im`` = Im(<q1 conj(q2)>).
+
+        In the formulation of xi+ using complex numbers, this is the imaginary component.
+        It should normally be consistent with zero, especially for an
+        auto-correlation, because if every pair were counted twice to
+        get each galaxy in both positions, then this would come out
+        exactly zero.
+
+    - ``xim_im`` = Im(<q1 q2>).
+
+        In the formulation of xi- using complex numbers, this is the imaginary component.
+        It should be consistent with zero for parity invariant fields.

     - ``sigma_xip`` = The 1-sigma error bar for xi+.
     - ``sigma_xim`` = The 1-sigma error bar for xi-.
     - ``weight`` = The total weight of the pairs in each bin.
     - ``npairs`` = The total number of pairs in each bin.
@@ -590,7 +787,7 @@ about the output columns.
    - ``DDD``, ``RRR`` = The raw numbers of triangles for the data and randoms
    - ``DDR``, ``DRD``, ``RDD``, ``DRR``, ``RDR``, ``RRD`` (if ``nn_statistic=compensated``) = The cross terms between data and random.

-:nnn_statistic: (str, default='compensated') Which statistic to use for xi as the estimator of the NNN correlation function.
+:nnn_statistic: (str, default='compensated') Which statistic to use for the estimator of the NNN correlation function.

    Options are:
@@ -617,14 +814,14 @@ about the output columns.
    - ``meanlogd2`` = The mean value of log(d2) for the triangles in each bin
    - ``meand3`` = The mean value of d3 for the triangles in each bin
    - ``meanlogd3`` = The mean value of log(d3) for the triangles in each bin
-   - ``gam0r`` = The real part of Gamma_0.
-   - ``gam0i`` = The imag part of Gamma_0.
-   - ``gam1r`` = The real part of Gamma_1.
-   - ``gam1i`` = The imag part of Gamma_1.
- - ``gam2r`` = The real part of Gamma_2. - - ``gam2i`` = The imag part of Gamma_2. - - ``gam3r`` = The real part of Gamma_3. - - ``gam3i`` = The imag part of Gamma_3. + - ``gam0r`` = The real component of Gamma_0. + - ``gam0i`` = The imag component of Gamma_0. + - ``gam1r`` = The real component of Gamma_1. + - ``gam1i`` = The imag component of Gamma_1. + - ``gam2r`` = The real component of Gamma_2. + - ``gam2i`` = The imag component of Gamma_2. + - ``gam3r`` = The real component of Gamma_3. + - ``gam3i`` = The imag component of Gamma_3. - ``sigma_gam`` = The 1-sigma error bar for the Gamma values. - ``weight`` = The total weight of the triangles in each bin. - ``ntri`` = The total number of triangles in each bin. diff --git a/docs/patches.rst b/docs/patches.rst index b38d3549..3d81b0c2 100644 --- a/docs/patches.rst +++ b/docs/patches.rst @@ -273,7 +273,7 @@ The overall procedure for doing this is as follows: 4. Make sure not to do anything that requires the catalog be loaded from disk. TreeCorr will delay doing the actual load until it needs to do so. Here, we want to make sure it never loads the full data. -5. Run the `process ` function (for whichever correlation +5. Run the `process ` function (for whichever correlation type you need) using the ``low_mem=True`` option. Here are some worked examples. First, an auto-correlation of a @@ -336,7 +336,7 @@ Another use case that is enabled by using patches is to divide up the work of calculating a correlation function over multiple machines with MPI using `mpi4py `_. -For this usage, the `process ` functions take an optional ``comm`` +For this usage, the `process ` functions take an optional ``comm`` parameter. When running in an MPI job, you can pass in ``comm=MPI.COMM_WORLD``, and TreeCorr will divide up the work among however many nodes you are using. The results will be sent back the the rank 0 node and combined to produce the diff --git a/docs/scripts.rst b/docs/scripts.rst index 2fe5f8a3..ce76085e 100644 --- a/docs/scripts.rst +++ b/docs/scripts.rst @@ -77,9 +77,9 @@ The corr3 function from python Other utilities related to corr2 and corr3 ------------------------------------------ -.. autofunction:: treecorr.corr2ex.print_corr2_params +.. autofunction:: treecorr.exec_corr2.print_corr2_params -.. autofunction:: treecorr.corr3ex.print_corr3_params +.. autofunction:: treecorr.exec_corr3.print_corr3_params Utilities related to the configuration dict diff --git a/docs/zz.rst b/docs/zz.rst new file mode 100644 index 00000000..025d9dcb --- /dev/null +++ b/docs/zz.rst @@ -0,0 +1,13 @@ + +ZZCorrelation: Spin-0-spin-0 correlations +----------------------------------------- + +.. autoclass:: treecorr.ZZCorrelation + :members: + :special-members: + :show-inheritance: + +.. 
autoclass:: treecorr.BaseZZCorrelation
+   :members:
+   :special-members:
+   :show-inheritance:
diff --git a/include/BinType.h b/include/BinType.h
index a433d025..9a480e1d 100644
--- a/include/BinType.h
+++ b/include/BinType.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Bounds.h b/include/Bounds.h
index b6cde6da..6bf9bf75 100644
--- a/include/Bounds.h
+++ b/include/Bounds.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Cell.h b/include/Cell.h
index d2e00e58..5ff87f98 100644
--- a/include/Cell.h
+++ b/include/Cell.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -34,7 +34,7 @@ const double IOTA = 1.e-10;
 // NData means just count the point.
 // KData means use a scalar.  Nominally kappa, but works with any scalar (e.g. temperature).
 // GData means use a shear.
-enum DataType { NData=1 , KData=2 , GData=3, VData=4, TData=5, QData=6 };
+enum DataType { NData, KData, GData, ZData, VData, TData, QData };

 // Return a random number between 0 and 1.
 double urand(long long seed=0);
@@ -194,6 +194,32 @@ template <int C>
 std::ostream& operator<<(std::ostream& os, const CellData<GData,C>& c)
 { return os << c.getPos() << " " << c.getWG() << " " << c.getW() << " " << c.getN(); }

+template <int C>
+class CellData<ZData,C> : public CellData<GData,C>
+{
+public:
+    CellData() {}
+
+    CellData(const Position<C>& pos, const std::complex<double>& v, double w) :
+        CellData<GData,C>(pos, v, w) {}
+
+    template <int C2>
+    CellData(const Position<C2>& pos, const std::complex<double>& v, double w) :
+        CellData<GData,C>(pos, v, w) {}
+
+    CellData(const std::vector<std::pair<CellData<ZData,C>*,WPosLeafInfo> >& vdata,
+             size_t start, size_t end) :
+        CellData<GData,C>(vdata, start, end) {}
+
+    // The above constructor just computes the mean pos, since sometimes that's all we
+    // need.  So this function will finish the rest of the construction when desired.
+    void finishAverages(const std::vector<std::pair<CellData<ZData,C>*,WPosLeafInfo> >&,
+                        size_t start, size_t end);
+
+    std::complex<double> getWZ() const { return this->getWG(); }
+    void setWZ(const std::complex<double>& wv) { this->setWG(wv); }
+};
+
 template <int C>
 class CellData<VData,C> : public CellData<GData,C>
 {
@@ -369,7 +395,7 @@ class Cell : public BaseCell
 };

 // The above is fine for NData, but K and G need a couple more methods.
-// (When we eventually do 3pt for V,T,Q, they will also need specializations.)
+// (When we eventually do 3pt for Z,V,T,Q, they will also need specializations.)
 template <int C>
 class Cell<KData,C> : public BaseCell<C>
 {
diff --git a/include/Corr2.h b/include/Corr2.h
index d4bb669b..6dd8c568 100644
--- a/include/Corr2.h
+++ b/include/Corr2.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -315,6 +315,19 @@ struct XiData

 // All complex valued work the same as GData
 // So just make them sub-types of the GData versions.
+template <DataType D1>
+struct XiData<D1,ZData> : public XiData<D1,GData>
+{
+    XiData(double* xi0, double* xi1, double*, double*) :
+        XiData<D1,GData>(xi0,xi1,0,0) {}
+};
+template <>
+struct XiData<ZData,ZData> : public XiData<GData,GData>
+{
+    XiData(double* xi0, double* xi1, double* xi2, double* xi3) :
+        XiData<GData,GData>(xi0,xi1,xi2,xi3) {}
+};
+
 template <DataType D1>
 struct XiData<D1,VData> : public XiData<D1,GData>
 {
diff --git a/include/Corr3.h b/include/Corr3.h
index 842ecba9..5600973c 100644
--- a/include/Corr3.h
+++ b/include/Corr3.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Field.h b/include/Field.h
index 06782764..6b2fd251 100644
--- a/include/Field.h
+++ b/include/Field.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -28,6 +28,7 @@
 //
 // D = NData for counts
 //     KData for kappa
+//     ZData for spin-0
 //     VData for vector
 //     GData for shear
 //     TData for spin-3
diff --git a/include/Metric.h b/include/Metric.h
index 1a600551..95bdea6d 100644
--- a/include/Metric.h
+++ b/include/Metric.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/MultipoleScratch.h b/include/MultipoleScratch.h
index 90635282..76d6d972 100644
--- a/include/MultipoleScratch.h
+++ b/include/MultipoleScratch.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Position.h b/include/Position.h
index 862c6fea..6230b37d 100644
--- a/include/Position.h
+++ b/include/Position.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Process2.h b/include/Process2.h
index ce61693a..7ef8cd9d 100644
--- a/include/Process2.h
+++ b/include/Process2.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/Process3.h b/include/Process3.h
index 234f5de3..2a7cd65a 100644
--- a/include/Process3.h
+++ b/include/Process3.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/ProjectHelper.h b/include/ProjectHelper.h
index aa8a695a..338e635b 100644
--- a/include/ProjectHelper.h
+++ b/include/ProjectHelper.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -32,6 +32,10 @@ inline double safe_norm(const std::complex<double>& z)
 template <int s>
 std::complex<double> calculate_expmsialpha(const std::complex<double>& r);

+template <>
+inline std::complex<double> calculate_expmsialpha<0>(const std::complex<double>& r)
+{ return 1.; }
+
 template <>
 inline std::complex<double> calculate_expmsialpha<1>(const std::complex<double>& r)
 { return conj(r) / sqrt(safe_norm(r)); }
@@ -57,7 +61,8 @@ inline std::complex<double> calculate_expmsialpha<4>(const std::complex<double>& r)
 template <DataType D>
 inline std::complex<double> _expmsialpha(const std::complex<double>& r)
 {
-    const int s = (D==VData ? 1 :
+    const int s = (D==ZData ? 0 :
+                   D==VData ? 1 :
                    D==GData ? 2 :
                    D==TData ? 3 :
                    D==QData ? 4 : 0);
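The new `<0>` specialization follows the general rule these helpers implement: rotating a spin-s quantity into the frame of the separation vector r multiplies it by exp(-is·alpha) with alpha = arg(r), which is identically 1 for s = 0. In formula form (my summary, consistent with the specializations shown above, not text from the patch):

```latex
e^{-is\alpha} = \left(\frac{\bar r}{|r|}\right)^{\!s},
\qquad
s=0:\ 1,\quad
s=1:\ \frac{\bar r}{|r|},\quad
s=2:\ \frac{\bar r^{2}}{|r|^{2}},\quad
s=3:\ \frac{\bar r^{3}}{|r|^{3}},\quad
s=4:\ \frac{\bar r^{4}}{|r|^{4}}.
```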
diff --git a/include/Split.h b/include/Split.h
index fafb4acc..476b5d17 100644
--- a/include/Split.h
+++ b/include/Split.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/include/dbg.h b/include/dbg.h
index 92541dc9..1a97f6e7 100644
--- a/include/dbg.h
+++ b/include/dbg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/scripts/corr2 b/scripts/corr2
index 2672da9e..25992959 100755
--- a/scripts/corr2
+++ b/scripts/corr2
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
diff --git a/scripts/corr3 b/scripts/corr3
index b8db3af9..0de5b02b 100755
--- a/scripts/corr3
+++ b/scripts/corr3
@@ -1,5 +1,5 @@
 #!/usr/bin/env python
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
diff --git a/src/Cell.cpp b/src/Cell.cpp
index 3bd60d00..90f9aadc 100644
--- a/src/Cell.cpp
+++ b/src/Cell.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -176,15 +176,16 @@ void BuildCellData(
     double wp = vdata[start].second.wpos;
     pos = vdata[start].first->getPos();
     pos *= wp;
-    w = vdata[start].first->getW();
+    double ww = vdata[start].first->getW();
     double sumwp = wp;
     for(size_t i=start+1; i!=end; ++i) {
         const BaseCellData<C>& data = *vdata[i].first;
         wp = vdata[i].second.wpos;
         pos += data.getPos() * wp;
         sumwp += wp;
-        w += data.getW();
+        ww += data.getW();
     }
+    w = float(ww);
     if (sumwp != 0.) {
         pos /= sumwp;
         // If C == Sphere, the average position is no longer on the surface of the unit sphere.
@@ -240,6 +241,14 @@
     setWG(SimpleSum(vdata, start, end));
 }

+template <>
+void CellData<ZData,Flat>::finishAverages(
+    const std::vector<std::pair<CellData<ZData,Flat>*,WPosLeafInfo> >& vdata,
+    size_t start, size_t end)
+{
+    setWZ(SimpleSum(vdata, start, end));
+}
+
 template <>
 void CellData<VData,Flat>::finishAverages(
     const std::vector<std::pair<CellData<VData,Flat>*,WPosLeafInfo> >& vdata,
@@ -328,6 +337,22 @@
     setWG(ParallelTransportSum(vdata,_pos,start,end));
 }

+template <>
+void CellData<ZData,Sphere>::finishAverages(
+    const std::vector<std::pair<CellData<ZData,Sphere>*,WPosLeafInfo> >& vdata,
+    size_t start, size_t end)
+{
+    setWZ(ParallelTransportSum(vdata,_pos,start,end));
+}
+
+template <>
+void CellData<ZData,ThreeD>::finishAverages(
+    const std::vector<std::pair<CellData<ZData,ThreeD>*,WPosLeafInfo> >& vdata,
+    size_t start, size_t end)
+{
+    setWZ(ParallelTransportSum(vdata,_pos,start,end));
+}
+
 template <>
 void CellData<VData,Sphere>::finishAverages(
     const std::vector<std::pair<CellData<VData,Sphere>*,WPosLeafInfo> >& vdata,
@@ -717,6 +742,7 @@ void BaseCell<C>::WriteTree(std::ostream& os, int indent) const
     InstD(NData,C); \
     InstD(KData,C); \
     InstD(GData,C); \
+    InstD(ZData,C); \
     InstD(VData,C); \
     InstD(TData,C); \
     InstD(QData,C); \
diff --git a/src/Corr2.cpp b/src/Corr2.cpp
index e682e2d2..ccebc522 100644
--- a/src/Corr2.cpp
+++ b/src/Corr2.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -399,6 +399,20 @@ struct DirectHelper2<1,NData,KData>
     { xi.xi[k] += c1.getW() * c2.getData().getWK(); }
 };

+template <>
+struct DirectHelper2<1,NData,ZData>
+{
+    template <int R, int C>
+    static void ProcessXi(
+        const Cell<NData,C>& c1, const Cell<ZData,C>& c2, const double ,
+        XiData<NData,ZData>& xi, int k, int )
+    {
+        std::complex<double> z2 = c1.getW() * c2.getData().getWZ();
+        xi.xi[k] += real(z2);
+        xi.xi_im[k] += imag(z2);
+    }
+};
+
 template <DataType D2>
 struct DirectHelper2<2,NData,D2>
 {
@@ -435,6 +449,60 @@ struct DirectHelper2<3,KData,KData>
     }
 };

+template <>
+struct DirectHelper2<3,KData,ZData>
+{
+    template <int R, int C>
+    static void ProcessXi(
+        const Cell<KData,C>& c1, const Cell<ZData,C>& c2, const double ,
+        XiData<KData,ZData>& xi, int k, int k2)
+    {
+        std::complex<double> wkz = c1.getData().getWK() * c2.getData().getWZ();
+        xi.xi[k] += real(wkz);
+        xi.xi_im[k] += imag(wkz);
+        if (R) {
+            xi.xi[k2] += real(wkz);
+            xi.xi_im[k2] += imag(wkz);
+        }
+    }
+};
+
+template <>
+struct DirectHelper2<3,ZData,ZData>
+{
+    template <int R, int C>
+    static void ProcessXi(
+        const Cell<ZData,C>& c1, const Cell<ZData,C>& c2, const double ,
+        XiData<ZData,ZData>& xi, int k, int k2)
+    {
+        std::complex<double> z1 = c1.getData().getWZ();
+        std::complex<double> z2 = c2.getData().getWZ();
+        ProjectHelper<C>::Project(c1,c2,z1,z2);
+
+        double z1rz2r = z1.real() * z2.real();
+        double z1rz2i = z1.real() * z2.imag();
+        double z1iz2r = z1.imag() * z2.real();
+        double z1iz2i = z1.imag() * z2.imag();
+
+        double z1z2cr = z1rz2r + z1iz2i;  // z1 * conj(z2)
+        double z1z2ci = z1iz2r - z1rz2i;
+        double z1z2r = z1rz2r - z1iz2i;   // z1 * z2
+        double z1z2i = z1iz2r + z1rz2i;
+
+        xi.xip[k] += z1z2cr;
+        xi.xip_im[k] += z1z2ci;
+        xi.xim[k] += z1z2r;
+        xi.xim_im[k] += z1z2i;
+
+        if (R) {
+            xi.xip[k2] += z1z2cr;
+            xi.xip_im[k2] += z1z2ci;
+            xi.xim[k2] += z1z2r;
+            xi.xim_im[k2] += z1z2i;
+        }
+    }
+};
+
 template <DataType D2>
 struct DirectHelper2<4,KData,D2>
 {
@@ -500,9 +568,9 @@ struct DirectHelper
 {
         const int algo =
             (D1 == NData && D2 == NData) ? 0 :
-            (D1 == NData && D2 == KData) ? 1 :
+            (D1 == NData && (D2==KData || D2==ZData)) ? 1 :
             (D1 == NData && D2 >= GData) ? 2 :
-            (D1 == KData && D2 == KData) ? 3 :
+            (D1 == KData && (D2==KData || D2==ZData)) ? 3 :
             (D1 == KData && D2 >= GData) ? 4 :
             (D1 >= GData && D2 >= GData) ? 5 :
             -1;
@@ -1206,14 +1274,18 @@ void pyExportCorr2(py::module& _treecorr)

     WrapCorr2<NData,KData>(_treecorr, "NK");
     WrapCorr2<KData,KData>(_treecorr, "KK");

-    WrapCorr2<NData,GData>(_treecorr, "NG");
-    WrapCorr2<KData,GData>(_treecorr, "KG");
-    WrapCorr2<GData,GData>(_treecorr, "GG");
+    WrapCorr2<NData,ZData>(_treecorr, "NZ");
+    WrapCorr2<KData,ZData>(_treecorr, "KZ");
+    WrapCorr2<ZData,ZData>(_treecorr, "ZZ");

     WrapCorr2<NData,VData>(_treecorr, "NV");
     WrapCorr2<KData,VData>(_treecorr, "KV");
     WrapCorr2<VData,VData>(_treecorr, "VV");

+    WrapCorr2<NData,GData>(_treecorr, "NG");
+    WrapCorr2<KData,GData>(_treecorr, "KG");
+    WrapCorr2<GData,GData>(_treecorr, "GG");
+
     WrapCorr2<NData,TData>(_treecorr, "NT");
     WrapCorr2<KData,TData>(_treecorr, "KT");
     WrapCorr2<TData,TData>(_treecorr, "TT");
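The ZZ helper above expands the two complex products into four real products. A quick standalone check that this expansion is just the standard complex multiplication identity (illustrative, not part of the build):

```python
# Check the real-product expansion used in DirectHelper2<3,ZData,ZData>.
import numpy as np

rng = np.random.default_rng(0)
z1 = complex(*rng.normal(size=2))
z2 = complex(*rng.normal(size=2))

z1rz2r = z1.real * z2.real
z1rz2i = z1.real * z2.imag
z1iz2r = z1.imag * z2.real
z1iz2i = z1.imag * z2.imag

assert np.isclose(z1rz2r + z1iz2i, (z1 * z2.conjugate()).real)  # Re(z1 conj(z2)) -> xip
assert np.isclose(z1iz2r - z1rz2i, (z1 * z2.conjugate()).imag)  # Im(z1 conj(z2)) -> xip_im
assert np.isclose(z1rz2r - z1iz2i, (z1 * z2).real)              # Re(z1 z2) -> xim
assert np.isclose(z1iz2r + z1rz2i, (z1 * z2).imag)              # Im(z1 z2) -> xim_im
```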
diff --git a/src/Corr3.cpp b/src/Corr3.cpp
index 62ec6da0..b93d8bbe 100644
--- a/src/Corr3.cpp
+++ b/src/Corr3.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/src/Field.cpp b/src/Field.cpp
index b46e2602..3ecd6699 100644
--- a/src/Field.cpp
+++ b/src/Field.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
@@ -478,7 +478,7 @@ Field<KData,C>* BuildKField(
 }

 template <DataType D, int C>
-Field<D,C>* BuildZField(
+Field<D,C>* BuildAnyZField(
     py::array_t<double>& xp, py::array_t<double>& yp, py::array_t<double>& zp,
     py::array_t<double>& d1p, py::array_t<double>& d2p,
     py::array_t<double>& wp, py::array_t<double>& wposp,
@@ -514,9 +514,22 @@ Field<GData,C>* BuildGField(
     double minsize, double maxsize,
     SplitMethod sm, long long seed, bool brute, int mintop, int maxtop)
 {
-    return BuildZField<GData,C>(x, y, z, g1, g2, w, wpos,
-                                minsize, maxsize, sm, seed,
-                                brute, mintop, maxtop);
+    return BuildAnyZField<GData,C>(x, y, z, g1, g2, w, wpos,
+                                   minsize, maxsize, sm, seed,
+                                   brute, mintop, maxtop);
+}
+
+template <int C>
+Field<ZData,C>* BuildZField(
+    py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
+    py::array_t<double>& z1, py::array_t<double>& z2,
+    py::array_t<double>& w, py::array_t<double>& wpos,
+    double minsize, double maxsize,
+    SplitMethod sm, long long seed, bool brute, int mintop, int maxtop)
+{
+    return BuildAnyZField<ZData,C>(x, y, z, z1, z2, w, wpos,
+                                   minsize, maxsize, sm, seed,
+                                   brute, mintop, maxtop);
 }

 template <int C>
@@ -527,9 +540,9 @@ Field<VData,C>* BuildVField(
     py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
     py::array_t<double>& v1, py::array_t<double>& v2,
     py::array_t<double>& w, py::array_t<double>& wpos,
     double minsize, double maxsize,
     SplitMethod sm, long long seed, bool brute, int mintop, int maxtop)
 {
-    return BuildZField<VData,C>(x, y, z, v1, v2, w, wpos,
-                                minsize, maxsize, sm, seed,
-                                brute, mintop, maxtop);
+    return BuildAnyZField<VData,C>(x, y, z, v1, v2, w, wpos,
+                                   minsize, maxsize, sm, seed,
+                                   brute, mintop, maxtop);
 }

 template <int C>
@@ -540,9 +553,9 @@ Field<TData,C>* BuildTField(
     py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
     py::array_t<double>& t1, py::array_t<double>& t2,
     py::array_t<double>& w, py::array_t<double>& wpos,
     double minsize, double maxsize,
     SplitMethod sm, long long seed, bool brute, int mintop, int maxtop)
 {
-    return BuildZField<TData,C>(x, y, z, t1, t2, w, wpos,
-                                minsize, maxsize, sm, seed,
-                                brute, mintop, maxtop);
+    return BuildAnyZField<TData,C>(x, y, z, t1, t2, w, wpos,
+                                   minsize, maxsize, sm, seed,
+                                   brute, mintop, maxtop);
 }

 template <int C>
@@ -553,9 +566,9 @@ Field<QData,C>* BuildQField(
     py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
     py::array_t<double>& q1, py::array_t<double>& q2,
     py::array_t<double>& w, py::array_t<double>& wpos,
     double minsize, double maxsize,
     SplitMethod sm, long long seed, bool brute, int mintop, int maxtop)
 {
-    return BuildZField<QData,C>(x, y, z, q1, q2, w, wpos,
-                                minsize, maxsize, sm, seed,
-                                brute, mintop, maxtop);
+    return BuildAnyZField<QData,C>(x, y, z, q1, q2, w, wpos,
+                                   minsize, maxsize, sm, seed,
+                                   brute, mintop, maxtop);
 }

 template <int C>
@@ -602,7 +615,7 @@ void WrapField(py::module& _treecorr, std::string Cstr)
     typedef void (*init_type)(BaseField<C>& field, py::array_t<double>& cenp, int npatch,
                               long long seed);
     typedef void (*run_type)(BaseField<C>& field, py::array_t<double>& cenp, int npatch,
-                            int max_iter, double tol, bool alt);
+                             int max_iter, double tol, bool alt);
     typedef void (*assign_type)(BaseField<C>& field, py::array_t<double>& cenp, int npatch,
                                 py::array_t<long>& pp);
@@ -622,8 +635,9 @@ void WrapField(py::module& _treecorr, std::string Cstr)
     py::class_<Field<NData,C>, BaseField<C> > nfield(_treecorr, ("NField" + Cstr).c_str());
     py::class_<Field<KData,C>, BaseField<C> > kfield(_treecorr, ("KField" + Cstr).c_str());
-    py::class_<Field<GData,C>, BaseField<C> > gfield(_treecorr, ("GField" + Cstr).c_str());
+    py::class_<Field<ZData,C>, BaseField<C> > zfield(_treecorr, ("ZField" + Cstr).c_str());
     py::class_<Field<VData,C>, BaseField<C> > vfield(_treecorr, ("VField" + Cstr).c_str());
+    py::class_<Field<GData,C>, BaseField<C> > gfield(_treecorr, ("GField" + Cstr).c_str());
     py::class_<Field<TData,C>, BaseField<C> > tfield(_treecorr, ("TField" + Cstr).c_str());
     py::class_<Field<QData,C>, BaseField<C> > qfield(_treecorr, ("QField" + Cstr).c_str());
@@ -637,9 +651,9 @@ void WrapField(py::module& _treecorr, std::string Cstr)
         py::array_t<double>& k, py::array_t<double>& w, py::array_t<double>& wpos,
         double minsize, double maxsize,
         SplitMethod sm, long long seed, bool brute, int mintop, int maxtop);
-    typedef Field<GData,C>* (*gfield_type)(
+    typedef Field<ZData,C>* (*zfield_type)(
         py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
-        py::array_t<double>& g1, py::array_t<double>& g2,
+        py::array_t<double>& z1, py::array_t<double>& z2,
         py::array_t<double>& w, py::array_t<double>& wpos,
         double minsize, double maxsize,
         SplitMethod sm, long long seed, bool brute, int mintop, int maxtop);
@@ -649,6 +663,12 @@ void WrapField(py::module& _treecorr, std::string Cstr)
     typedef Field<VData,C>* (*vfield_type)(
         py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
         py::array_t<double>& v1, py::array_t<double>& v2,
         py::array_t<double>& w, py::array_t<double>& wpos,
         double minsize, double maxsize,
         SplitMethod sm, long long seed, bool brute, int mintop, int maxtop);
+    typedef Field<GData,C>* (*gfield_type)(
+        py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
+        py::array_t<double>& g1, py::array_t<double>& g2,
+        py::array_t<double>& w, py::array_t<double>& wpos,
+        double minsize, double maxsize,
+        SplitMethod sm, long long seed, bool brute, int mintop, int maxtop);
     typedef Field<TData,C>* (*tfield_type)(
         py::array_t<double>& x, py::array_t<double>& y, py::array_t<double>& z,
         py::array_t<double>& t1, py::array_t<double>& t2,
@@ -664,8 +684,9 @@ void WrapField(py::module& _treecorr, std::string Cstr)
     nfield.def(py::init(nfield_type(&BuildNField<C>)));
     kfield.def(py::init(kfield_type(&BuildKField<C>)));
-    gfield.def(py::init(gfield_type(&BuildGField<C>)));
+    zfield.def(py::init(zfield_type(&BuildZField<C>)));
     vfield.def(py::init(vfield_type(&BuildVField<C>)));
+    gfield.def(py::init(gfield_type(&BuildGField<C>)));
     tfield.def(py::init(tfield_type(&BuildTField<C>)));
     qfield.def(py::init(qfield_type(&BuildQField<C>)));
 }
diff --git a/src/KMeans.cpp b/src/KMeans.cpp
index ec0709a8..8ceb09c0 100644
--- a/src/KMeans.cpp
+++ b/src/KMeans.cpp
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/src/PyBind11Helper.h b/src/PyBind11Helper.h
index 6bd3a193..0a915e85 100644
--- a/src/PyBind11Helper.h
+++ b/src/PyBind11Helper.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2003-2019 by Mike Jarvis
+/* Copyright (c) 2003-2024 by Mike Jarvis
  *
  * TreeCorr is free software: redistribution and use in source and binary forms,
  * with or without modification, are permitted provided that the following
diff --git a/src/module.cpp b/src/module.cpp
index 4346a9de..33cbe9b0 100644
--- a/src/module.cpp
+++ b/src/module.cpp
@@ -1,11 +1,6 @@
-/* -*- c++ -*-
- * Copyright (c) 2012-2022 by the GalSim developers team on GitHub
- * 
https://github.com/GalSim-developers +/* Copyright (c) 2003-2024 by Mike Jarvis * - * This file is part of GalSim: The modular galaxy image simulation toolkit. - * https://github.com/GalSim-developers/GalSim - * - * GalSim is free software: redistribution and use in source and binary forms, + * TreeCorr is free software: redistribution and use in source and binary forms, * with or without modification, are permitted provided that the following * conditions are met: * diff --git a/test_requirements.txt b/test_requirements.txt index 52e9aa79..7121eac6 100644 --- a/test_requirements.txt +++ b/test_requirements.txt @@ -1,4 +1,7 @@ fitsio>=1.2 -pandas>=0.20 +pandas>=2.0 scipy>=1.2 mockmpi>=0.8 +pyarrow>=15.0 +h5py>=3.10 +halotools>=0.8.2 diff --git a/tests/Tutorial.ipynb b/tests/Tutorial.ipynb index c1e71293..29105daf 100644 --- a/tests/Tutorial.ipynb +++ b/tests/Tutorial.ipynb @@ -252,7 +252,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "## {'min_sep': 1.0, 'max_sep': 400.0, 'nbins': 100, 'sep_units': 'arcmin', 'verbose': 2, 'output_dots': True, 'coords': 'spherical', 'metric': 'Euclidean'}\n", + "## {'min_sep': 1.0, 'max_sep': 400.0, 'nbins': 100, 'sep_units': 'arcmin', 'verbose': 2, 'output_dots': True, 'coords': 'spherical', 'metric': 'Euclidean', 'corr': 'GG'}\n", "# r_nom meanr meanlogr xip xim xip_im xim_im sigma_xip sigma_xim weight npairs \n", " 1.0304e+00 1.0311e+00 3.0480e-02 7.5773e-06 1.6730e-07 7.0287e-08 -3.5157e-08 1.8983e-08 1.8983e-08 2.0181e+05 2.0181e+05\n", " 1.0940e+00 1.0948e+00 9.0389e-02 7.3854e-06 3.5900e-07 -2.0456e-08 1.5109e-08 1.7939e-08 1.7939e-08 2.2597e+05 2.2597e+05\n", diff --git a/tests/configs/kz.yaml b/tests/configs/kz.yaml new file mode 100644 index 00000000..f0d85824 --- /dev/null +++ b/tests/configs/kz.yaml @@ -0,0 +1,21 @@ + +file_name: data/kz_lens.dat +file_name2: data/kz_source.dat + +x_col: 1 +y_col: 2 +x_units: arcmin +y_units: arcmin + +k_col: [3,0] +z1_col: [0,3] +z2_col: [0,4] + +verbose: 1 + +min_sep: 1 +max_sep: 20 +bin_size: 0.10 +sep_units: arcmin + +kz_file_name: output/kz.out diff --git a/tests/configs/kz_direct.yaml b/tests/configs/kz_direct.yaml new file mode 100644 index 00000000..08621c7c --- /dev/null +++ b/tests/configs/kz_direct.yaml @@ -0,0 +1,23 @@ +file_name: data/kz_direct_cat1.fits +file_name2: data/kz_direct_cat2.fits + +x_col: x +y_col: y +x_units: arcmin +y_units: arcmin + +k_col: k +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 50. +nbins: 50 +sep_units: arcmin +bin_slop: 0 +brute: true + +kz_file_name: output/kz_direct.fits diff --git a/tests/configs/kz_direct_spherical.yaml b/tests/configs/kz_direct_spherical.yaml new file mode 100644 index 00000000..1833deaf --- /dev/null +++ b/tests/configs/kz_direct_spherical.yaml @@ -0,0 +1,23 @@ +file_name: data/kz_direct_sph_cat1.fits +file_name2: data/kz_direct_sph_cat2.fits + +ra_col: ra +dec_col: dec +ra_units: radians +dec_units: radians + +k_col: k +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 10. +nbins: 50 +sep_units: degrees +bin_slop: 0 +brute: true + +kz_file_name: output/kz_direct_sph.fits diff --git a/tests/configs/kz_single.yaml b/tests/configs/kz_single.yaml new file mode 100644 index 00000000..71815d30 --- /dev/null +++ b/tests/configs/kz_single.yaml @@ -0,0 +1,21 @@ + +file_name: data/kz_single_lens.dat +file_name2: data/kz_single_source.dat + +x_col: 1 +y_col: 2 +x_units: arcmin +y_units: arcmin + +k_col: [3, 0] +z1_col: [0, 3] +z2_col: [0, 4] + +verbose: 1 + +min_sep: 1. +max_sep: 20. 
+bin_size: 0.10 +sep_units: arcmin + +kz_file_name: output/kz_single.out diff --git a/tests/configs/nz.yaml b/tests/configs/nz.yaml new file mode 100644 index 00000000..4504cf5a --- /dev/null +++ b/tests/configs/nz.yaml @@ -0,0 +1,21 @@ + +file_name: data/nz_lens.fits +file_name2: data/nz_source.fits +rand_file_name: data/nz_rand.fits + +x_col: x +y_col: y +x_units: arcmin +y_units: arcmin + +z1_col: z1 +z2_col: z2 + +verbose: 1 + +min_sep: 1. +max_sep: 20. +bin_size: 0.10 +sep_units: arcmin + +nz_file_name: output/nz.out diff --git a/tests/configs/nz_direct.yaml b/tests/configs/nz_direct.yaml new file mode 100644 index 00000000..9e6c3a0c --- /dev/null +++ b/tests/configs/nz_direct.yaml @@ -0,0 +1,22 @@ +file_name: data/nz_direct_cat1.fits +file_name2: data/nz_direct_cat2.fits + +x_col: x +y_col: y +x_units: arcmin +y_units: arcmin + +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 50. +nbins: 50 +sep_units: arcmin +bin_slop: 0 +brute: true + +nz_file_name: output/nz_direct.fits diff --git a/tests/configs/nz_direct_spherical.yaml b/tests/configs/nz_direct_spherical.yaml new file mode 100644 index 00000000..e23cb570 --- /dev/null +++ b/tests/configs/nz_direct_spherical.yaml @@ -0,0 +1,22 @@ +file_name: data/nz_direct_sph_cat1.fits +file_name2: data/nz_direct_sph_cat2.fits + +ra_col: ra +dec_col: dec +ra_units: radians +dec_units: radians + +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 10. +nbins: 50 +sep_units: degrees +bin_slop: 0 +brute: true + +nz_file_name: output/nz_direct_sph.fits diff --git a/tests/configs/nz_single.yaml b/tests/configs/nz_single.yaml new file mode 100644 index 00000000..d1a9be90 --- /dev/null +++ b/tests/configs/nz_single.yaml @@ -0,0 +1,20 @@ + +file_name: data/nz_single_lens.dat +file_name2: data/nz_single_source.dat + +x_col: 1 +y_col: 2 +x_units: arcmin +y_units: arcmin + +z1_col: 3 +z2_col: 4 + +verbose: 1 + +min_sep: 1. +max_sep: 20. +bin_size: 0.10 +sep_units: arcmin + +nz_file_name: output/nz_single.out diff --git a/tests/configs/nz_wpos.yaml b/tests/configs/nz_wpos.yaml new file mode 100644 index 00000000..6de4c152 --- /dev/null +++ b/tests/configs/nz_wpos.yaml @@ -0,0 +1,25 @@ + +file_name: data/nz_wpos_lens.fits +file_name2: + - data/nz_wpos_source0.fits + - data/nz_wpos_source1.fits + - data/nz_wpos_source2.fits + +x_col: x +y_col: y +x_units: arcmin +y_units: arcmin + +z1_col: z1 +z2_col: z2 +w_col: [0, w] +wpos_col: [0, wpos] + +verbose: 1 + +min_sep: 1. +max_sep: 25. +bin_size: 0.10 +sep_units: arcmin + +nz_file_name: output/nz_wpos.fits diff --git a/tests/configs/zz.yaml b/tests/configs/zz.yaml new file mode 100644 index 00000000..fb23e982 --- /dev/null +++ b/tests/configs/zz.yaml @@ -0,0 +1,19 @@ + +file_name: data/zz.dat + +x_col: 1 +y_col: 2 +x_units: arcmin +y_units: arcmin + +z1_col: 3 +z2_col: 4 + +verbose: 1 + +min_sep: 1. +max_sep: 100. +bin_size: 0.10 +sep_units: arcmin + +zz_file_name: output/zz.out diff --git a/tests/configs/zz_direct.yaml b/tests/configs/zz_direct.yaml new file mode 100644 index 00000000..69190598 --- /dev/null +++ b/tests/configs/zz_direct.yaml @@ -0,0 +1,22 @@ +file_name: data/zz_direct_cat1.fits +file_name2: data/zz_direct_cat2.fits + +x_col: x +y_col: y +x_units: arcmin +y_units: arcmin + +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 50. 
+nbins: 50 +sep_units: arcmin +bin_slop: 0 +brute: true + +zz_file_name: output/zz_direct.fits diff --git a/tests/configs/zz_direct_spherical.yaml b/tests/configs/zz_direct_spherical.yaml new file mode 100644 index 00000000..37af2a4d --- /dev/null +++ b/tests/configs/zz_direct_spherical.yaml @@ -0,0 +1,22 @@ +file_name: data/zz_direct_sph_cat1.fits +file_name2: data/zz_direct_sph_cat2.fits + +ra_col: ra +dec_col: dec +ra_units: radians +dec_units: radians + +z1_col: z1 +z2_col: z2 +w_col: w + +verbose: 1 + +min_sep: 1. +max_sep: 10. +nbins: 50 +sep_units: degrees +bin_slop: 0 +brute: true + +zz_file_name: output/zz_direct_sph.fits diff --git a/tests/data/test_kz_jk_1000.npz b/tests/data/test_kz_jk_1000.npz new file mode 100644 index 00000000..eb84ea33 Binary files /dev/null and b/tests/data/test_kz_jk_1000.npz differ diff --git a/tests/data/test_nz_jk_1000.npz b/tests/data/test_nz_jk_1000.npz new file mode 100644 index 00000000..e1c79f2e Binary files /dev/null and b/tests/data/test_nz_jk_1000.npz differ diff --git a/tests/data/test_varxi_kz.npz b/tests/data/test_varxi_kz.npz new file mode 100644 index 00000000..b28cb717 Binary files /dev/null and b/tests/data/test_varxi_kz.npz differ diff --git a/tests/data/test_varxi_nz.npz b/tests/data/test_varxi_nz.npz new file mode 100644 index 00000000..1f21d647 Binary files /dev/null and b/tests/data/test_varxi_nz.npz differ diff --git a/tests/data/test_varxi_zz.npz b/tests/data/test_varxi_zz.npz new file mode 100644 index 00000000..c09929d3 Binary files /dev/null and b/tests/data/test_varxi_zz.npz differ diff --git a/tests/data/test_zz_jk_1000.npz b/tests/data/test_zz_jk_1000.npz new file mode 100644 index 00000000..9deb04c7 Binary files /dev/null and b/tests/data/test_zz_jk_1000.npz differ diff --git a/tests/mpi_helper.py b/tests/mpi_helper.py index 5eaca4a2..26c8e1f0 100644 --- a/tests/mpi_helper.py +++ b/tests/mpi_helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/mpi_test.py b/tests/mpi_test.py index ad40b49d..8f209dd6 100644 --- a/tests/mpi_test.py +++ b/tests/mpi_test.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/mpi_test3pt.py b/tests/mpi_test3pt.py index 1f357130..bf977fdf 100644 --- a/tests/mpi_test3pt.py +++ b/tests/mpi_test3pt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 2df7a71a..e635bfda 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -39,6 +39,8 @@ def test_ascii(): r = rng.random_sample(nobj) wpos = rng.random_sample(nobj) k = rng.random_sample(nobj) + z1 = rng.random_sample(nobj) + z2 = rng.random_sample(nobj) v1 = 
rng.random_sample(nobj) v2 = rng.random_sample(nobj) g1 = rng.random_sample(nobj) @@ -66,11 +68,11 @@ def test_ascii(): file_name = os.path.join('data','test.dat') with open(file_name, 'w') as fid: # These are intentionally in a different order from the order we parse them. - fid.write('# ra, dec, x, y, k, g1, g2, w, z, v1, v2, r, wpos, flag, t1, t2, q1, q2\n') + fid.write('# ra, dec, x, y, k, g1, g2, w, z, v1, v2, r, wpos, flag, t1, t2, z1, z2, q1, q2\n') for i in range(nobj): - fid.write((('%.8f '*13)+'%d'+(' %.8f'*4)+'\n')%( + fid.write((('%.8f '*13)+'%d'+(' %.8f'*6)+'\n')%( ra[i],dec[i],x[i],y[i],k[i],g1[i],g2[i],w[i],z[i], - v1[i],v2[i],r[i],wpos[i],flags[i],t1[i],t2[i],q1[i],q2[i])) + v1[i],v2[i],r[i],wpos[i],flags[i],t1[i],t2[i],z1[i],z2[i],q1[i],q2[i])) # Check basic input config = { @@ -82,15 +84,18 @@ def test_ascii(): 'w_col' : 8, 'wpos_col' : 13, 'k_col' : 5, + 'z1_col' : 17, + 'z2_col' : 18, 'v1_col' : 10, 'v2_col' : 11, 'g1_col' : 6, 'g2_col' : 7, 't1_col' : 15, 't2_col' : 16, - 'q1_col' : 17, - 'q2_col' : 18, - 'kk_file_name' : 'kk.out', # These make sure k and g are required. + 'q1_col' : 19, + 'q2_col' : 20, + 'kk_file_name' : 'kk.out', # These make sure k, g, etc. are required. + 'zz_file_name' : 'zz.out', 'gg_file_name' : 'gg.out', 'vv_file_name' : 'vv.out', 'tt_file_name' : 'tt.out', @@ -103,10 +108,12 @@ def test_ascii(): np.testing.assert_almost_equal(cat1.z, z) np.testing.assert_almost_equal(cat1.w, w) np.testing.assert_almost_equal(cat1.k, k) - np.testing.assert_almost_equal(cat1.g1, g1) - np.testing.assert_almost_equal(cat1.g2, g2) + np.testing.assert_almost_equal(cat1.z1, z1) + np.testing.assert_almost_equal(cat1.z2, z2) np.testing.assert_almost_equal(cat1.v1, v1) np.testing.assert_almost_equal(cat1.v2, v2) + np.testing.assert_almost_equal(cat1.g1, g1) + np.testing.assert_almost_equal(cat1.g2, g2) np.testing.assert_almost_equal(cat1.t1, t1) np.testing.assert_almost_equal(cat1.t2, t2) np.testing.assert_almost_equal(cat1.q1, q1) @@ -123,10 +130,12 @@ def test_ascii(): 'w_col' : 'w', 'wpos_col' : 'wpos', 'k_col' : 'k', - 'g1_col' : 'g1', - 'g2_col' : 'g2', + 'z1_col' : 'z1', + 'z2_col' : 'z2', 'v1_col' : 'v1', 'v2_col' : 'v2', + 'g1_col' : 'g1', + 'g2_col' : 'g2', 't1_col' : 't1', 't2_col' : 't2', 'q1_col' : 'q1', @@ -139,10 +148,12 @@ def test_ascii(): np.testing.assert_almost_equal(cat1b.z, z) np.testing.assert_almost_equal(cat1b.w, w) np.testing.assert_almost_equal(cat1b.k, k) - np.testing.assert_almost_equal(cat1b.g1, g1) - np.testing.assert_almost_equal(cat1b.g2, g2) + np.testing.assert_almost_equal(cat1b.z1, z1) + np.testing.assert_almost_equal(cat1b.z2, z2) np.testing.assert_almost_equal(cat1b.v1, v1) np.testing.assert_almost_equal(cat1b.v2, v2) + np.testing.assert_almost_equal(cat1b.g1, g1) + np.testing.assert_almost_equal(cat1b.g2, g2) np.testing.assert_almost_equal(cat1b.t1, t1) np.testing.assert_almost_equal(cat1b.t2, t2) np.testing.assert_almost_equal(cat1b.q1, q1) @@ -157,6 +168,7 @@ def test_ascii(): assert_raises(TypeError, treecorr.Catalog, file_name, config, dec=dec) assert_raises(TypeError, treecorr.Catalog, file_name, config, r=r) assert_raises(TypeError, treecorr.Catalog, file_name, config, g2=g2) + assert_raises(TypeError, treecorr.Catalog, file_name, config, z1=z1) assert_raises(TypeError, treecorr.Catalog, file_name, config, v1=v1) assert_raises(TypeError, treecorr.Catalog, file_name, config, t1=t1) assert_raises(TypeError, treecorr.Catalog, file_name, config, q2=q2) @@ -190,6 +202,12 @@ def test_ascii(): assert_raises(ValueError, 
treecorr.Catalog, file_name, config, g2_col=-1) assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col=100) assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col=-1) + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col=100) + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col=-1) + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col=100) + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col=-1) assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col=100) assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col='invalid') @@ -356,11 +374,11 @@ def test_ascii(): fid.write('% And more than one header line.') fid.write('% Plus some extra comment lines every so often.') fid.write('% And we use a weird comment marker to boot.') - fid.write('% ra,dec,x,y,k,g1,g2,v1,v2,w,flag,t1,t2,q1,q2\n') + fid.write('% ra,dec,x,y,k,g1,g2,v1,v2,w,flag,t1,t2,z1,z2,q1,q2\n') for i in range(nobj): - fid.write((('%.8f,'*13)+'%d'+(',%.8f'*4)+'\n')%( + fid.write((('%.8f,'*13)+'%d'+(',%.8f'*6)+'\n')%( ra[i],dec[i],x[i],y[i],k[i],g1[i],g2[i],w[i],z[i], - v1[i],v2[i],r[i],wpos[i],flags[i],t1[i],t2[i],q1[i],q2[i])) + v1[i],v2[i],r[i],wpos[i],flags[i],t1[i],t2[i],z1[i],z2[i],q1[i],q2[i])) if i%100 == 0: fid.write('%%%% Line %d\n'%i) config['delimiter'] = ',' @@ -369,10 +387,12 @@ def test_ascii(): np.testing.assert_almost_equal(cat7.ra, ra * (pi/12.)) np.testing.assert_almost_equal(cat7.dec, dec * (pi/180.)) np.testing.assert_almost_equal(cat7.r, r) - np.testing.assert_almost_equal(cat7.g1, g1) - np.testing.assert_almost_equal(cat7.g2, g2) + np.testing.assert_almost_equal(cat7.z1, z1) + np.testing.assert_almost_equal(cat7.z2, z2) np.testing.assert_almost_equal(cat7.v1, v1) np.testing.assert_almost_equal(cat7.v2, v2) + np.testing.assert_almost_equal(cat7.g1, g1) + np.testing.assert_almost_equal(cat7.g2, g2) np.testing.assert_almost_equal(cat7.t1, t1) np.testing.assert_almost_equal(cat7.t2, t2) np.testing.assert_almost_equal(cat7.q1, q1) @@ -404,6 +424,27 @@ def test_ascii(): np.testing.assert_almost_equal(cat8.g1, -g1) np.testing.assert_almost_equal(cat8.g2, g2) + # Check flip_z1, flip_z2 + config['flip_z1'] = True + cat8 = treecorr.Catalog(file_name, config) + np.testing.assert_almost_equal(cat8.z1, -z1) + np.testing.assert_almost_equal(cat8.z2, z2) + + config['flip_z2'] = 'true' + cat8 = treecorr.Catalog(file_name, config) + np.testing.assert_almost_equal(cat8.z1, -z1) + np.testing.assert_almost_equal(cat8.z2, -z2) + + config['flip_z1'] = 'n' + config['flip_z2'] = 'yes' + cat8 = treecorr.Catalog(file_name, config) + np.testing.assert_almost_equal(cat8.z1, z1) + np.testing.assert_almost_equal(cat8.z2, -z2) + + cat8 = treecorr.Catalog(file_name, config, flip_z1=True, flip_z2=False) + np.testing.assert_almost_equal(cat8.z1, -z1) + np.testing.assert_almost_equal(cat8.z2, z2) + # Check flip_v1, flip_v2 config['flip_v1'] = True cat8 = treecorr.Catalog(file_name, config) @@ -472,10 +513,12 @@ def test_ascii(): np.testing.assert_almost_equal(cat9.ra, cat8.ra) np.testing.assert_almost_equal(cat9.dec, cat8.dec) np.testing.assert_almost_equal(cat9.r, cat8.r) - np.testing.assert_almost_equal(cat9.g1, cat8.g1) - np.testing.assert_almost_equal(cat9.g2, cat8.g2) + 
np.testing.assert_almost_equal(cat9.z1, cat8.z1) + np.testing.assert_almost_equal(cat9.z2, cat8.z2) np.testing.assert_almost_equal(cat9.v1, cat8.v1) np.testing.assert_almost_equal(cat9.v2, cat8.v2) + np.testing.assert_almost_equal(cat9.g1, cat8.g1) + np.testing.assert_almost_equal(cat9.g2, cat8.g2) np.testing.assert_almost_equal(cat9.t1, cat8.t1) np.testing.assert_almost_equal(cat9.t2, cat8.t2) np.testing.assert_almost_equal(cat9.q1, cat8.q1) @@ -498,10 +541,12 @@ def test_ascii(): assert 'Some wpos values are zero, setting w=0 for these points' in cl.output # Test using a limited set of rows - del config['flip_g1'] - del config['flip_g2'] + del config['flip_z1'] + del config['flip_z2'] del config['flip_v1'] del config['flip_v2'] + del config['flip_g1'] + del config['flip_g2'] del config['flip_t1'] del config['flip_t2'] del config['flip_q1'] @@ -515,10 +560,12 @@ def test_ascii(): np.testing.assert_equal(cat11.sumw, np.sum(cat11.w)) np.testing.assert_equal(cat11.sumw, np.sum(cat6.w[1009:3456])) np.testing.assert_almost_equal(cat11.k[1111], k[2120]) - np.testing.assert_almost_equal(cat11.g1[1111], g1[2120]) - np.testing.assert_almost_equal(cat11.g2[1111], g2[2120]) + np.testing.assert_almost_equal(cat11.z1[1111], z1[2120]) + np.testing.assert_almost_equal(cat11.z2[1111], z2[2120]) np.testing.assert_almost_equal(cat11.v1[1111], v1[2120]) np.testing.assert_almost_equal(cat11.v2[1111], v2[2120]) + np.testing.assert_almost_equal(cat11.g1[1111], g1[2120]) + np.testing.assert_almost_equal(cat11.g2[1111], g2[2120]) np.testing.assert_almost_equal(cat11.t1[1111], t1[2120]) np.testing.assert_almost_equal(cat11.t2[1111], t2[2120]) np.testing.assert_almost_equal(cat11.q1[1111], q1[2120]) @@ -532,10 +579,12 @@ def test_ascii(): np.testing.assert_equal(cat12.sumw, np.sum(cat12.w)) np.testing.assert_equal(cat12.sumw, np.sum(cat6.w[1009:3456])) assert cat12.k is None - assert cat12.g1 is None - assert cat12.g2 is None + assert cat12.z1 is None + assert cat12.z2 is None assert cat12.v1 is None assert cat12.v2 is None + assert cat12.g1 is None + assert cat12.g2 is None assert cat12.t1 is None assert cat12.t2 is None assert cat12.q1 is None @@ -561,10 +610,12 @@ def test_ascii(): print('cat6.w[3459] = ',cat6.w[3459]) np.testing.assert_equal(cat13.sumw, np.sum(cat6.w[1009:3456:10])) np.testing.assert_almost_equal(cat13.k[100], k[2009]) - np.testing.assert_almost_equal(cat13.g1[100], g1[2009]) - np.testing.assert_almost_equal(cat13.g2[100], g2[2009]) + np.testing.assert_almost_equal(cat13.z1[100], z1[2009]) + np.testing.assert_almost_equal(cat13.z2[100], z2[2009]) np.testing.assert_almost_equal(cat13.v1[100], v1[2009]) np.testing.assert_almost_equal(cat13.v2[100], v2[2009]) + np.testing.assert_almost_equal(cat13.g1[100], g1[2009]) + np.testing.assert_almost_equal(cat13.g2[100], g2[2009]) np.testing.assert_almost_equal(cat13.t1[100], t1[2009]) np.testing.assert_almost_equal(cat13.t2[100], t2[2009]) np.testing.assert_almost_equal(cat13.q1[100], q1[2009]) @@ -580,12 +631,14 @@ def test_ascii(): np.testing.assert_equal(cat13a.sumw, np.sum(cat13a.w)) np.testing.assert_equal(cat13a.sumw, np.sum(cat6.w[::10])) np.testing.assert_almost_equal(cat13a.k[100], k[1000]) - np.testing.assert_almost_equal(cat13a.g1[100], g1[1000]) - np.testing.assert_almost_equal(cat13a.g2[100], g2[1000]) + np.testing.assert_almost_equal(cat13a.z1[100], z1[1000]) + np.testing.assert_almost_equal(cat13a.z2[100], z2[1000]) np.testing.assert_almost_equal(cat13a.v1[100], v1[1000]) np.testing.assert_almost_equal(cat13a.v2[100], v2[1000]) 
np.testing.assert_almost_equal(cat13a.t1[100], t1[1000]) np.testing.assert_almost_equal(cat13a.t2[100], t2[1000]) + np.testing.assert_almost_equal(cat13a.g1[100], g1[1000]) + np.testing.assert_almost_equal(cat13a.g2[100], g2[1000]) np.testing.assert_almost_equal(cat13a.q1[100], q1[1000]) np.testing.assert_almost_equal(cat13a.q2[100], q2[1000]) @@ -646,10 +699,12 @@ def test_ascii(): assert cat14a._w is None assert cat14a._wpos is None assert cat14a._k is None - assert cat14a._g1 is None - assert cat14a._g2 is None + assert cat14a._z1 is None + assert cat14a._z2 is None assert cat14a._v1 is None assert cat14a._v2 is None + assert cat14a._g1 is None + assert cat14a._g2 is None assert cat14a._t1 is None assert cat14a._t2 is None assert cat14a._q1 is None @@ -693,9 +748,12 @@ def _test_aardvark(filename, file_type, ext): config['gg_file_name'] = 'gg.out' include_v = (file_type != 'Parquet') # Parquet cannot handle duplicated names. if include_v: + config['zz_file_name'] = 'zz.out' config['vv_file_name'] = 'vv.out' config['tt_file_name'] = 'tt.out' config['qq_file_name'] = 'qq.out' + config['z1_col'] = config['g1_col'] + config['z2_col'] = config['g2_col'] config['v1_col'] = config['g1_col'] config['v2_col'] = config['g2_col'] config['t1_col'] = config['g1_col'] @@ -716,6 +774,8 @@ def _test_aardvark(filename, file_type, ext): np.testing.assert_almost_equal(cat1.g1[46392], 0.0005066675) np.testing.assert_almost_equal(cat1.g2[46392], -0.0001006742) if include_v: + np.testing.assert_almost_equal(cat1.z1[46392], 0.0005066675) + np.testing.assert_almost_equal(cat1.z2[46392], -0.0001006742) np.testing.assert_almost_equal(cat1.v1[46392], 0.0005066675) np.testing.assert_almost_equal(cat1.v2[46392], -0.0001006742) np.testing.assert_almost_equal(cat1.t1[46392], 0.0005066675) @@ -736,10 +796,12 @@ def _test_aardvark(filename, file_type, ext): assert_raises(ValueError, treecorr.Catalog, file_name, config, wpos_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, flag_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, k_col='invalid') - assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid') - assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, v2_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, t1_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, t2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, q1_col='invalid') @@ -754,6 +816,8 @@ def _test_aardvark(filename, file_type, ext): assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='0') if include_v: + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col='0') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, v2_col='0') 
assert_raises(ValueError, treecorr.Catalog, file_name, config, t1_col='0') @@ -828,10 +892,12 @@ def _test_aardvark(filename, file_type, ext): assert_raises(ValueError, treecorr.Catalog, file_name, config, wpos_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, flag_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, k_col='invalid') - assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid') - assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, v2_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, g1_col='invalid') + assert_raises(ValueError, treecorr.Catalog, file_name, config, g2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, t1_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, t2_col='invalid') assert_raises(ValueError, treecorr.Catalog, file_name, config, q1_col='invalid') @@ -852,6 +918,8 @@ def _test_aardvark(filename, file_type, ext): np.testing.assert_almost_equal(cat3.g1[46292], cat2.g1[46392]) np.testing.assert_almost_equal(cat3.g2[46292], cat2.g2[46392]) if include_v: + np.testing.assert_almost_equal(cat3.z1[46292], cat2.z1[46392]) + np.testing.assert_almost_equal(cat3.z2[46292], cat2.z2[46392]) np.testing.assert_almost_equal(cat3.v1[46292], cat2.v1[46392]) np.testing.assert_almost_equal(cat3.v2[46292], cat2.v2[46392]) np.testing.assert_almost_equal(cat3.t1[46292], cat2.t1[46392]) @@ -866,10 +934,12 @@ def _test_aardvark(filename, file_type, ext): np.testing.assert_equal(cat4.sumw, np.sum(cat4.w)) np.testing.assert_equal(cat4.sumw, np.sum(cat2.w[100:50000])) assert cat4.k is None - assert cat4.g1 is None - assert cat4.g2 is None + assert cat4.z1 is None + assert cat4.z2 is None assert cat4.v1 is None assert cat4.v2 is None + assert cat4.g1 is None + assert cat4.g2 is None assert cat4.t1 is None assert cat4.t2 is None assert cat4.q1 is None @@ -890,6 +960,8 @@ def _test_aardvark(filename, file_type, ext): np.testing.assert_almost_equal(cat5a.g1[123], cat2.g1[12300]) np.testing.assert_almost_equal(cat5a.g2[123], cat2.g2[12300]) if include_v: + np.testing.assert_almost_equal(cat5a.z1[123], cat2.z1[12300]) + np.testing.assert_almost_equal(cat5a.z2[123], cat2.z2[12300]) np.testing.assert_almost_equal(cat5a.v1[123], cat2.v1[12300]) np.testing.assert_almost_equal(cat5a.v2[123], cat2.v2[12300]) np.testing.assert_almost_equal(cat5a.t1[123], cat2.t1[12300]) @@ -910,6 +982,8 @@ def _test_aardvark(filename, file_type, ext): np.testing.assert_almost_equal(cat5.g1[123], cat2.g1[12400]) np.testing.assert_almost_equal(cat5.g2[123], cat2.g2[12400]) if include_v: + np.testing.assert_almost_equal(cat5.z1[123], cat2.z1[12400]) + np.testing.assert_almost_equal(cat5.z2[123], cat2.z2[12400]) np.testing.assert_almost_equal(cat5.v1[123], cat2.v1[12400]) np.testing.assert_almost_equal(cat5.v2[123], cat2.v2[12400]) np.testing.assert_almost_equal(cat5.t1[123], cat2.t1[12400]) @@ -968,10 +1042,12 @@ def _test_aardvark(filename, file_type, ext): assert cat6a._w is None assert cat6a._wpos is None assert cat6a._k is None - assert cat6a._g1 is None - assert cat6a._g2 is None + assert cat6a._z1 is None + assert 
cat6a._z2 is None assert cat6a._v1 is None assert cat6a._v2 is None + assert cat6a._g1 is None + assert cat6a._g2 is None assert cat6a._t1 is None assert cat6a._t2 is None assert cat6a._q1 is None @@ -993,6 +1069,8 @@ def _test_aardvark(filename, file_type, ext): if include_v: del config['vv_file_name'] + assert_raises(ValueError, treecorr.Catalog, file_name, config, z1_col='0') + assert_raises(ValueError, treecorr.Catalog, file_name, config, z2_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, v1_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, v2_col='0') assert_raises(ValueError, treecorr.Catalog, file_name, config, t1_col='0') @@ -1022,19 +1100,21 @@ def test_ext(): w = wpos * rng.binomial(1, 0.95, (ngal,)) flag = rng.binomial(3, 0.02, (ngal,)) k = rng.normal(0,3, (ngal,) ) - g1 = rng.normal(0,0.1, (ngal,) ) - g2 = rng.normal(0,0.1, (ngal,) ) + z1 = rng.normal(0,0.1, (ngal,) ) + z2 = rng.normal(0,0.1, (ngal,) ) v1 = rng.normal(0,0.1, (ngal,) ) v2 = rng.normal(0,0.1, (ngal,) ) + g1 = rng.normal(0,0.1, (ngal,) ) + g2 = rng.normal(0,0.1, (ngal,) ) t1 = rng.normal(0,0.1, (ngal,) ) t2 = rng.normal(0,0.1, (ngal,) ) q1 = rng.normal(0,0.1, (ngal,) ) q2 = rng.normal(0,0.1, (ngal,) ) patch = np.arange(ngal) % 5 - data = [x,y,z,ra,dec,r,flag,w,wpos,k,g1,g2,v1,v2,t1,t2,q1,q2] + data = [x,y,z,ra,dec,r,flag,w,wpos,k,z1,z2,v1,v2,g1,g2,t1,t2,q1,q2] names = ['x','y','z','ra','dec','r','flag','w','wpos', - 'k','g1','g2','v1','v2','t1','t2','q1','q2'] + 'k','z1','z2','v1','v2','g1','g2','t1','t2','q1','q2'] fname = os.path.join('data','test_ext.fits') with fitsio.FITS(fname, 'rw', clobber=True) as f: @@ -1052,7 +1132,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=1) cat2 = treecorr.Catalog(fname, allow_xyz=True, @@ -1060,7 +1141,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=2) assert cat2 == cat1 @@ -1098,17 +1180,20 @@ def test_ext(): cat6 = treecorr.Catalog(fname, x_col='x', y_col='y', z_col='z', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=6) np.testing.assert_array_equal(cat6.x[use], cat1.x) np.testing.assert_array_equal(cat6.y[use], cat1.y) np.testing.assert_array_equal(cat6.z[use], cat1.z) np.testing.assert_array_equal(cat6.k[use], cat1.k) - np.testing.assert_array_equal(cat6.g1[use], cat1.g1) - np.testing.assert_array_equal(cat6.g2[use], cat1.g2) + np.testing.assert_array_equal(cat6.z1[use], cat1.z1) + np.testing.assert_array_equal(cat6.z2[use], cat1.z2) np.testing.assert_array_equal(cat6.v1[use], cat1.v1) np.testing.assert_array_equal(cat6.v2[use], cat1.v2) + np.testing.assert_array_equal(cat6.g1[use], cat1.g1) + np.testing.assert_array_equal(cat6.g2[use], cat1.g2) np.testing.assert_array_equal(cat6.t1[use], cat1.t1) np.testing.assert_array_equal(cat6.t2[use], cat1.t2) 
np.testing.assert_array_equal(cat6.q1[use], cat1.q1) @@ -1119,7 +1204,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=8) assert cat7 != cat1 # This one has all the column names wrong. @@ -1129,7 +1215,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=-1) assert cat8 == cat7 # -1 is allowed and means the last one. @@ -1139,12 +1226,14 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', x_ext=1, y_ext=1, z_ext=1, ra_ext=2, dec_ext=1, r_ext=2, w_ext=1, wpos_ext=2, flag_ext=1, - k_ext=1, g1_ext=1, g2_ext=2, v1_ext=1, v2_ext=2, + k_ext=1, z1_ext=1, z2_ext=2, + v1_ext=1, v2_ext=2, g1_ext=1, g2_ext=2, t1_ext=2, t2_ext=1, q1_ext=2, q2_ext=1) assert cat9 == cat1 @@ -1153,12 +1242,14 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', x_ext=3, y_ext=3, z_ext=3, ra_ext=4, dec_ext=4, r_ext=4, w_ext=5, wpos_ext=5, flag_ext=5, - k_ext=6, g1_ext=6, g2_ext=6, v1_ext=6, v2_ext=6, + k_ext=6, z1_ext=6, z2_ext=6, + v1_ext=6, v2_ext=6, g1_ext=6, g2_ext=6, t1_ext=6, t2_ext=6, q1_ext=6, q2_ext=6) assert cat10 == cat1 @@ -1169,7 +1260,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=3) @@ -1180,7 +1272,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=9) with assert_raises(ValueError): @@ -1189,7 +1282,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=0) with assert_raises(ValueError): @@ -1198,7 +1292,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', 
g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=-20) @@ -1207,14 +1302,16 @@ def test_ext(): treecorr.Catalog(fname, ra_col='ra', dec_col='dec', ra_units='rad', dec_units='rad', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=6) # Position columns required with assert_raises(ValueError): treecorr.Catalog(fname, - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=6) @@ -1224,7 +1321,8 @@ def test_ext(): x_col='x', y_col='y', z_col='z', ra_col='ra', dec_col='dec', r_col='r', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=1) with assert_raises(TypeError): @@ -1233,7 +1331,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', ra_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=1) with assert_raises(TypeError): @@ -1242,7 +1341,8 @@ def test_ext(): ra_col='ra', dec_col='dec', r_col='r', dec_units='rad', w_col='w', wpos_col='wpos', flag_col='flag', - k_col='k', g1_col='g1', g2_col='g2', v1_col='v1', v2_col='v2', + k_col='k', z1_col='z1', z2_col='z2', + v1_col='v1', v2_col='v2', g1_col='g1', g2_col='g2', t1_col='t1', t2_col='t2', q1_col='q1', q2_col='q2', ext=1) @@ -1258,41 +1358,48 @@ def test_direct(): dec = rng.random_sample(nobj) w = rng.random_sample(nobj) k = rng.random_sample(nobj) - g1 = rng.random_sample(nobj) - g2 = rng.random_sample(nobj) + z1 = rng.random_sample(nobj) + z2 = rng.random_sample(nobj) v1 = rng.random_sample(nobj) v2 = rng.random_sample(nobj) + g1 = rng.random_sample(nobj) + g2 = rng.random_sample(nobj) t1 = rng.random_sample(nobj) t2 = rng.random_sample(nobj) q1 = rng.random_sample(nobj) q2 = rng.random_sample(nobj) - cat1 = treecorr.Catalog(x=x, y=y, w=w, g1=g1, g2=g2, k=k, v1=v1, v2=v2, + cat1 = treecorr.Catalog(x=x, y=y, w=w, k=k, z1=z1, z2=z2, + v1=v1, v2=v2, g1=g1, g2=g2, t1=t1, t2=t2, q1=q1, q2=q2) np.testing.assert_almost_equal(cat1.x, x) np.testing.assert_almost_equal(cat1.y, y) np.testing.assert_almost_equal(cat1.w, w) np.testing.assert_almost_equal(cat1.k, k) - np.testing.assert_almost_equal(cat1.g1, g1) - np.testing.assert_almost_equal(cat1.g2, g2) + np.testing.assert_almost_equal(cat1.z1, z1) + np.testing.assert_almost_equal(cat1.z2, z2) np.testing.assert_almost_equal(cat1.v1, v1) np.testing.assert_almost_equal(cat1.v2, v2) + np.testing.assert_almost_equal(cat1.g1, g1) + np.testing.assert_almost_equal(cat1.g2, g2) np.testing.assert_almost_equal(cat1.t1, t1) np.testing.assert_almost_equal(cat1.t2, t2) np.testing.assert_almost_equal(cat1.q1, q1) np.testing.assert_almost_equal(cat1.q2, q2) - cat2 = treecorr.Catalog(ra=ra, dec=dec, w=w, g1=g1, g2=g2, k=k, - v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2, + cat2 = treecorr.Catalog(ra=ra, dec=dec, w=w, k=k, z1=z1, 
z2=z2, + v1=v1, v2=v2, g1=g1, g2=g2, t1=t1, t2=t2, q1=q1, q2=q2, ra_units='hours', dec_units='degrees') np.testing.assert_almost_equal(cat2.ra, ra * coord.hours / coord.radians) np.testing.assert_almost_equal(cat2.dec, dec * coord.degrees / coord.radians) np.testing.assert_almost_equal(cat2.w, w) np.testing.assert_almost_equal(cat2.k, k) - np.testing.assert_almost_equal(cat2.g1, g1) - np.testing.assert_almost_equal(cat2.g2, g2) + np.testing.assert_almost_equal(cat2.z1, z1) + np.testing.assert_almost_equal(cat2.z2, z2) np.testing.assert_almost_equal(cat2.v1, v1) np.testing.assert_almost_equal(cat2.v2, v2) + np.testing.assert_almost_equal(cat2.g1, g1) + np.testing.assert_almost_equal(cat2.g2, g2) np.testing.assert_almost_equal(cat2.t1, t1) np.testing.assert_almost_equal(cat2.t2, t2) np.testing.assert_almost_equal(cat2.q1, q1) @@ -1317,10 +1424,12 @@ def test_direct(): assert_raises(TypeError, treecorr.Catalog, x=x, y=y, dec_units='degrees') assert_raises(TypeError, treecorr.Catalog, ra=ra, ra_units='hours') assert_raises(TypeError, treecorr.Catalog, dec=dec, dec_units='degrees') - assert_raises(TypeError, treecorr.Catalog, x=x, y=y, g1=g1) - assert_raises(TypeError, treecorr.Catalog, x=x, y=y, g2=g2) + assert_raises(TypeError, treecorr.Catalog, x=x, y=y, z1=z1) + assert_raises(TypeError, treecorr.Catalog, x=x, y=y, z2=z2) assert_raises(TypeError, treecorr.Catalog, x=x, y=y, v1=v1) assert_raises(TypeError, treecorr.Catalog, x=x, y=y, v2=v2) + assert_raises(TypeError, treecorr.Catalog, x=x, y=y, g1=g1) + assert_raises(TypeError, treecorr.Catalog, x=x, y=y, g2=g2) assert_raises(TypeError, treecorr.Catalog, x=x, y=y, t1=t1) assert_raises(TypeError, treecorr.Catalog, x=x, y=y, t2=t2) assert_raises(TypeError, treecorr.Catalog, x=x, y=y, q1=q1) @@ -1341,12 +1450,15 @@ def test_direct(): assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, wpos=w[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, k=k[4:]) - assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1[4:], g2=g2[4:]) - assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1[4:], g2=g2) - assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1, g2=g2[4:]) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, z1=z1[4:], z2=z2[4:]) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, z1=z1[4:], z2=z2) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, z1=z1, z2=z2[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, v1=v1[4:], v2=v2[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, v1=v1[4:], v2=v2) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, v1=v1, v2=v2[4:]) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1[4:], g2=g2[4:]) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1[4:], g2=g2) + assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, g1=g1, g2=g2[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, t1=t1[4:], t2=t2[4:]) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, t1=t1[4:], t2=t2) assert_raises(ValueError, treecorr.Catalog, x=x, y=y, w=w, t1=t1, t2=t2[4:]) @@ -1370,10 +1482,12 @@ def test_var(): # First without weights cats = [] allk = [] - allg1 = [] - allg2 = [] + allz1 = [] + allz2 = [] allv1 = [] allv2 = [] + allg1 = [] + allg2 = [] allt1 = [] allt2 = [] allq1 = [] @@ -1382,19 +1496,22 @@ def test_var(): x = rng.random_sample(nobj) y = rng.random_sample(nobj) k = rng.random_sample(nobj) - 0.5 - 
g1 = rng.random_sample(nobj) - 0.5 - g2 = rng.random_sample(nobj) - 0.5 + z1 = rng.random_sample(nobj) - 0.5 + z2 = rng.random_sample(nobj) - 0.5 v1 = rng.random_sample(nobj) - 0.5 v2 = rng.random_sample(nobj) - 0.5 + g1 = rng.random_sample(nobj) - 0.5 + g2 = rng.random_sample(nobj) - 0.5 t1 = rng.random_sample(nobj) - 0.5 t2 = rng.random_sample(nobj) - 0.5 q1 = rng.random_sample(nobj) - 0.5 q2 = rng.random_sample(nobj) - 0.5 - cat = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, v1=v1, v2=v2, + cat = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) vark = np.var(k, ddof=0) - varg = (np.var(g1, ddof=0) + np.var(g2, ddof=0))/2 + varz = (np.var(z1, ddof=0) + np.var(z2, ddof=0))/2 varv = (np.var(v1, ddof=0) + np.var(v2, ddof=0))/2 + varg = (np.var(g1, ddof=0) + np.var(g2, ddof=0))/2 vart = (np.var(t1, ddof=0) + np.var(t2, ddof=0))/2 varq = (np.var(q1, ddof=0) + np.var(q2, ddof=0))/2 assert np.isclose(cat.vark, vark) @@ -1414,27 +1531,32 @@ def test_var(): assert np.isclose(treecorr.calculateVarQ([cat]), varq) cats.append(cat) allk.extend(k) - allg1.extend(g1) - allg2.extend(g2) + allz1.extend(z1) + allz2.extend(z2) allv1.extend(v1) allv2.extend(v2) + allg1.extend(g1) + allg2.extend(g2) allt1.extend(t1) allt2.extend(t2) allq1.extend(q1) allq2.extend(q2) allk = np.array(allk) - allg1 = np.array(allg1) - allg2 = np.array(allg2) + allz1 = np.array(allz1) + allz2 = np.array(allz2) allv1 = np.array(allv1) allv2 = np.array(allv2) + allg1 = np.array(allg1) + allg2 = np.array(allg2) allt1 = np.array(allt1) allt2 = np.array(allt2) allq1 = np.array(allq1) allq2 = np.array(allq2) vark = np.var(allk, ddof=0) - varg = (np.var(allg1, ddof=0) + np.var(allg2, ddof=0))/2 + varz = (np.var(allz1, ddof=0) + np.var(allz2, ddof=0))/2 varv = (np.var(allv1, ddof=0) + np.var(allv2, ddof=0))/2 + varg = (np.var(allg1, ddof=0) + np.var(allg2, ddof=0))/2 vart = (np.var(allt1, ddof=0) + np.var(allt2, ddof=0))/2 varq = (np.var(allq1, ddof=0) + np.var(allq2, ddof=0))/2 assert np.isclose(treecorr.calculateVarK(cats), vark) @@ -1446,10 +1568,12 @@ def test_var(): # Now with weights cats = [] allk = [] - allg1 = [] - allg2 = [] + allz1 = [] + allz2 = [] allv1 = [] allv2 = [] + allg1 = [] + allg2 = [] allt1 = [] allt2 = [] allq1 = [] @@ -1460,21 +1584,26 @@ def test_var(): y = rng.random_sample(nobj) w = rng.random_sample(nobj) k = rng.random_sample(nobj) - g1 = rng.random_sample(nobj) - g2 = rng.random_sample(nobj) + z1 = rng.random_sample(nobj) + z2 = rng.random_sample(nobj) v1 = rng.random_sample(nobj) v2 = rng.random_sample(nobj) + g1 = rng.random_sample(nobj) + g2 = rng.random_sample(nobj) t1 = rng.random_sample(nobj) t2 = rng.random_sample(nobj) q1 = rng.random_sample(nobj) q2 = rng.random_sample(nobj) - cat = treecorr.Catalog(x=x, y=y, w=w, g1=g1, g2=g2, k=k, + cat = treecorr.Catalog(x=x, y=y, w=w, g1=g1, g2=g2, k=k, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) meank = np.sum(w*k)/np.sum(w) vark = np.sum(w**2 * (k-meank)**2) / np.sum(w) meang1 = np.sum(w*g1)/np.sum(w) meang2 = np.sum(w*g2)/np.sum(w) varg = np.sum(w**2 * ((g1-meang1)**2 + (g2-meang2)**2)) / (2*np.sum(w)) + meanz1 = np.sum(w*z1)/np.sum(w) + meanz2 = np.sum(w*z2)/np.sum(w) + varz = np.sum(w**2 * ((z1-meanz1)**2 + (z2-meanz2)**2)) / (2*np.sum(w)) meanv1 = np.sum(w*v1)/np.sum(w) meanv2 = np.sum(w*v2)/np.sum(w) varv = np.sum(w**2 * ((v1-meanv1)**2 + (v2-meanv2)**2)) / (2*np.sum(w)) @@ -1501,10 +1630,12 @@ def test_var(): assert np.isclose(treecorr.calculateVarQ([cat]), varq) cats.append(cat) 
allk.extend(k) - allg1.extend(g1) - allg2.extend(g2) + allz1.extend(z1) + allz2.extend(z2) allv1.extend(v1) allv2.extend(v2) + allg1.extend(g1) + allg2.extend(g2) allt1.extend(t1) allt2.extend(t2) allq1.extend(q1) @@ -1512,10 +1643,12 @@ def test_var(): allw.extend(w) allk = np.array(allk) - allg1 = np.array(allg1) - allg2 = np.array(allg2) + allz1 = np.array(allz1) + allz2 = np.array(allz2) allv1 = np.array(allv1) allv2 = np.array(allv2) + allg1 = np.array(allg1) + allg2 = np.array(allg2) allt1 = np.array(allt1) allt2 = np.array(allt2) allq1 = np.array(allq1) @@ -1526,6 +1659,9 @@ def test_var(): meang1 = np.sum(allw*allg1)/np.sum(allw) meang2 = np.sum(allw*allg2)/np.sum(allw) varg = np.sum(allw**2 * ((allg1-meang1)**2 + (allg2-meang2)**2)) / (2*np.sum(allw)) + meanz1 = np.sum(allw*allz1)/np.sum(allw) + meanz2 = np.sum(allw*allz2)/np.sum(allw) + varz = np.sum(allw**2 * ((allz1-meanz1)**2 + (allz2-meanz2)**2)) / (2*np.sum(allw)) meanv1 = np.sum(allw*allv1)/np.sum(allw) meanv2 = np.sum(allw*allv2)/np.sum(allw) varv = np.sum(allw**2 * ((allv1-meanv1)**2 + (allv2-meanv2)**2)) / (2*np.sum(allw)) @@ -1558,10 +1694,12 @@ def test_var(): # If variances are specified on input, use them. cats = [] allk = [] - allg1 = [] - allg2 = [] + allz1 = [] + allz2 = [] allv1 = [] allv2 = [] + allg1 = [] + allg2 = [] allt1 = [] allt2 = [] allq1 = [] @@ -1570,30 +1708,36 @@ def test_var(): x = rng.random_sample(nobj) y = rng.random_sample(nobj) k = rng.random_sample(nobj) - 0.5 - g1 = rng.random_sample(nobj) - 0.5 - g2 = rng.random_sample(nobj) - 0.5 + z1 = rng.random_sample(nobj) - 0.5 + z2 = rng.random_sample(nobj) - 0.5 v1 = rng.random_sample(nobj) - 0.5 v2 = rng.random_sample(nobj) - 0.5 + g1 = rng.random_sample(nobj) - 0.5 + g2 = rng.random_sample(nobj) - 0.5 t1 = rng.random_sample(nobj) - 0.5 t2 = rng.random_sample(nobj) - 0.5 q1 = rng.random_sample(nobj) - 0.5 q2 = rng.random_sample(nobj) - 0.5 vark = np.var(k, ddof=0) - varg = (np.var(g1, ddof=0) + np.var(g2, ddof=0))/2 + varz = (np.var(z1, ddof=0) + np.var(z2, ddof=0))/2 varv = (np.var(v1, ddof=0) + np.var(v2, ddof=0))/2 + varg = (np.var(g1, ddof=0) + np.var(g2, ddof=0))/2 vart = (np.var(t1, ddof=0) + np.var(t2, ddof=0))/2 varq = (np.var(q1, ddof=0) + np.var(q2, ddof=0))/2 - cat = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, + cat = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2, - vark=3*vark, varg=5*varg, varv=7*varv, + vark=3*vark, varz=vark, varg=5*varg, varv=7*varv, vart=4*vart, varq=8*varq) assert np.isclose(cat.vark, 3*vark) + assert np.isclose(cat.varz, vark) assert np.isclose(cat.varg, 5*varg) assert np.isclose(cat.varv, 7*varv) assert np.isclose(cat.vart, 4*vart) assert np.isclose(cat.varq, 8*varq) assert np.isclose(treecorr.calculateVarK(cat), 3*vark) assert np.isclose(treecorr.calculateVarK([cat]), 3*vark) + assert np.isclose(treecorr.calculateVarZ(cat), vark) + assert np.isclose(treecorr.calculateVarZ([cat]), vark) assert np.isclose(treecorr.calculateVarG(cat), 5*varg) assert np.isclose(treecorr.calculateVarG([cat]), 5*varg) assert np.isclose(treecorr.calculateVarV(cat), 7*varv) @@ -1604,10 +1748,12 @@ def test_var(): assert np.isclose(treecorr.calculateVarQ([cat]), 8*varq) cats.append(cat) allk.extend(k) - allg1.extend(g1) - allg2.extend(g2) + allz1.extend(z1) + allz2.extend(z2) allv1.extend(v1) allv2.extend(v2) + allg1.extend(g1) + allg2.extend(g2) allt1.extend(t1) allt2.extend(t2) allq1.extend(q1) @@ -1623,8 +1769,9 @@ def test_var(): allq1 = np.array(allq1) allq2 = np.array(allq2) 
vark = np.var(allk, ddof=0) - varg = (np.var(allg1, ddof=0) + np.var(allg2, ddof=0))/2 + varz = (np.var(allz1, ddof=0) + np.var(allz2, ddof=0))/2 varv = (np.var(allv1, ddof=0) + np.var(allv2, ddof=0))/2 + varg = (np.var(allg1, ddof=0) + np.var(allg2, ddof=0))/2 vart = (np.var(allt1, ddof=0) + np.var(allt2, ddof=0))/2 varq = (np.var(allq1, ddof=0) + np.var(allq2, ddof=0))/2 # These aren't exactly the same because the means in each catalog are slightly different. @@ -1650,10 +1797,12 @@ def test_nan(): w = rng.random_sample(nobj) wpos = rng.random_sample(nobj) k = rng.random_sample(nobj) - g1 = rng.random_sample(nobj) - g2 = rng.random_sample(nobj) + z1 = rng.random_sample(nobj) + z2 = rng.random_sample(nobj) v1 = rng.random_sample(nobj) v2 = rng.random_sample(nobj) + g1 = rng.random_sample(nobj) + g2 = rng.random_sample(nobj) t1 = rng.random_sample(nobj) t2 = rng.random_sample(nobj) q1 = rng.random_sample(nobj) @@ -1669,10 +1818,12 @@ def test_nan(): w[rng.choice(nobj, nobj//100)] = np.nan wpos[rng.choice(nobj, nobj//100)] = np.nan k[rng.choice(nobj, nobj//100)] = np.nan - g1[rng.choice(nobj, nobj//100)] = np.nan - g2[rng.choice(nobj, nobj//100)] = np.nan + z1[rng.choice(nobj, nobj//100)] = np.nan + z2[rng.choice(nobj, nobj//100)] = np.nan v1[rng.choice(nobj, nobj//100)] = np.nan v2[rng.choice(nobj, nobj//100)] = np.nan + g1[rng.choice(nobj, nobj//100)] = np.nan + g2[rng.choice(nobj, nobj//100)] = np.nan t1[rng.choice(nobj, nobj//100)] = np.nan t2[rng.choice(nobj, nobj//100)] = np.nan q1[rng.choice(nobj, nobj//100)] = np.nan @@ -1685,10 +1836,12 @@ def test_nan(): print('w is nan at ',np.where(np.isnan(w))) print('wpos is nan at ',np.where(np.isnan(wpos))) print('k is nan at ',np.where(np.isnan(k))) - print('g1 is nan at ',np.where(np.isnan(g1))) - print('g2 is nan at ',np.where(np.isnan(g2))) + print('z1 is nan at ',np.where(np.isnan(z1))) + print('z2 is nan at ',np.where(np.isnan(z2))) print('v1 is nan at ',np.where(np.isnan(v1))) print('v2 is nan at ',np.where(np.isnan(v2))) + print('g1 is nan at ',np.where(np.isnan(g1))) + print('g2 is nan at ',np.where(np.isnan(g2))) print('t1 is nan at ',np.where(np.isnan(t1))) print('t2 is nan at ',np.where(np.isnan(t2))) print('q1 is nan at ',np.where(np.isnan(q1))) @@ -1713,25 +1866,27 @@ def test_nan(): np.testing.assert_almost_equal(cat1.w[mask], 0) with CaptureLog() as cl: - cat2 = treecorr.Catalog(ra=ra, dec=dec, r=r, w=w, wpos=wpos, g1=g1, g2=g2, - v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2, + cat2 = treecorr.Catalog(ra=ra, dec=dec, r=r, w=w, wpos=wpos, z1=z1, z2=z2, + v1=v1, v2=v2, g1=g1, g2=g2, t1=t1, t2=t2, q1=q1, q2=q2, ra_units='hours', dec_units='degrees', logger=cl.logger, keep_zero_weight=True) assert "NaNs found in ra column." in cl.output assert "NaNs found in dec column." in cl.output assert "NaNs found in r column." in cl.output - assert "NaNs found in g1 column." in cl.output - assert "NaNs found in g2 column." in cl.output + assert "NaNs found in z1 column." in cl.output + assert "NaNs found in z2 column." in cl.output assert "NaNs found in v1 column." in cl.output assert "NaNs found in v2 column." in cl.output + assert "NaNs found in g1 column." in cl.output + assert "NaNs found in g2 column." in cl.output assert "NaNs found in t1 column." in cl.output assert "NaNs found in t2 column." in cl.output assert "NaNs found in q1 column." in cl.output assert "NaNs found in q2 column." in cl.output assert "NaNs found in w column." in cl.output assert "NaNs found in wpos column." 
in cl.output - mask = (np.isnan(ra) | np.isnan(dec) | np.isnan(r) | - np.isnan(g1) | np.isnan(g2) | np.isnan(v1) | np.isnan(v2) | + mask = (np.isnan(ra) | np.isnan(dec) | np.isnan(r) | np.isnan(z1) | np.isnan(z2) | + np.isnan(v1) | np.isnan(v2) | np.isnan(g1) | np.isnan(g2) | np.isnan(t1) | np.isnan(t2) | np.isnan(q1) | np.isnan(q2) | np.isnan(wpos) | np.isnan(w)) good = ~mask @@ -1742,10 +1897,12 @@ def test_nan(): np.testing.assert_almost_equal(cat2.r[good], r[good]) np.testing.assert_almost_equal(cat2.w[good], w[good]) np.testing.assert_almost_equal(cat2.wpos[good], wpos[good]) - np.testing.assert_almost_equal(cat2.g1[good], g1[good]) - np.testing.assert_almost_equal(cat2.g2[good], g2[good]) + np.testing.assert_almost_equal(cat2.z1[good], z1[good]) + np.testing.assert_almost_equal(cat2.z2[good], z2[good]) np.testing.assert_almost_equal(cat2.v1[good], v1[good]) np.testing.assert_almost_equal(cat2.v2[good], v2[good]) + np.testing.assert_almost_equal(cat2.g1[good], g1[good]) + np.testing.assert_almost_equal(cat2.g2[good], g2[good]) np.testing.assert_almost_equal(cat2.t1[good], t1[good]) np.testing.assert_almost_equal(cat2.t2[good], t2[good]) np.testing.assert_almost_equal(cat2.q1[good], q1[good]) @@ -1754,10 +1911,11 @@ def test_nan(): # If no weight column, one is made automatically to deal with NaNs. with CaptureLog() as cl: - cat3 = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, v1=v1, v2=v2, + cat3 = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2, logger=cl.logger, keep_zero_weight=True) - mask = (np.isnan(x) | np.isnan(y) | np.isnan(g1) | np.isnan(g2) | np.isnan(v1) | np.isnan(v2) | + mask = (np.isnan(x) | np.isnan(y) | np.isnan(g1) | np.isnan(g2) | + np.isnan(z1) | np.isnan(z2) | np.isnan(v1) | np.isnan(v2) | np.isnan(t1) | np.isnan(t2) | np.isnan(q1) | np.isnan(q2)) good = ~mask assert cat3.ntot == nobj np.testing.assert_almost_equal(cat3.x[good], x[good]) np.testing.assert_almost_equal(cat3.y[good], y[good]) np.testing.assert_almost_equal(cat3.w[good], 1.)
- np.testing.assert_almost_equal(cat3.g1[good], g1[good]) - np.testing.assert_almost_equal(cat3.g2[good], g2[good]) + np.testing.assert_almost_equal(cat3.z1[good], z1[good]) + np.testing.assert_almost_equal(cat3.z2[good], z2[good]) np.testing.assert_almost_equal(cat3.v1[good], v1[good]) np.testing.assert_almost_equal(cat3.v2[good], v2[good]) + np.testing.assert_almost_equal(cat3.g1[good], g1[good]) + np.testing.assert_almost_equal(cat3.g2[good], g2[good]) np.testing.assert_almost_equal(cat3.t1[good], t1[good]) np.testing.assert_almost_equal(cat3.t2[good], t2[good]) np.testing.assert_almost_equal(cat3.q1[good], q1[good]) @@ -2152,22 +2312,24 @@ def test_field(): dec = rng.normal(-48.12, 4.3, (ngal,) ) k = rng.normal(0,s, (ngal,) ) - g1 = rng.normal(0,s, (ngal,) ) - g2 = rng.normal(0,s, (ngal,) ) + z1 = rng.normal(0,s, (ngal,) ) + z2 = rng.normal(0,s, (ngal,) ) v1 = rng.normal(0,s, (ngal,) ) v2 = rng.normal(0,s, (ngal,) ) + g1 = rng.normal(0,s, (ngal,) ) + g2 = rng.normal(0,s, (ngal,) ) t1 = rng.normal(0,s, (ngal,) ) t2 = rng.normal(0,s, (ngal,) ) q1 = rng.normal(0,s, (ngal,) ) q2 = rng.normal(0,s, (ngal,) ) - cat1 = treecorr.Catalog(x=x, y=y, z=z, g1=g1, g2=g2, k=k, + cat1 = treecorr.Catalog(x=x, y=y, z=z, g1=g1, g2=g2, k=k, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) cat2 = treecorr.Catalog(ra=ra, dec=dec, ra_units='hour', dec_units='deg', - w=w, g1=g1, g2=g2, k=k, + w=w, g1=g1, g2=g2, k=k, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) cat2.logger = None - cat3 = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, w=w, + cat3 = treecorr.Catalog(x=x, y=y, g1=g1, g2=g2, k=k, w=w, z1=z1, z2=z2, v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) cat3 = cat3.copy() # This tests that post-pickled catalog still works correctly. cat4 = treecorr.Catalog(x=x, y=y, w=w) @@ -2246,6 +2408,29 @@ def test_field(): if platform.python_implementation() != 'PyPy': assert t2-t1 <= t1-t0 + t0 = time.time() + zfield1 = cat1.getZField() + zfield2 = cat2.getZField(min_size=0.01, max_size=1) + zfield3 = cat3.getZField(min_size=1, max_size=300, logger=logger) + t1 = time.time() + zfield1b = cat1.getZField() + zfield2b = cat2.getZField(min_size=0.01, max_size=1) + zfield3b = cat3.getZField(min_size=1, max_size=300, logger=logger) + t2 = time.time() + assert_raises(TypeError, cat4.getZField) + assert cat1.zfields.count == 1 + assert cat2.zfields.count == 1 + assert cat3.zfields.count == 1 + assert cat1.field is zfield1 + assert cat2.field is zfield2 + assert cat3.field is zfield3 + assert zfield1b is zfield1 + assert zfield2b is zfield2 + assert zfield3b is zfield3 + print('zfield: ',t1-t0,t2-t1) + if platform.python_implementation() != 'PyPy': + assert t2-t1 <= t1-t0 + t0 = time.time() vfield1 = cat1.getVField() vfield2 = cat2.getVField(min_size=0.01, max_size=1) @@ -2472,27 +2657,31 @@ def test_combine(): z = rng.random_sample(nobj) w = rng.random_sample(nobj) k = rng.random_sample(nobj) - g1 = rng.random_sample(nobj) - g2 = rng.random_sample(nobj) + z1 = rng.random_sample(nobj) + z2 = rng.random_sample(nobj) v1 = rng.random_sample(nobj) v2 = rng.random_sample(nobj) + g1 = rng.random_sample(nobj) + g2 = rng.random_sample(nobj) t1 = rng.random_sample(nobj) t2 = rng.random_sample(nobj) q1 = rng.random_sample(nobj) q2 = rng.random_sample(nobj) # This is the full catalog with all rows - cat1 = treecorr.Catalog(x=x, y=y, z=z, w=w, g1=g1, g2=g2, k=k, v1=v1, v2=v2, - t1=t1, t2=t2, q1=q1, q2=q2) + cat1 = treecorr.Catalog(x=x, y=y, z=z, w=w, g1=g1, g2=g2, k=k, z1=z1, z2=z2, + v1=v1, v2=v2, t1=t1, t2=t2, q1=q1, q2=q2) 
np.testing.assert_array_equal(cat1.x, x) np.testing.assert_array_equal(cat1.y, y) np.testing.assert_array_equal(cat1.z, z) np.testing.assert_array_equal(cat1.w, w) np.testing.assert_array_equal(cat1.k, k) - np.testing.assert_array_equal(cat1.g1, g1) - np.testing.assert_array_equal(cat1.g2, g2) + np.testing.assert_array_equal(cat1.z1, z1) + np.testing.assert_array_equal(cat1.z2, z2) np.testing.assert_array_equal(cat1.v1, v1) np.testing.assert_array_equal(cat1.v2, v2) + np.testing.assert_array_equal(cat1.g1, g1) + np.testing.assert_array_equal(cat1.g2, g2) np.testing.assert_array_equal(cat1.t1, t1) np.testing.assert_array_equal(cat1.t2, t2) np.testing.assert_array_equal(cat1.q1, q1) @@ -2500,6 +2689,7 @@ def test_combine(): # Now build it up slowly. cats = [treecorr.Catalog(x=x[i:j], y=y[i:j], z=z[i:j], w=w[i:j], k=k[i:j], + z1=z1[i:j], z2=z2[i:j], v1=v1[i:j], v2=v2[i:j], g1=g1[i:j], g2=g2[i:j], t1=t1[i:j], t2=t2[i:j], q1=q1[i:j], q2=q2[i:j]) for (i,j) in [(0,20), (20,33), (33,82), (82,83), (83,100)]] @@ -2509,10 +2699,12 @@ def test_combine(): np.testing.assert_array_equal(cat2.z, z) np.testing.assert_array_equal(cat2.w, w) np.testing.assert_array_equal(cat2.k, k) - np.testing.assert_array_equal(cat2.g1, g1) - np.testing.assert_array_equal(cat2.g2, g2) + np.testing.assert_array_equal(cat2.z1, z1) + np.testing.assert_array_equal(cat2.z2, z2) np.testing.assert_array_equal(cat2.v1, v1) np.testing.assert_array_equal(cat2.v2, v2) + np.testing.assert_array_equal(cat2.g1, g1) + np.testing.assert_array_equal(cat2.g2, g2) np.testing.assert_array_equal(cat2.t1, t1) np.testing.assert_array_equal(cat2.t2, t2) np.testing.assert_array_equal(cat2.q1, q1) @@ -2565,6 +2757,8 @@ def test_combine(): np.testing.assert_array_equal(cat5.r, z) np.testing.assert_array_equal(cat5.w, w) np.testing.assert_array_equal(cat5.k, k) + assert cat5.z1 is None + assert cat5.z2 is None assert cat5.v1 is None assert cat5.v2 is None assert cat5.g1 is None diff --git a/tests/test_config.py b/tests/test_config.py index ccfb66ff..edd2f31c 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/test_gg.py b/tests/test_gg.py index ff401386..d3154bdb 100644 --- a/tests/test_gg.py +++ b/tests/test_gg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -181,6 +181,32 @@ def test_direct(): np.testing.assert_allclose(gg3b.xim, gg.xim) np.testing.assert_allclose(gg3b.xim_im, gg.xim_im) + # or using the Corr2 base class + with CaptureLog() as cl: + gg3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(gg3c.npairs, gg.npairs) + np.testing.assert_allclose(gg3c.weight, gg.weight) + np.testing.assert_allclose(gg3c.meanr, gg.meanr) + np.testing.assert_allclose(gg3c.meanlogr, gg.meanlogr) + np.testing.assert_allclose(gg3c.xip, gg.xip) + np.testing.assert_allclose(gg3c.xip_im, gg.xip_im) + np.testing.assert_allclose(gg3c.xim, gg.xim) + np.testing.assert_allclose(gg3c.xim_im, gg.xim_im) + + # But cannot use a different class + with assert_raises(OSError): + 
treecorr.NGCorrelation.from_file(ascii_name)
+
+    # And gives an error if not a valid treecorr output file.
+    with assert_raises(OSError):
+        treecorr.Corr2.from_file(config['file_name'])
+    with assert_raises(OSError):
+        treecorr.GGCorrelation.from_file(config['file_name'])
+    with assert_raises(OSError):
+        treecorr.Corr2.from_file('invalid_file')
+    with assert_raises(OSError):
+        treecorr.GGCorrelation.from_file('invalid_file')
+
     try:
         import fitsio
     except ImportError:
@@ -504,15 +530,14 @@ def test_gg():
     gg = treecorr.GGCorrelation(bin_size=0.1, min_sep=1., max_sep=100., sep_units='arcmin',
                                 verbose=1)
     t0 = time.time()
-    gg.process(cat)
+    gg.process(cat, num_threads=1)
     t1 = time.time()
     print('Time for gg process = ',t1-t0)

     # Using nbins=None rather than omiting nbins is equivalent.
     gg2 = treecorr.GGCorrelation(bin_size=0.1, min_sep=1., max_sep=100., nbins=None,
                                  sep_units='arcmin')
     gg2.process(cat, num_threads=1)
-    gg.process(cat, num_threads=1)
-    assert gg2 == gg
+    assert gg2 == gg  # Only exactly == if num_threads == 1

     # log(<R>) != <logR>, but it should be close:
     np.testing.assert_allclose(gg.meanlogr, np.log(gg.meanr), atol=1.e-3)
diff --git a/tests/test_ggg.py b/tests/test_ggg.py
index c4ff76e6..10d4e257 100644
--- a/tests/test_ggg.py
+++ b/tests/test_ggg.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -3857,6 +3857,12 @@ def test_map3_logsas():
     # on the more precise output.
     # The code to make the output file is present here, but disabled normally.

+    try:
+        import fitsio
+    except ImportError:
+        print('Skip test_map3_logsas, since fitsio not installed.')
+        return
+
     gamma0 = 0.05
     r0 = 10.
     L = 20.*r0
@@ -4355,14 +4361,15 @@ def test_direct_logmultipole_auto():
     do_pickle(ggg)

     # Check that running via the corr3 script works correctly.
-    config = treecorr.config.read_config('configs/ggg_direct_logmultipole.yaml')
-    cat.write(config['file_name'])
-    treecorr.corr3(config)
     try:
         import fitsio
     except ImportError:
         pass
     else:
+        config = treecorr.config.read_config('configs/ggg_direct_logmultipole.yaml')
+        cat.write(config['file_name'])
+        treecorr.corr3(config)
+
         data = fitsio.read(config['ggg_file_name'])
         np.testing.assert_allclose(data['d2_nom'], ggg.d2nom.flatten(), rtol=1.e-4)
         np.testing.assert_allclose(data['d3_nom'], ggg.d3nom.flatten(), rtol=1.e-4)
@@ -4879,6 +4886,12 @@ def test_direct_logmultipole_cross12():
 def test_map3_logmultipole():
     # Same as test_map3_logsas, but use multipole algorithm and toSAS.

+    try:
+        import fitsio
+    except ImportError:
+        print('Skip test_map3_logmultipole, since fitsio not installed.')
+        return
+
     gamma0 = 0.05
     r0 = 10.
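Stepping back from the hunks: the new `Corr2.from_file` tested above lets a user reload any two-point output without knowing which class wrote it, while a mismatched subclass or a non-TreeCorr file raises OSError. A minimal round-trip sketch, with hypothetical catalog values and file path:

    import numpy as np
    import treecorr

    rng = np.random.RandomState(42)
    cat = treecorr.Catalog(x=rng.uniform(0, 100, 5000), y=rng.uniform(0, 100, 5000),
                           g1=rng.normal(0, 0.1, 5000), g2=rng.normal(0, 0.1, 5000))
    gg = treecorr.GGCorrelation(min_sep=1., max_sep=50., nbins=20)
    gg.process(cat)
    gg.write('output/gg_example.txt')  # hypothetical path

    # Recover it later; from_file reconstructs the matching subclass.
    corr = treecorr.Corr2.from_file('output/gg_example.txt')
    assert isinstance(corr, treecorr.GGCorrelation)
    np.testing.assert_allclose(corr.xip, gg.xip)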
L = 20.*r0 diff --git a/tests/test_helper.py b/tests/test_helper.py index 764fa8ac..1b1e181c 100644 --- a/tests/test_helper.py +++ b/tests/test_helper.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -32,10 +32,7 @@ def get_from_wiki(file_name, host=None): host = 'https://github.com/rmjarvis/TreeCorr/wiki/' url = host + file_name if not os.path.isfile(local_file_name): - try: - from urllib.request import urlopen - except ImportError: - from urllib import urlopen + from urllib.request import urlopen import shutil import ssl @@ -127,16 +124,13 @@ class CaptureLog(object): """ def __init__(self, level=3): + from io import StringIO logging_levels = { 0: logging.CRITICAL, 1: logging.WARNING, 2: logging.INFO, 3: logging.DEBUG } self.logger = logging.getLogger('CaptureLog') self.logger.setLevel(logging_levels[level]) - try: - from StringIO import StringIO - except ImportError: - from io import StringIO self.stream = StringIO() self.handler = logging.StreamHandler(self.stream) self.logger.addHandler(self.handler) @@ -215,10 +209,7 @@ def __exit__(self, type, value, traceback): def do_pickle(obj1, func=lambda x : x): """Check that the object is picklable. Also that it has basic == and != functionality. """ - try: - import cPickle as pickle - except ImportError: - import pickle + import pickle import copy print('Try pickling ',str(obj1)) diff --git a/tests/test_index.py b/tests/test_index.py index 2f597732..85a1d724 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/test_kg.py b/tests/test_kg.py index 1867f094..2b8bf4a4 100644 --- a/tests/test_kg.py +++ b/tests/test_kg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -170,6 +170,17 @@ def test_direct(): np.testing.assert_allclose(kg3b.xi, kg.xi) np.testing.assert_allclose(kg3b.xi_im, kg.xi_im) + # or using the Corr2 base class + with CaptureLog() as cl: + kg3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(kg3c.npairs, kg.npairs) + np.testing.assert_allclose(kg3c.weight, kg.weight) + np.testing.assert_allclose(kg3c.meanr, kg.meanr) + np.testing.assert_allclose(kg3c.meanlogr, kg.meanlogr) + np.testing.assert_allclose(kg3c.xi, kg.xi) + np.testing.assert_allclose(kg3c.xi_im, kg.xi_im) + try: import fitsio except ImportError: diff --git a/tests/test_kk.py b/tests/test_kk.py index 297f2aed..9bc7e040 100644 --- a/tests/test_kk.py +++ b/tests/test_kk.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -146,6 +146,16 @@ def test_direct(): np.testing.assert_allclose(kk3b.meanlogr, kk.meanlogr) np.testing.assert_allclose(kk3b.xi, kk.xi) + # or using the Corr2 base class + with 
CaptureLog() as cl:
+        kk3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger)
+    assert ascii_name in cl.output
+    np.testing.assert_allclose(kk3c.npairs, kk.npairs)
+    np.testing.assert_allclose(kk3c.weight, kk.weight)
+    np.testing.assert_allclose(kk3c.meanr, kk.meanr)
+    np.testing.assert_allclose(kk3c.meanlogr, kk.meanlogr)
+    np.testing.assert_allclose(kk3c.xi, kk.xi)
+
     try:
         import fitsio
     except ImportError:
@@ -390,7 +400,7 @@ def test_kk():
     # The Fourier transform is: kappa~(k) = 2 pi A s^2 exp(-s^2 k^2/2) / L^2
     # P(k) = (1/2pi) <|kappa~(k)|^2> = 2 pi A^2 (s/L)^4 exp(-s^2 k^2)
     # xi(r) = (1/2pi) int( dk k P(k) J0(kr) )
-    #       = pi A^2 (s/L)^2 exp(-r^2/2s^2/4)
+    #       = pi A^2 (s/L)^2 exp(-r^2/4s^2)
     # Note: I'm not sure I handled the L factors correctly, but the units at the end need
     # to be kappa^2, so it needs to be (s/L)^2.
@@ -414,14 +424,13 @@ def test_kk():
     cat = treecorr.Catalog(x=x, y=y, k=kappa, x_units='arcmin', y_units='arcmin')
     kk = treecorr.KKCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin',
                                 verbose=1)
-    kk.process(cat)
+    kk.process(cat, num_threads=1)

     # Using nbins=None rather than omiting nbins is equivalent.
     kk2 = treecorr.KKCorrelation(bin_size=0.1, min_sep=1., max_sep=20., nbins=None,
                                  sep_units='arcmin')
     kk2.process(cat, num_threads=1)
-    kk.process(cat, num_threads=1)
-    assert kk2 == kk
+    assert kk2 == kk  # Only exactly == if num_threads == 1

     # log(<R>) != <logR>, but it should be close:
     print('meanlogr - log(meanr) = ',kk.meanlogr - np.log(kk.meanr))
diff --git a/tests/test_kkk.py b/tests/test_kkk.py
index 24b1aba9..fe693a74 100644
--- a/tests/test_kkk.py
+++ b/tests/test_kkk.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -2394,14 +2394,15 @@ def test_direct_logmultipole_auto():
     do_pickle(kkk)

     # Check that running via the corr3 script works correctly.
-    config = treecorr.config.read_config('configs/kkk_direct_logmultipole.yaml')
-    cat.write(config['file_name'])
-    treecorr.corr3(config)
     try:
         import fitsio
     except ImportError:
         pass
     else:
+        config = treecorr.config.read_config('configs/kkk_direct_logmultipole.yaml')
+        cat.write(config['file_name'])
+        treecorr.corr3(config)
+
         data = fitsio.read(config['kkk_file_name'])
         np.testing.assert_allclose(data['d2_nom'], kkk.d2nom.flatten(), rtol=1.e-4)
         np.testing.assert_allclose(data['d3_nom'], kkk.d3nom.flatten(), rtol=1.e-4)
diff --git a/tests/test_kmeans.py b/tests/test_kmeans.py
index 330a218d..7aaa10b9 100644
--- a/tests/test_kmeans.py
+++ b/tests/test_kmeans.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
diff --git a/tests/test_kq.py b/tests/test_kq.py
index e27c04de..8edb98d4 100644
--- a/tests/test_kq.py
+++ b/tests/test_kq.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -171,6 +171,17 @@ def test_direct():
     np.testing.assert_allclose(kq3b.xi, kq.xi)
     np.testing.assert_allclose(kq3b.xi_im, kq.xi_im)

+    # or using the Corr2 base class
+    with CaptureLog() as cl:
+        kq3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger)
+    assert ascii_name in cl.output
+    np.testing.assert_allclose(kq3c.npairs, kq.npairs)
+    np.testing.assert_allclose(kq3c.weight, kq.weight)
+    np.testing.assert_allclose(kq3c.meanr, kq.meanr)
+    np.testing.assert_allclose(kq3c.meanlogr, kq.meanlogr)
+    np.testing.assert_allclose(kq3c.xi, kq.xi)
+    np.testing.assert_allclose(kq3c.xi_im, kq.xi_im)
+
     try:
         import fitsio
     except ImportError:
diff --git a/tests/test_kt.py b/tests/test_kt.py
index fd370475..204153d1 100644
--- a/tests/test_kt.py
+++ b/tests/test_kt.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -171,6 +171,17 @@ def test_direct():
     np.testing.assert_allclose(kt3b.xi, kt.xi)
     np.testing.assert_allclose(kt3b.xi_im, kt.xi_im)

+    # or using the Corr2 base class
+    with CaptureLog() as cl:
+        kt3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger)
+    assert ascii_name in cl.output
+    np.testing.assert_allclose(kt3c.npairs, kt.npairs)
+    np.testing.assert_allclose(kt3c.weight, kt.weight)
+    np.testing.assert_allclose(kt3c.meanr, kt.meanr)
+    np.testing.assert_allclose(kt3c.meanlogr, kt.meanlogr)
+    np.testing.assert_allclose(kt3c.xi, kt.xi)
+    np.testing.assert_allclose(kt3c.xi_im, kt.xi_im)
+
     try:
         import fitsio
     except ImportError:
diff --git a/tests/test_kv.py b/tests/test_kv.py
index e52b7795..57e691c5 100644
--- a/tests/test_kv.py
+++ b/tests/test_kv.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -171,6 +171,17 @@ def test_direct():
     np.testing.assert_allclose(kv3b.xi, kv.xi)
     np.testing.assert_allclose(kv3b.xi_im, kv.xi_im)

+    # or using the Corr2 base class
+    with CaptureLog() as cl:
+        kv3c =
treecorr.Corr2.from_file(ascii_name, logger=cl.logger)
+    assert ascii_name in cl.output
+    np.testing.assert_allclose(kv3c.npairs, kv.npairs)
+    np.testing.assert_allclose(kv3c.weight, kv.weight)
+    np.testing.assert_allclose(kv3c.meanr, kv.meanr)
+    np.testing.assert_allclose(kv3c.meanlogr, kv.meanlogr)
+    np.testing.assert_allclose(kv3c.xi, kv.xi)
+    np.testing.assert_allclose(kv3c.xi_im, kv.xi_im)
+
     try:
         import fitsio
     except ImportError:
diff --git a/tests/test_kz.py b/tests/test_kz.py
new file mode 100644
index 00000000..9eed2459
--- /dev/null
+++ b/tests/test_kz.py
@@ -0,0 +1,773 @@
+# Copyright (c) 2003-2024 by Mike Jarvis
+#
+# TreeCorr is free software: redistribution and use in source and binary forms,
+# with or without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions, and the disclaimer given in the accompanying LICENSE
+#    file.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions, and the disclaimer given in the documentation
+#    and/or other materials provided with the distribution.
+
+import numpy as np
+import time
+import os
+import coord
+import treecorr
+
+from test_helper import do_pickle, CaptureLog
+from test_helper import assert_raises, timer, assert_warns
+
+@timer
+def test_direct():
+    # If the catalogs are small enough, we can do a direct calculation to see if it comes out right.
+    # This should exactly match the treecorr result if brute=True.
+
+    ngal = 200
+    s = 10.
+    rng = np.random.RandomState(8675309)
+    x1 = rng.normal(0,s, (ngal,) )
+    y1 = rng.normal(0,s, (ngal,) )
+    w1 = rng.random_sample(ngal)
+    k1 = rng.normal(5,1, (ngal,) )
+
+    x2 = rng.normal(0,s, (ngal,) )
+    y2 = rng.normal(0,s, (ngal,) )
+    w2 = rng.random_sample(ngal)
+    z12 = rng.normal(0,0.2, (ngal,) )
+    z22 = rng.normal(0,0.2, (ngal,) )
+
+    cat1 = treecorr.Catalog(x=x1, y=y1, w=w1, k=k1)
+    cat2 = treecorr.Catalog(x=x2, y=y2, w=w2, z1=z12, z2=z22)
+
+    min_sep = 1.
+    max_sep = 50.
+    nbins = 50
+    bin_size = np.log(max_sep/min_sep) / nbins
+    kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True)
+    kz.process(cat1, cat2)
+
+    true_npairs = np.zeros(nbins, dtype=int)
+    true_weight = np.zeros(nbins, dtype=float)
+    true_xi = np.zeros(nbins, dtype=complex)
+    for i in range(ngal):
+        # It's hard to do all the pairs at once with numpy operations (although maybe possible).
+        # But we can at least do all the pairs for each entry in cat1 at once with arrays.
+ rsq = (x1[i]-x2)**2 + (y1[i]-y2)**2 + r = np.sqrt(rsq) + + ww = w1[i] * w2 + xi = ww * k1[i] * (z12 + 1j*z22) + + index = np.floor(np.log(r/min_sep) / bin_size).astype(int) + mask = (index >= 0) & (index < nbins) + np.add.at(true_npairs, index[mask], 1) + np.add.at(true_weight, index[mask], ww[mask]) + np.add.at(true_xi, index[mask], xi[mask]) + + true_xi /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',kz.npairs - true_npairs) + np.testing.assert_array_equal(kz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',kz.weight - true_weight) + np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xi = ',true_xi) + print('kz.xi = ',kz.xi) + print('kz.xi_im = ',kz.xi_im) + np.testing.assert_allclose(kz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(kz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. + config = treecorr.config.read_config('configs/kz_direct.yaml') + try: + import fitsio + except ImportError: + pass + else: + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + treecorr.corr2(config) + data = fitsio.read(config['kz_file_name']) + np.testing.assert_allclose(data['r_nom'], kz.rnom) + np.testing.assert_allclose(data['npairs'], kz.npairs) + np.testing.assert_allclose(data['weight'], kz.weight) + np.testing.assert_allclose(data['xi'], kz.xi) + np.testing.assert_allclose(data['xi_im'], kz.xi_im) + + # Invalid with only one file_name + del config['file_name2'] + with assert_raises(TypeError): + treecorr.corr2(config) + + # Repeat with binslop = 0, since code is different for bin_slop=0 and brute=True. + # And don't do any top-level recursion so we actually test not going to the leaves. + kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + max_top=0) + kz.process(cat1, cat2) + np.testing.assert_array_equal(kz.npairs, true_npairs) + np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(kz.xi, true_xi.real, atol=1.e-3) + np.testing.assert_allclose(kz.xi_im, true_xi.imag, atol=1.e-3) + + # With angle_slop = 0, it goes back to being basically exact (to single precision). + kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + angle_slop=0, max_top=0) + kz.process(cat1, cat2) + np.testing.assert_array_equal(kz.npairs, true_npairs) + np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(kz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(kz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check a few basic operations with a KZCorrelation object. 
+ do_pickle(kz) + + kz2 = kz.copy() + kz2 += kz + np.testing.assert_allclose(kz2.npairs, 2*kz.npairs) + np.testing.assert_allclose(kz2.weight, 2*kz.weight) + np.testing.assert_allclose(kz2.meanr, 2*kz.meanr) + np.testing.assert_allclose(kz2.meanlogr, 2*kz.meanlogr) + np.testing.assert_allclose(kz2.xi, 2*kz.xi) + np.testing.assert_allclose(kz2.xi_im, 2*kz.xi_im) + + kz2.clear() + kz2 += kz + np.testing.assert_allclose(kz2.npairs, kz.npairs) + np.testing.assert_allclose(kz2.weight, kz.weight) + np.testing.assert_allclose(kz2.meanr, kz.meanr) + np.testing.assert_allclose(kz2.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz2.xi, kz.xi) + np.testing.assert_allclose(kz2.xi_im, kz.xi_im) + + ascii_name = 'output/kz_ascii.txt' + kz.write(ascii_name, precision=16) + kz3 = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_type='Log') + kz3.read(ascii_name) + np.testing.assert_allclose(kz3.npairs, kz.npairs) + np.testing.assert_allclose(kz3.weight, kz.weight) + np.testing.assert_allclose(kz3.meanr, kz.meanr) + np.testing.assert_allclose(kz3.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz3.xi, kz.xi) + np.testing.assert_allclose(kz3.xi_im, kz.xi_im) + + # Check that the repr is minimal + assert repr(kz3) == f'KZCorrelation(min_sep={min_sep}, max_sep={max_sep}, nbins={nbins})' + + # Simpler API using from_file: + with CaptureLog() as cl: + kz3b = treecorr.KZCorrelation.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(kz3b.npairs, kz.npairs) + np.testing.assert_allclose(kz3b.weight, kz.weight) + np.testing.assert_allclose(kz3b.meanr, kz.meanr) + np.testing.assert_allclose(kz3b.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz3b.xi, kz.xi) + np.testing.assert_allclose(kz3b.xi_im, kz.xi_im) + + # or using the Corr2 base class + with CaptureLog() as cl: + kz3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(kz3c.npairs, kz.npairs) + np.testing.assert_allclose(kz3c.weight, kz.weight) + np.testing.assert_allclose(kz3c.meanr, kz.meanr) + np.testing.assert_allclose(kz3c.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz3c.xi, kz.xi) + np.testing.assert_allclose(kz3c.xi_im, kz.xi_im) + + try: + import fitsio + except ImportError: + pass + else: + fits_name = 'output/kz_fits.fits' + kz.write(fits_name) + kz4 = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins) + kz4.read(fits_name) + np.testing.assert_allclose(kz4.npairs, kz.npairs) + np.testing.assert_allclose(kz4.weight, kz.weight) + np.testing.assert_allclose(kz4.meanr, kz.meanr) + np.testing.assert_allclose(kz4.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz4.xi, kz.xi) + np.testing.assert_allclose(kz4.xi_im, kz.xi_im) + + kz4b = treecorr.KZCorrelation.from_file(fits_name) + np.testing.assert_allclose(kz4b.npairs, kz.npairs) + np.testing.assert_allclose(kz4b.weight, kz.weight) + np.testing.assert_allclose(kz4b.meanr, kz.meanr) + np.testing.assert_allclose(kz4b.meanlogr, kz.meanlogr) + np.testing.assert_allclose(kz4b.xi, kz.xi) + np.testing.assert_allclose(kz4b.xi_im, kz.xi_im) + + with assert_raises(TypeError): + kz2 += config + kz4 = treecorr.KZCorrelation(min_sep=min_sep/2, max_sep=max_sep, nbins=nbins) + with assert_raises(ValueError): + kz2 += kz4 + kz5 = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep*2, nbins=nbins) + with assert_raises(ValueError): + kz2 += kz5 + kz6 = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins*2) + with 
assert_raises(ValueError): + kz2 += kz6 + with assert_raises(ValueError): + kz.process(cat1, cat2, patch_method='nonlocal') + +@timer +def test_direct_spherical(): + # Repeat in spherical coords + + ngal = 100 + s = 10. + rng = np.random.RandomState(8675309) + x1 = rng.normal(0,s, (ngal,) ) + y1 = rng.normal(0,s, (ngal,) ) + 200 # Put everything at large y, so small angle on sky + z1 = rng.normal(0,s, (ngal,) ) + w1 = rng.random_sample(ngal) + k1 = rng.normal(5,1, (ngal,) ) + + x2 = rng.normal(0,s, (ngal,) ) + y2 = rng.normal(0,s, (ngal,) ) + 200 + z2 = rng.normal(0,s, (ngal,) ) + w2 = rng.random_sample(ngal) + z12 = rng.normal(0,0.2, (ngal,) ) + z22 = rng.normal(0,0.2, (ngal,) ) + + ra1, dec1 = coord.CelestialCoord.xyz_to_radec(x1,y1,z1) + ra2, dec2 = coord.CelestialCoord.xyz_to_radec(x2,y2,z2) + + cat1 = treecorr.Catalog(ra=ra1, dec=dec1, ra_units='rad', dec_units='rad', w=w1, k=k1) + cat2 = treecorr.Catalog(ra=ra2, dec=dec2, ra_units='rad', dec_units='rad', w=w2, z1=z12, z2=z22) + + min_sep = 1. + max_sep = 10. + nbins = 50 + bin_size = np.log(max_sep/min_sep) / nbins + kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, + sep_units='deg', brute=True) + kz.process(cat1, cat2) + + r1 = np.sqrt(x1**2 + y1**2 + z1**2) + r2 = np.sqrt(x2**2 + y2**2 + z2**2) + x1 /= r1; y1 /= r1; z1 /= r1 + x2 /= r2; y2 /= r2; z2 /= r2 + + true_npairs = np.zeros(nbins, dtype=int) + true_weight = np.zeros(nbins, dtype=float) + true_xi = np.zeros(nbins, dtype=complex) + + c1 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra1, dec1)] + c2 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra2, dec2)] + for i in range(ngal): + for j in range(ngal): + rsq = (x1[i]-x2[j])**2 + (y1[i]-y2[j])**2 + (z1[i]-z2[j])**2 + r = np.sqrt(rsq) + r *= coord.radians / coord.degrees + + index = np.floor(np.log(r/min_sep) / bin_size).astype(int) + if index < 0 or index >= nbins: + continue + + ww = w1[i] * w2[j] + xi = ww * k1[i] * (z12[j] + 1j * z22[j]) + + true_npairs[index] += 1 + true_weight[index] += ww + true_xi[index] += xi + + true_xi /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',kz.npairs - true_npairs) + np.testing.assert_array_equal(kz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',kz.weight - true_weight) + np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xi = ',true_xi) + print('kz.xi = ',kz.xi) + np.testing.assert_allclose(kz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(kz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. + config = treecorr.config.read_config('configs/kz_direct_spherical.yaml') + try: + import fitsio + except ImportError: + pass + else: + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + treecorr.corr2(config) + data = fitsio.read(config['kz_file_name']) + np.testing.assert_allclose(data['r_nom'], kz.rnom) + np.testing.assert_allclose(data['npairs'], kz.npairs) + np.testing.assert_allclose(data['weight'], kz.weight) + np.testing.assert_allclose(data['xi'], kz.xi) + np.testing.assert_allclose(data['xi_im'], kz.xi_im) + + # Repeat with binslop = 0 + # And don't do any top-level recursion so we actually test not going to the leaves. 
+    kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins,
+                                sep_units='deg', bin_slop=0, max_top=0)
+    kz.process(cat1, cat2)
+    np.testing.assert_array_equal(kz.npairs, true_npairs)
+    np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(kz.xi, true_xi.real, atol=1.e-3)
+    np.testing.assert_allclose(kz.xi_im, true_xi.imag, atol=1.e-3)
+
+    # With angle_slop = 0, it goes back to being basically exact (to single precision).
+    kz = treecorr.KZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins,
+                                sep_units='deg', bin_slop=0, angle_slop=0, max_top=0)
+    kz.process(cat1, cat2)
+    np.testing.assert_array_equal(kz.npairs, true_npairs)
+    np.testing.assert_allclose(kz.weight, true_weight, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(kz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(kz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8)
+
+
+@timer
+def test_single():
+    # Use z(r) = z0 exp(-r^2/2r0^2) (1-r^2/2r0^2) around a single lens
+
+    nsource = 100000
+    z0 = 0.05 + 1j * 0.02
+    kappa = 0.23
+    r0 = 10.
+    L = 5. * r0
+    rng = np.random.RandomState(8675309)
+    x = (rng.random_sample(nsource)-0.5) * L
+    y = (rng.random_sample(nsource)-0.5) * L
+    r2 = (x**2 + y**2)
+    r = np.sqrt(r2)
+    z = z0 * np.exp(-0.5*r2/r0**2) * (1-0.5*r2/r0**2)
+    z1 = np.real(z)
+    z2 = np.imag(z)
+
+    lens_cat = treecorr.Catalog(x=[0], y=[0], k=[kappa], x_units='arcmin', y_units='arcmin')
+    source_cat = treecorr.Catalog(x=x, y=y, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
+    kz = treecorr.KZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin',
+                                verbose=1)
+    kz.process(lens_cat, source_cat)
+
+    # log(<R>) != <logR>, but it should be close:
+    print('meanlogr - log(meanr) = ',kz.meanlogr - np.log(kz.meanr))
+    np.testing.assert_allclose(kz.meanlogr, np.log(kz.meanr), atol=1.e-3)
+
+    r = kz.meanr
+    true_kz = kappa * z0 * np.exp(-0.5*r**2/r0**2) * (1-0.5*r**2/r0**2)
+
+    print('kz.xi = ',kz.xi)
+    print('kz.xi_im = ',kz.xi_im)
+    print('true_kz = ',true_kz)
+    print('ratio = ',kz.xi / true_kz)
+    print('diff = ',kz.xi - true_kz)
+    print('max diff = ',max(abs(kz.xi - true_kz)))
+    np.testing.assert_allclose(kz.xi, np.real(true_kz), rtol=1.e-2, atol=1.e-4)
+    np.testing.assert_allclose(kz.xi_im, np.imag(true_kz), rtol=1.e-2, atol=1.e-4)
+
+    # Check that we get the same result using the corr2 function:
+    lens_cat.write(os.path.join('data','kz_single_lens.dat'))
+    source_cat.write(os.path.join('data','kz_single_source.dat'))
+    config = treecorr.read_config('configs/kz_single.yaml')
+    config['verbose'] = 0
+    treecorr.corr2(config)
+    corr2_output = np.genfromtxt(os.path.join('output','kz_single.out'), names=True,
+                                 skip_header=1)
+    print('kz.xi = ',kz.xi)
+    print('from corr2 output = ',corr2_output['xi'])
+    print('ratio = ',corr2_output['xi']/kz.xi)
+    print('diff = ',corr2_output['xi']-kz.xi)
+    np.testing.assert_allclose(corr2_output['xi'], kz.xi, rtol=1.e-4)
+
+    print('xi_im from corr2 output = ',corr2_output['xi_im'])
+    np.testing.assert_allclose(corr2_output['xi_im'], kz.xi_im, rtol=1.e-4)
+
+
+@timer
+def test_kz():
+    # Use z(r) = z0 exp(-r^2/2r0^2) (1-r^2/2r0^2) around a bunch of foreground lenses.
+    # For each lens, we divide this by a random kappa value assigned to that lens, so
+    # the final kz output should be just z(r).
+
+    nlens = 1000
+    nsource = 50000
+    r0 = 10.
+    L = 100.
* r0
+
+    z0 = 0.05 + 1j*0.02
+    rng = np.random.RandomState(8675309)
+    xl = (rng.random_sample(nlens)-0.5) * L
+    yl = (rng.random_sample(nlens)-0.5) * L
+    kl = rng.normal(0.23, 0.05, (nlens,) )
+    xs = (rng.random_sample(nsource)-0.5) * L
+    ys = (rng.random_sample(nsource)-0.5) * L
+    z1 = np.zeros( (nsource,) )
+    z2 = np.zeros( (nsource,) )
+    for x,y,k in zip(xl,yl,kl):
+        dx = xs-x
+        dy = ys-y
+        r2 = dx**2 + dy**2
+        r = np.sqrt(r2)
+        zz = z0 * np.exp(-0.5*r2/r0**2) * (1-0.5*r2/r0**2) / k
+        z1 += np.real(zz)
+        z2 += np.imag(zz)
+
+    lens_cat = treecorr.Catalog(x=xl, y=yl, k=kl, x_units='arcmin', y_units='arcmin')
+    source_cat = treecorr.Catalog(x=xs, y=ys, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
+    kz = treecorr.KZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin',
+                                verbose=1)
+    kz.process(lens_cat, source_cat, num_threads=1)
+
+    # Using nbins=None rather than omitting nbins is equivalent.
+    kz2 = treecorr.KZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., nbins=None, sep_units='arcmin')
+    kz2.process(lens_cat, source_cat, num_threads=1)
+    assert kz2 == kz  # (Only exact == if num_threads=1.)
+
+    r = kz.meanr
+    true_kz = z0 * np.exp(-0.5*r**2/r0**2) * (1-0.5*r**2/r0**2)
+
+    print('kz.xi = ',kz.xi)
+    print('kz.xi_im = ',kz.xi_im)
+    print('true_kz = ',true_kz)
+    print('ratio = ',kz.xi / true_kz)
+    print('diff = ',kz.xi - true_kz)
+    print('max diff = ',max(abs(kz.xi - true_kz)))
+    np.testing.assert_allclose(kz.xi, np.real(true_kz), rtol=0.1, atol=2.e-3)
+    np.testing.assert_allclose(kz.xi_im, np.imag(true_kz), rtol=0.1, atol=2.e-3)
+
+    # Check that we get the same result using the corr2 function:
+    lens_cat.write(os.path.join('data','kz_lens.dat'))
+    source_cat.write(os.path.join('data','kz_source.dat'))
+    config = treecorr.read_config('configs/kz.yaml')
+    config['verbose'] = 0
+    config['precision'] = 8
+    treecorr.corr2(config)
+    corr2_output = np.genfromtxt(os.path.join('output','kz.out'), names=True, skip_header=1)
+    print('kz.xi = ',kz.xi)
+    print('from corr2 output = ',corr2_output['xi'])
+    print('ratio = ',corr2_output['xi']/kz.xi)
+    print('diff = ',corr2_output['xi']-kz.xi)
+    np.testing.assert_allclose(corr2_output['xi'], kz.xi, rtol=1.e-4)
+
+    print('xi_im from corr2 output = ',corr2_output['xi_im'])
+    np.testing.assert_allclose(corr2_output['xi_im'], kz.xi_im, rtol=1.e-4)
+
+    # Check the fits write option
+    try:
+        import fitsio
+    except ImportError:
+        pass
+    else:
+        out_file_name1 = os.path.join('output','kz_out1.fits')
+        kz.write(out_file_name1)
+        data = fitsio.read(out_file_name1)
+        np.testing.assert_almost_equal(data['r_nom'], np.exp(kz.logr))
+        np.testing.assert_almost_equal(data['meanr'], kz.meanr)
+        np.testing.assert_almost_equal(data['meanlogr'], kz.meanlogr)
+        np.testing.assert_almost_equal(data['xi'], kz.xi)
+        np.testing.assert_almost_equal(data['xi_im'], kz.xi_im)
+        np.testing.assert_almost_equal(data['sigma'], np.sqrt(kz.varxi))
+        np.testing.assert_almost_equal(data['weight'], kz.weight)
+        np.testing.assert_almost_equal(data['npairs'], kz.npairs)
+
+        # Check the read function
+        kz2 = treecorr.KZCorrelation.from_file(out_file_name1)
+        np.testing.assert_almost_equal(kz2.logr, kz.logr)
+        np.testing.assert_almost_equal(kz2.meanr, kz.meanr)
+        np.testing.assert_almost_equal(kz2.meanlogr, kz.meanlogr)
+        np.testing.assert_almost_equal(kz2.xi, kz.xi)
+        np.testing.assert_almost_equal(kz2.xi_im, kz.xi_im)
+        np.testing.assert_almost_equal(kz2.varxi, kz.varxi)
+        np.testing.assert_almost_equal(kz2.weight, kz.weight)
+        np.testing.assert_almost_equal(kz2.npairs,
kz.npairs)
+        assert kz2.coords == kz.coords
+        assert kz2.metric == kz.metric
+        assert kz2.sep_units == kz.sep_units
+        assert kz2.bin_type == kz.bin_type
+
+
+@timer
+def test_varxi():
+    # Test that varxi is correct (or close) based on actual variance of many runs.
+
+    z0 = 0.05 + 1j*0.05
+    kappa0 = 0.03
+    r0 = 10.
+    L = 50.*r0
+    rng = np.random.RandomState(8675309)
+
+    # Note: to get a good estimate of var(xi), you need a lot of runs.  The number of
+    # runs matters much more than the number of galaxies for getting this to pass.
+    ngal = 1000
+    nruns = 50000
+
+    file_name = 'data/test_varxi_kz.npz'
+    print(file_name)
+    if not os.path.isfile(file_name):
+        all_kzs = []
+        for run in range(nruns):
+            print(f'{run}/{nruns}')
+            x = (rng.random_sample(ngal)-0.5) * L
+            y = (rng.random_sample(ngal)-0.5) * L
+            # Varied weights are hard, but at least check that non-unit weights work correctly.
+            w = np.ones_like(x) * 5
+            r2 = (x**2 + y**2)/r0**2
+            zz = z0 * np.exp(-r2/2.) * (1-r2/2)
+            z1 = np.real(zz)
+            z2 = np.imag(zz)
+            k = kappa0 * np.exp(-r2/2.)
+            # This time, add some shape noise (different each run).
+            z1 += rng.normal(0, 0.3, size=ngal)
+            z2 += rng.normal(0, 0.3, size=ngal)
+            k += rng.normal(0, 0.1, size=ngal)
+
+            cat = treecorr.Catalog(x=x, y=y, w=w, z1=z1, z2=z2, k=k)
+            kz = treecorr.KZCorrelation(bin_size=0.1, min_sep=5., max_sep=50.)
+            kz.process(cat, cat)
+            all_kzs.append(kz)
+
+        mean_xi = np.mean([kz.xi for kz in all_kzs], axis=0)
+        var_xi = np.var([kz.xi for kz in all_kzs], axis=0)
+        mean_varxi = np.mean([kz.varxi for kz in all_kzs], axis=0)
+
+        np.savez(file_name,
+                 mean_xi=mean_xi, var_xi=var_xi, mean_varxi=mean_varxi)
+
+    data = np.load(file_name)
+    mean_xi = data['mean_xi']
+    mean_varxi = data['mean_varxi']
+    var_xi = data['var_xi']
+    print('nruns = ',nruns)
+    print('mean_xi = ',mean_xi)
+    print('mean_varxi = ',mean_varxi)
+    print('var_xi = ',var_xi)
+    print('ratio = ',var_xi / mean_varxi)
+    print('max relerr for xi = ',np.max(np.abs((var_xi - mean_varxi)/var_xi)))
+    np.testing.assert_allclose(mean_varxi, var_xi, rtol=0.02)
+
+    # Now the actual test that's based on current code, not just from the saved file.
+    # There is a bit more noise on a single run, so the tolerance needs to be somewhat higher.
+    x = (rng.random_sample(ngal)-0.5) * L
+    y = (rng.random_sample(ngal)-0.5) * L
+    # Varied weights are hard, but at least check that non-unit weights work correctly.
+    w = np.ones_like(x) * 5
+    r2 = (x**2 + y**2)/r0**2
+    zz = z0 * np.exp(-r2/2.) * (1-r2/2)
+    z1 = np.real(zz)
+    z2 = np.imag(zz)
+    k = kappa0 * np.exp(-r2/2.)
+    # This time, add some shape noise (different each run).
+    z1 += rng.normal(0, 0.3, size=ngal)
+    z2 += rng.normal(0, 0.3, size=ngal)
+    k += rng.normal(0, 0.1, size=ngal)
+
+    cat = treecorr.Catalog(x=x, y=y, w=w, z1=z1, z2=z2, k=k)
+    kz = treecorr.KZCorrelation(bin_size=0.1, min_sep=5., max_sep=50.)
+    kz.process(cat, cat)
+
+    print('single run:')
+    print('ratio = ',kz.varxi / var_xi)
+    print('max relerr for xi = ',np.max(np.abs((kz.varxi - var_xi)/var_xi)))
+    np.testing.assert_allclose(kz.varxi, var_xi, rtol=0.5)
+
+@timer
+def test_jk():
+
+    # Same multi-lens field we used for NZ patch test
+    z0 = 0.05 + 1j*0.03
+    r0 = 30.
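The test_varxi pattern above, in miniature: compare the analytic varxi estimate against the empirical scatter of xi over repeated noise realizations. This sketch assumes pure-noise fields and far fewer runs than the real test, so any tolerance applied to the final ratio would need to be loose:

    import numpy as np
    import treecorr

    rng = np.random.RandomState(8675309)
    all_xi = []
    all_varxi = []
    for run in range(100):
        x = (rng.random_sample(500)-0.5) * 100.
        y = (rng.random_sample(500)-0.5) * 100.
        k = rng.normal(0., 0.1, 500)    # pure noise kappa field
        z1 = rng.normal(0., 0.3, 500)   # pure noise z field, real part
        z2 = rng.normal(0., 0.3, 500)   # pure noise z field, imag part
        cat = treecorr.Catalog(x=x, y=y, k=k, z1=z1, z2=z2)
        kz = treecorr.KZCorrelation(bin_size=0.1, min_sep=5., max_sep=50.)
        kz.process(cat, cat)
        all_xi.append(kz.xi)
        all_varxi.append(kz.varxi)

    # The mean analytic estimate should track the scatter across runs.
    print(np.mean(all_varxi, axis=0) / np.var(all_xi, axis=0))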
+ L = 30 * r0 + rng = np.random.RandomState(8675309) + + nsource = 100000 + nrand = 1000 + nlens = 300 + nruns = 1000 + npatch = 64 + + corr_params = dict(bin_size=0.3, min_sep=5, max_sep=30, bin_slop=0.1) + + def make_field(rng): + x1 = (rng.random(nlens)-0.5) * L + y1 = (rng.random(nlens)-0.5) * L + k = rng.random(nlens)*3 + 10 + x2 = (rng.random(nsource)-0.5) * L + y2 = (rng.random(nsource)-0.5) * L + + # Start with just the noise + z1 = rng.normal(0, 0.1, size=nsource) + z2 = rng.normal(0, 0.1, size=nsource) + + # Add in the signal from all lenses + for i in range(nlens): + x2i = x2 - x1[i] + y2i = y2 - y1[i] + r2 = (x2i**2 + y2i**2)/r0**2 + zz = z0 * np.exp(-r2/2.) * (1-r2/2) + z1 += np.real(zz) + z2 += np.imag(zz) + return x1, y1, k, x2, y2, z1, z2 + + file_name = 'data/test_kz_jk_{}.npz'.format(nruns) + print(file_name) + if not os.path.isfile(file_name): + all_kzs = [] + rng = np.random.default_rng() + for run in range(nruns): + x1, y1, k, x2, y2, z1, z2 = make_field(rng) + print(run,': ',np.mean(z1),np.std(z1),np.min(z1),np.max(z1)) + cat1 = treecorr.Catalog(x=x1, y=y1, k=k) + cat2 = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2) + kz = treecorr.KZCorrelation(corr_params) + kz.process(cat1, cat2) + all_kzs.append(kz) + + mean_xi = np.mean([kz.xi for kz in all_kzs], axis=0) + var_xi = np.var([kz.xi for kz in all_kzs], axis=0) + mean_varxi = np.mean([kz.varxi for kz in all_kzs], axis=0) + + np.savez(file_name, + mean_xi=mean_xi, var_xi=var_xi, mean_varxi=mean_varxi) + + data = np.load(file_name) + mean_xi = data['mean_xi'] + mean_varxi = data['mean_varxi'] + var_xi = data['var_xi'] + + print('mean_xi = ',mean_xi) + print('mean_varxi = ',mean_varxi) + print('var_xi = ',var_xi) + print('ratio = ',var_xi / mean_varxi) + + rng = np.random.default_rng(1234) + x1, y1, k, x2, y2, z1, z2 = make_field(rng) + + cat1 = treecorr.Catalog(x=x1, y=y1, k=k) + cat2 = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2) + kz1 = treecorr.KZCorrelation(corr_params) + t0 = time.time() + kz1.process(cat1, cat2) + t1 = time.time() + print('Time for non-patch processing = ',t1-t0) + + print('weight = ',kz1.weight) + print('xi = ',kz1.xi) + print('varxi = ',kz1.varxi) + print('pullsq for xi = ',(kz1.xi-mean_xi)**2/var_xi) + print('max pull for xi = ',np.sqrt(np.max((kz1.xi-mean_xi)**2/var_xi))) + np.testing.assert_array_less((kz1.xi-mean_xi)**2, 9*var_xi) # < 3 sigma pull + np.testing.assert_allclose(kz1.varxi, mean_varxi, rtol=0.1) + + # Now run with patches, but still with shot variance. Should be basically the same answer. + cat2p = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2, npatch=npatch) + cat1p = treecorr.Catalog(x=x1, y=y1, k=k, patch_centers=cat2p.patch_centers) + kz2 = treecorr.KZCorrelation(corr_params) + t0 = time.time() + kz2.process(cat1p, cat2p) + t1 = time.time() + print('Time for patch processing = ',t1-t0) + print('weight = ',kz2.weight) + print('xi = ',kz2.xi) + print('xi1 = ',kz1.xi) + print('varxi = ',kz2.varxi) + np.testing.assert_allclose(kz2.weight, kz1.weight, rtol=1.e-2) + np.testing.assert_allclose(kz2.xi, kz1.xi, rtol=2.e-2) + np.testing.assert_allclose(kz2.varxi, kz1.varxi, rtol=1.e-2) + + # estimate_cov with var_method='shot' returns just the diagonal. + np.testing.assert_allclose(kz2.estimate_cov('shot'), kz2.varxi) + np.testing.assert_allclose(kz1.estimate_cov('shot'), kz1.varxi) + + # Now try jackknife variance estimate. 
+ t0 = time.time() + cov2 = kz2.estimate_cov('jackknife') + t1 = time.time() + print('Time to calculate jackknife covariance = ',t1-t0) + print('varxi = ',np.diagonal(cov2)) + print('cf var_xi = ',var_xi) + np.testing.assert_allclose(np.diagonal(cov2), var_xi, rtol=0.6) + + # Check only using patches for one of the two catalogs. + kz3 = treecorr.KZCorrelation(corr_params, var_method='jackknife') + t0 = time.time() + kz3.process(cat1p, cat2) + t1 = time.time() + print('Time for only patches for cat1 processing = ',t1-t0) + print('varxi = ',kz3.varxi) + np.testing.assert_allclose(kz3.weight, kz1.weight, rtol=1.e-2) + np.testing.assert_allclose(kz3.xi, kz1.xi, rtol=1.e-2) + np.testing.assert_allclose(kz3.varxi, var_xi, rtol=0.5) + + kz4 = treecorr.KZCorrelation(corr_params, var_method='jackknife', rng=rng) + t0 = time.time() + kz4.process(cat1, cat2p) + t1 = time.time() + print('Time for only patches for cat2 processing = ',t1-t0) + print('varxi = ',kz4.varxi) + np.testing.assert_allclose(kz4.weight, kz1.weight, rtol=1.e-2) + np.testing.assert_allclose(kz4.xi, kz1.xi, rtol=2.e-2) + np.testing.assert_allclose(kz4.varxi, var_xi, rtol=0.9) + + # Use initialize/finalize + kz5 = treecorr.KZCorrelation(corr_params) + for k1, p1 in enumerate(cat1p.get_patches()): + for k2, p2 in enumerate(cat2p.get_patches()): + kz5.process(p1, p2, initialize=(k1==k2==0), finalize=(k1==k2==npatch-1)) + np.testing.assert_allclose(kz5.xi, kz2.xi) + np.testing.assert_allclose(kz5.weight, kz2.weight) + np.testing.assert_allclose(kz5.varxi, kz2.varxi) + + # Check that these still work after roundtripping through a file. + try: + import fitsio + except ImportError: + pass + else: + file_name = os.path.join('output','test_write_results_kz.fits') + kz2.write(file_name, write_patch_results=True) + kz5 = treecorr.KZCorrelation.from_file(file_name) + cov5 = kz5.estimate_cov('jackknife') + np.testing.assert_allclose(cov5, cov2) + + # Check some invalid actions + # Bad var_method + with assert_raises(ValueError): + kz2.estimate_cov('invalid') + # Not run on patches, but need patches + with assert_raises(ValueError): + kz1.estimate_cov('jackknife') + with assert_raises(ValueError): + kz1.estimate_cov('sample') + with assert_raises(ValueError): + kz1.estimate_cov('marked_bootstrap') + with assert_raises(ValueError): + kz1.estimate_cov('bootstrap') + + cat1a = treecorr.Catalog(x=x1[:100], y=y1[:100], npatch=10) + cat2a = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=10) + cat1b = treecorr.Catalog(x=x1[:100], y=y1[:100], npatch=2) + cat2b = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=2) + kz6 = treecorr.KZCorrelation(corr_params) + kz7 = treecorr.KZCorrelation(corr_params) + # All catalogs need to have the same number of patches + with assert_raises(RuntimeError): + kz6.process(cat1a,cat2b) + with assert_raises(RuntimeError): + kz7.process(cat1b,cat2a) + + +if __name__ == '__main__': + test_direct() + test_direct_spherical() + test_single() + test_kz() + test_varxi() + test_jk() diff --git a/tests/test_mpi.py b/tests/test_mpi.py index c2ea29ef..6bef290d 100644 --- a/tests/test_mpi.py +++ b/tests/test_mpi.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -12,18 +12,26 @@ # and/or other materials provided with the distribution. 
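The test_mpi changes just below gate the MPI tests on two optional dependencies at once. A sketch of that gating pattern, mirroring the fitsio/mockmpi guards in the hunks that follow (names as used there):

    import warnings

    skip = False
    try:
        import fitsio  # noqa: F401
    except ImportError:
        skip = True
    try:
        from mockmpi import mock_mpiexec  # noqa: F401
    except ImportError:
        # Warn so the skip is visible in pytest runs.
        warnings.warn("Skipping some tests because mockmpi is not installed.")
        skip = True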
import sys -from mockmpi import mock_mpiexec +import warnings from test_helper import timer from mpi_test import setup, do_mpi_gg, do_mpi_ng, do_mpi_nk, do_mpi_nn, do_mpi_kk, do_mpi_kg, do_mpi_cov +skip=False + try: import fitsio except ImportError: # All the mpi tests use Aardvark.fit, so skip them when fitsio isn't installed. skip=True -else: - skip=False + +try: + from mockmpi import mock_mpiexec +except ImportError: + # Also skip if mockmpi is not installed. + # And warn about it so it shows up in pytest runs + warnings.warn("Skipping some tests because mockmpi is not installed.") + skip = True @timer def test_mpi_gg(): diff --git a/tests/test_mpi3pt.py b/tests/test_mpi3pt.py index fbb213ed..df81e8e5 100644 --- a/tests/test_mpi3pt.py +++ b/tests/test_mpi3pt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -12,15 +12,16 @@ # and/or other materials provided with the distribution. import sys -from mockmpi import mock_mpiexec from test_helper import timer from mpi_test3pt import * try: import fitsio + from mockmpi import mock_mpiexec except ImportError: # All the mpi tests use Aardvark.fit, so skip them when fitsio isn't installed. + # Also skip if mockmpi is not installed. skip=True else: skip=False diff --git a/tests/test_ng.py b/tests/test_ng.py index 92fd91ee..ca53b64d 100644 --- a/tests/test_ng.py +++ b/tests/test_ng.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -104,6 +104,10 @@ def test_direct(): np.testing.assert_allclose(data['gamT'], ng.xi) np.testing.assert_allclose(data['gamX'], ng.xi_im) + # When not using corr2, it's invalid to specify invalid g1_col + with assert_raises(ValueError): + cat = treecorr.Catalog(config['file_name'], config) + # Invalid with only one file_name del config['file_name2'] with assert_raises(TypeError): @@ -180,6 +184,17 @@ def test_direct(): np.testing.assert_allclose(ng3b.xi, ng.xi) np.testing.assert_allclose(ng3b.xi_im, ng.xi_im) + # or using the Corr2 base class + with CaptureLog() as cl: + ng3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(ng3c.npairs, ng.npairs) + np.testing.assert_allclose(ng3c.weight, ng.weight) + np.testing.assert_allclose(ng3c.meanr, ng.meanr) + np.testing.assert_allclose(ng3c.meanlogr, ng.meanlogr) + np.testing.assert_allclose(ng3c.xi, ng.xi) + np.testing.assert_allclose(ng3c.xi_im, ng.xi_im) + try: import fitsio except ImportError: @@ -605,7 +620,7 @@ def test_ng(): np.testing.assert_allclose(ng.xi, true_gt, rtol=0.1) np.testing.assert_allclose(ng.xi_im, 0, atol=5.e-3) - nrand = nlens * 3 + nrand = nlens * 10 xr = (rng.random_sample(nrand)-0.5) * L yr = (rng.random_sample(nrand)-0.5) * L rand_cat = treecorr.Catalog(x=xr, y=yr, x_units='arcmin', y_units='arcmin') diff --git a/tests/test_nk.py b/tests/test_nk.py index 4d893d3d..cacc9a89 100644 --- a/tests/test_nk.py +++ b/tests/test_nk.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the 
following @@ -98,6 +98,10 @@ def test_direct(): np.testing.assert_allclose(data['weight'], nk.weight) np.testing.assert_allclose(data['kappa'], nk.xi) + # When not using corr2, it's invalid to specify invalid g1_col, g2_col + with assert_raises(ValueError): + cat = treecorr.Catalog(config['file_name'], config) + # Invalid with only one file_name del config['file_name2'] with assert_raises(TypeError): @@ -159,6 +163,16 @@ def test_direct(): np.testing.assert_allclose(nk3b.meanlogr, nk.meanlogr) np.testing.assert_allclose(nk3b.xi, nk.xi) + # or using the Corr2 base class + with CaptureLog() as cl: + nk3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(nk3c.npairs, nk.npairs) + np.testing.assert_allclose(nk3c.weight, nk.weight) + np.testing.assert_allclose(nk3c.meanr, nk.meanr) + np.testing.assert_allclose(nk3c.meanlogr, nk.meanlogr) + np.testing.assert_allclose(nk3c.xi, nk.xi) + with assert_raises(TypeError): nk2 += config nk4 = treecorr.NKCorrelation(min_sep=min_sep/2, max_sep=max_sep, nbins=nbins) @@ -170,6 +184,8 @@ def test_direct(): nk6 = treecorr.NKCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins*2) with assert_raises(ValueError): nk2 += nk6 + with assert_raises(TypeError): + nk.process(cat1) try: import fitsio @@ -399,7 +415,7 @@ def test_nk(): print('max diff = ',max(abs(nk.xi - true_k))) np.testing.assert_allclose(nk.xi, true_k, rtol=0.1, atol=2.e-3) - nrand = nlens * 13 + nrand = nlens * 10 xr = (rng.random_sample(nrand)-0.5) * L yr = (rng.random_sample(nrand)-0.5) * L rand_cat = treecorr.Catalog(x=xr, y=yr, x_units='arcmin', y_units='arcmin') diff --git a/tests/test_nn.py b/tests/test_nn.py index 184e6ffb..59dd77ea 100644 --- a/tests/test_nn.py +++ b/tests/test_nn.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -1158,14 +1158,13 @@ def test_nn(): cat = treecorr.Catalog(x=x, y=y, x_units='arcmin', y_units='arcmin') dd = treecorr.NNCorrelation(bin_size=0.1, min_sep=1., max_sep=25., sep_units='arcmin') - dd.process(cat) + dd.process(cat, num_threads=1) print('dd.npairs = ',dd.npairs) # Using nbins=None rather than omitting nbins is equivalent. 
dd2 = treecorr.NNCorrelation(bin_size=0.1, min_sep=1., max_sep=25., nbins=None,
                                  sep_units='arcmin')
     dd2.process(cat, num_threads=1)
-    dd.process(cat, num_threads=1)
-    assert dd2 == dd
+    assert dd2 == dd  # Only exactly == if num_threads == 1

     # log(<R>) != <logR>, but it should be close:
     print('meanlogr - log(meanr) = ',dd.meanlogr - np.log(dd.meanr))
@@ -1275,6 +1274,18 @@ def test_nn():
     assert dd2b.sep_units == dd.sep_units
     assert dd2b.bin_type == dd.bin_type

+    # or using the Corr2 base class
+    with CaptureLog() as cl:
+        dd2c = treecorr.Corr2.from_file(out_file_name, logger=cl.logger)
+    assert out_file_name in cl.output
+    np.testing.assert_allclose(dd2c.logr, dd.logr, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.meanr, dd.meanr, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.meanlogr, dd.meanlogr, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.npairs, dd.npairs, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.tot, dd.tot, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.xi, dd.xi, rtol=1.e-3)
+    np.testing.assert_allclose(dd2c.varxi, dd.varxi, rtol=1.e-3)
+
     # Check the fits write option
     try:
         import fitsio
diff --git a/tests/test_nnn.py b/tests/test_nnn.py
index d4ef8cb6..b53ed398 100644
--- a/tests/test_nnn.py
+++ b/tests/test_nnn.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
diff --git a/tests/test_nq.py b/tests/test_nq.py
index 5c79f81c..4fa935b9 100644
--- a/tests/test_nq.py
+++ b/tests/test_nq.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -104,6 +104,10 @@ def test_direct():
     np.testing.assert_allclose(data['qR'], nq.xi)
     np.testing.assert_allclose(data['qR_im'], nq.xi_im)

+    # When not using corr2, it's invalid to specify invalid q1_col, q2_col
+    with assert_raises(ValueError):
+        cat = treecorr.Catalog(config['file_name'], config)
+
     # Invalid with only one file_name
     del config['file_name2']
     with assert_raises(TypeError):
@@ -179,6 +183,17 @@ def test_direct():
     np.testing.assert_allclose(nq3b.xi, nq.xi)
     np.testing.assert_allclose(nq3b.xi_im, nq.xi_im)

+    # or using the Corr2 base class
+    with CaptureLog() as cl:
+        nq3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger)
+    assert ascii_name in cl.output
+    np.testing.assert_allclose(nq3c.npairs, nq.npairs)
+    np.testing.assert_allclose(nq3c.weight, nq.weight)
+    np.testing.assert_allclose(nq3c.meanr, nq.meanr)
+    np.testing.assert_allclose(nq3c.meanlogr, nq.meanlogr)
+    np.testing.assert_allclose(nq3c.xi, nq.xi)
+    np.testing.assert_allclose(nq3c.xi_im, nq.xi_im)
+
     try:
         import fitsio
     except ImportError:
@@ -1096,7 +1111,7 @@ def make_spin4_field(rng):
         import fitsio
         patch_dir = 'output'
         low_mem = True
-    except:
+    except ImportError:
         # If we cannot write to a fits file, skip the save_patch_dir tests.
         patch_dir = None
         low_mem = False
@@ -1339,7 +1354,7 @@ def test_matrix_r():
     #
     # I.e. r is a spin-0 quantity, and q is a spin-4 quantity.
     # So we can compute the properly rotated Sum_k R_k by converting the R matrices into
-    # r and q complex numbers and computing NK and NQ correlation functions of those.
+    # r and q complex numbers and computing NZ and NQ correlation functions of those.
# This realization was in fact the impetus to add spin-4 correlations to TreeCorr. # # The following test confirms that this calculation is equivalent to doing the direct @@ -1442,43 +1457,35 @@ def test_matrix_r(): r = (R11 + R22)/2 + 1j * (R12 - R21)/2 q = (R11 - R22)/2 + 1j * (R12 + R21)/2 - # cat2 = sources for everything but imag(r), which we need to do separately. - # Of course, BFD doesn't need that, so for BFD, would just have cat2 for the sources. - # TODO: Might be nice to allow k to be complex and include it in cat2... + # cat2 = sources + # Note: BFD can use k, rather than z1,z2, since r is real in that use case. + # And would use NKCorrelation below rather than NZ. cat2 = treecorr.Catalog(x=x, y=y, w=w, g1=Q1, g2=Q2, - k=np.real(r), q1=np.real(q), q2=np.imag(q)) - cat2b = treecorr.Catalog(x=x, y=y, w=w, k=np.imag(r)) + z1=np.real(r), z2=np.imag(r), q1=np.real(q), q2=np.imag(q)) # Perform all the correlations ng = treecorr.NGCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) ng.process(cat1, cat2) - nk = treecorr.NKCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) - nk.process(cat1, cat2) - # Note: BFD can skip nki, since r is real in that use case. - nki = treecorr.NKCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) - nki.process(cat1, cat2b) + nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) + nz.process(cat1, cat2) nq = treecorr.NQCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) nq.process(cat1, cat2) # First check that the raw outputs match the matrix calculation. print('true_npairs = ',true_npairs) print('diff ng = ',ng.npairs - true_npairs) - print('diff nk = ',nk.npairs - true_npairs) - print('diff nki = ',nki.npairs - true_npairs) + print('diff nz = ',nz.npairs - true_npairs) print('diff nq = ',nq.npairs - true_npairs) np.testing.assert_array_equal(ng.npairs, true_npairs) - np.testing.assert_array_equal(nk.npairs, true_npairs) - np.testing.assert_array_equal(nki.npairs, true_npairs) + np.testing.assert_array_equal(nz.npairs, true_npairs) np.testing.assert_array_equal(nq.npairs, true_npairs) print('true_weight = ',true_weight) print('diff ng = ',ng.weight - true_weight) - print('diff nk = ',nk.weight - true_weight) - print('diff nki = ',nki.weight - true_weight) + print('diff nz = ',nz.weight - true_weight) print('diff nq = ',nq.weight - true_weight) np.testing.assert_allclose(ng.weight, true_weight) - np.testing.assert_allclose(nk.weight, true_weight) - np.testing.assert_allclose(nki.weight, true_weight) + np.testing.assert_allclose(nz.weight, true_weight) np.testing.assert_allclose(nq.weight, true_weight) print('true_Qt = ',true_Qt) @@ -1489,21 +1496,20 @@ def test_matrix_r(): np.testing.assert_allclose(ng.xi_im, true_Qx, atol=1.e-8) print('true_Rtt = ',true_Rtt) - print('nk.xi + nq.xi = ',nk.xi + nq.xi) - np.testing.assert_allclose(nk.xi + nq.xi, true_Rtt, atol=1.e-8) + print('nz.xi + nq.xi = ',nz.xi + nq.xi) + np.testing.assert_allclose(nz.xi + nq.xi, true_Rtt, atol=1.e-8) print('true_Rtx = ',true_Rtx) - print('nki.xi + nq.xi_im = ',nki.xi + nq.xi_im) - np.testing.assert_allclose(nki.xi + nq.xi_im, true_Rtx, atol=1.e-8) + np.testing.assert_allclose(nz.xi_im + nq.xi_im, true_Rtx, atol=1.e-8) print('true_Rxt = ',true_Rxt) - print('-nki.xi + nq.xi_im = ',-nki.xi + nq.xi_im) - np.testing.assert_allclose(-nki.xi + nq.xi_im, true_Rxt, atol=1.e-8) + print('-nz.xi_im + nq.xi_im = ',-nz.xi_im + nq.xi_im) + np.testing.assert_allclose(-nz.xi_im + nq.xi_im, 
true_Rxt, atol=1.e-8) print('true_Rxx = ',true_Rxx) - print('nk.xi - nq.xi = ',nk.xi - nq.xi) - np.testing.assert_allclose(nk.xi - nq.xi, true_Rxx, atol=1.e-8) + print('nz.xi - nq.xi = ',nz.xi - nq.xi) + np.testing.assert_allclose(nz.xi - nq.xi, true_Rxx, atol=1.e-8) # Now finish the calculation using r,q. # g = (rQ - qQ*) / (|r|^2-|q|^2) - r = nk.xi + 1j * nki.xi # Again, for BFD, r = nk.xi, since nki is 0. + r = nz.xi + 1j * nz.xi_im # Again, for BFD, r = nz.xi, since nz.xi_im is 0. q = nq.xi + 1j * nq.xi_im Q = ng.xi + 1j * ng.xi_im g = (r * Q - q * np.conj(Q)) / (np.abs(r)**2 - np.abs(q)**2) diff --git a/tests/test_nt.py b/tests/test_nt.py index 76105f21..095b84bb 100644 --- a/tests/test_nt.py +++ b/tests/test_nt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -104,6 +104,10 @@ def test_direct(): np.testing.assert_allclose(data['tR'], nt.xi) np.testing.assert_allclose(data['tR_im'], nt.xi_im) + # When not using corr2, specifying invalid t1_col, t2_col raises an error + with assert_raises(ValueError): + cat = treecorr.Catalog(config['file_name'], config) + # Invalid with only one file_name del config['file_name2'] with assert_raises(TypeError): @@ -179,6 +183,17 @@ def test_direct(): np.testing.assert_allclose(nt3b.xi, nt.xi) np.testing.assert_allclose(nt3b.xi_im, nt.xi_im) + # or using the Corr2 base class + with CaptureLog() as cl: + nt3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(nt3c.npairs, nt.npairs) + np.testing.assert_allclose(nt3c.weight, nt.weight) + np.testing.assert_allclose(nt3c.meanr, nt.meanr) + np.testing.assert_allclose(nt3c.meanlogr, nt.meanlogr) + np.testing.assert_allclose(nt3c.xi, nt.xi) + np.testing.assert_allclose(nt3c.xi_im, nt.xi_im) + try: import fitsio except ImportError: @@ -609,7 +624,7 @@ def test_nt(): np.testing.assert_allclose(nt.xi, true_tr, rtol=0.1) np.testing.assert_allclose(nt.xi_im, 0, atol=5.e-3) - nrand = nlens * 3 + nrand = nlens * 10 xr = (rng.random_sample(nrand)-0.5) * L yr = (rng.random_sample(nrand)-0.5) * L rand_cat = treecorr.Catalog(x=xr, y=yr, x_units='arcmin', y_units='arcmin') @@ -1087,7 +1102,7 @@ def make_spin3_field(rng): import fitsio patch_dir = 'output' low_mem = True - except: + except ImportError: # If we cannot write to a fits file, skip the save_patch_dir tests.
patch_dir = None low_mem = False diff --git a/tests/test_nv.py b/tests/test_nv.py index bdc32b0c..caf2bbe3 100644 --- a/tests/test_nv.py +++ b/tests/test_nv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -104,6 +104,10 @@ def test_direct(): np.testing.assert_allclose(data['vR'], nv.xi) np.testing.assert_allclose(data['vT'], nv.xi_im) + # When not using corr2, specifying invalid v1_col, v2_col raises an error + with assert_raises(ValueError): + cat = treecorr.Catalog(config['file_name'], config) + # Invalid with only one file_name del config['file_name2'] with assert_raises(TypeError): @@ -179,6 +183,17 @@ def test_direct(): np.testing.assert_allclose(nv3b.xi, nv.xi) np.testing.assert_allclose(nv3b.xi_im, nv.xi_im) + # or using the Corr2 base class + with CaptureLog() as cl: + nv3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(nv3c.npairs, nv.npairs) + np.testing.assert_allclose(nv3c.weight, nv.weight) + np.testing.assert_allclose(nv3c.meanr, nv.meanr) + np.testing.assert_allclose(nv3c.meanlogr, nv.meanlogr) + np.testing.assert_allclose(nv3c.xi, nv.xi) + np.testing.assert_allclose(nv3c.xi_im, nv.xi_im) + try: import fitsio except ImportError: @@ -1094,7 +1109,7 @@ def make_velocity_field(rng): import fitsio patch_dir = 'output' low_mem = True - except: + except ImportError: # If we cannot write to a fits file, skip the save_patch_dir tests. patch_dir = None low_mem = False diff --git a/tests/test_nz.py b/tests/test_nz.py new file mode 100644 index 00000000..240601ad --- /dev/null +++ b/tests/test_nz.py @@ -0,0 +1,1082 @@ +# Copyright (c) 2003-2024 by Mike Jarvis +# +# TreeCorr is free software: redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions, and the disclaimer given in the accompanying LICENSE +# file. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the disclaimer given in the documentation +# and/or other materials provided with the distribution. + +import numpy as np +import treecorr +import os +import sys +import coord +import time +from unittest import mock + +from test_helper import do_pickle, CaptureLog +from test_helper import assert_raises, timer, assert_warns + +@timer +def test_direct(): + # If the catalogs are small enough, we can do a direct calculation to see if it comes out right. + # This should exactly match the treecorr result if brute=True. + + ngal = 200 + s = 10. + rng = np.random.RandomState(8675309) + x1 = rng.normal(0,s, (ngal,) ) + y1 = rng.normal(0,s, (ngal,) ) + w1 = rng.random_sample(ngal) + + x2 = rng.normal(0,s, (ngal,) ) + y2 = rng.normal(0,s, (ngal,) ) + w2 = rng.random_sample(ngal) + z12 = rng.normal(0,0.2, (ngal,) ) + z22 = rng.normal(0,0.2, (ngal,) ) + + cat1 = treecorr.Catalog(x=x1, y=y1, w=w1) + cat2 = treecorr.Catalog(x=x2, y=y2, w=w2, z1=z12, z2=z22) + + min_sep = 1. + max_sep = 50.
+ nbins = 50 + bin_size = np.log(max_sep/min_sep) / nbins + nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) + nz.process(cat1, cat2) + + true_npairs = np.zeros(nbins, dtype=int) + true_weight = np.zeros(nbins, dtype=float) + true_xi = np.zeros(nbins, dtype=complex) + for i in range(ngal): + # It's hard to do all the pairs at once with numpy operations (although maybe possible). + # But we can at least do all the pairs for each entry in cat1 at once with arrays. + rsq = (x1[i]-x2)**2 + (y1[i]-y2)**2 + r = np.sqrt(rsq) + + ww = w1[i] * w2 + xi = ww * (z12 + 1j*z22) + + index = np.floor(np.log(r/min_sep) / bin_size).astype(int) + mask = (index >= 0) & (index < nbins) + np.add.at(true_npairs, index[mask], 1) + np.add.at(true_weight, index[mask], ww[mask]) + np.add.at(true_xi, index[mask], xi[mask]) + + true_xi /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',nz.npairs - true_npairs) + np.testing.assert_array_equal(nz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',nz.weight - true_weight) + np.testing.assert_allclose(nz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xi = ',true_xi) + print('nz.xi = ',nz.xi) + print('nz.xi_im = ',nz.xi_im) + np.testing.assert_allclose(nz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. + config = treecorr.config.read_config('configs/nz_direct.yaml') + try: + import fitsio + except ImportError: + pass + else: + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + with CaptureLog() as cl: + treecorr.corr2(config, logger=cl.logger) + assert "skipping z1_col" in cl.output + data = fitsio.read(config['nz_file_name']) + np.testing.assert_allclose(data['r_nom'], nz.rnom) + np.testing.assert_allclose(data['npairs'], nz.npairs) + np.testing.assert_allclose(data['weight'], nz.weight) + np.testing.assert_allclose(data['z_real'], nz.xi) + np.testing.assert_allclose(data['z_imag'], nz.xi_im) + + # When not using corr2, specifying invalid z1_col, z2_col raises an error + with assert_raises(ValueError): + cat = treecorr.Catalog(config['file_name'], config) + + # Invalid with only one file_name + del config['file_name2'] + with assert_raises(TypeError): + treecorr.corr2(config) + config['file_name2'] = 'data/nz_direct_cat2.fits' + # Invalid to request compensated if no rand_file + config['nz_statistic'] = 'compensated' + with assert_raises(TypeError): + treecorr.corr2(config) + + # Repeat with binslop = 0 + # And don't do any top-level recursion so we actually test not going to the leaves. + nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + max_top=0) + nz.process(cat1, cat2) + np.testing.assert_array_equal(nz.npairs, true_npairs) + np.testing.assert_allclose(nz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi, true_xi.real, atol=1.e-4) + np.testing.assert_allclose(nz.xi_im, true_xi.imag, atol=2.e-4) + + # With angle_slop = 0, it goes back to being basically exact (to single precision).
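+ # (Roughly speaking, bin_slop only limits how far a pair may land from its correct bin, + # while angle_slop=0 additionally forces the tree traversal essentially all the way down + # to the leaves, so each pair is accumulated individually.)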
+ nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + angle_slop=0, max_top=0) + nz.process(cat1, cat2) + np.testing.assert_array_equal(nz.npairs, true_npairs) + np.testing.assert_allclose(nz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check a few basic operations with a NZCorrelation object. + do_pickle(nz) + + nz2 = nz.copy() + nz2 += nz + np.testing.assert_allclose(nz2.npairs, 2*nz.npairs) + np.testing.assert_allclose(nz2.weight, 2*nz.weight) + np.testing.assert_allclose(nz2.meanr, 2*nz.meanr) + np.testing.assert_allclose(nz2.meanlogr, 2*nz.meanlogr) + np.testing.assert_allclose(nz2.xi, 2*nz.xi) + np.testing.assert_allclose(nz2.xi_im, 2*nz.xi_im) + + nz2.clear() + nz2 += nz + np.testing.assert_allclose(nz2.npairs, nz.npairs) + np.testing.assert_allclose(nz2.weight, nz.weight) + np.testing.assert_allclose(nz2.meanr, nz.meanr) + np.testing.assert_allclose(nz2.meanlogr, nz.meanlogr) + np.testing.assert_allclose(nz2.xi, nz.xi) + np.testing.assert_allclose(nz2.xi_im, nz.xi_im) + + ascii_name = 'output/nz_ascii.txt' + nz.write(ascii_name, precision=16) + nz3 = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_type='Log') + nz3.read(ascii_name) + np.testing.assert_allclose(nz3.npairs, nz.npairs) + np.testing.assert_allclose(nz3.weight, nz.weight) + np.testing.assert_allclose(nz3.meanr, nz.meanr) + np.testing.assert_allclose(nz3.meanlogr, nz.meanlogr) + np.testing.assert_allclose(nz3.xi, nz.xi) + np.testing.assert_allclose(nz3.xi_im, nz.xi_im) + + # Check that the repr is minimal + assert repr(nz3) == f'NZCorrelation(min_sep={min_sep}, max_sep={max_sep}, nbins={nbins})' + + # Simpler API using from_file: + with CaptureLog() as cl: + nz3b = treecorr.NZCorrelation.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(nz3b.npairs, nz.npairs) + np.testing.assert_allclose(nz3b.weight, nz.weight) + np.testing.assert_allclose(nz3b.meanr, nz.meanr) + np.testing.assert_allclose(nz3b.meanlogr, nz.meanlogr) + np.testing.assert_allclose(nz3b.xi, nz.xi) + np.testing.assert_allclose(nz3b.xi_im, nz.xi_im) + + # or using the Corr2 base class + with CaptureLog() as cl: + nz3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(nz3c.npairs, nz.npairs) + np.testing.assert_allclose(nz3c.weight, nz.weight) + np.testing.assert_allclose(nz3c.meanr, nz.meanr) + np.testing.assert_allclose(nz3c.meanlogr, nz.meanlogr) + np.testing.assert_allclose(nz3c.xi, nz.xi) + np.testing.assert_allclose(nz3c.xi_im, nz.xi_im) + + try: + import fitsio + except ImportError: + pass + else: + fits_name = 'output/nz_fits.fits' + nz.write(fits_name) + nz4 = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins) + nz4.read(fits_name) + np.testing.assert_allclose(nz4.npairs, nz.npairs) + np.testing.assert_allclose(nz4.weight, nz.weight) + np.testing.assert_allclose(nz4.meanr, nz.meanr) + np.testing.assert_allclose(nz4.meanlogr, nz.meanlogr) + np.testing.assert_allclose(nz4.xi, nz.xi) + np.testing.assert_allclose(nz4.xi_im, nz.xi_im) + + nz4b = treecorr.NZCorrelation.from_file(fits_name) + np.testing.assert_allclose(nz4b.npairs, nz.npairs) + np.testing.assert_allclose(nz4b.weight, nz.weight) + np.testing.assert_allclose(nz4b.meanr, nz.meanr) + np.testing.assert_allclose(nz4b.meanlogr, nz.meanlogr) + 
np.testing.assert_allclose(nz4b.xi, nz.xi) + np.testing.assert_allclose(nz4b.xi_im, nz.xi_im) + + with assert_raises(TypeError): + nz2 += config + nz4 = treecorr.NZCorrelation(min_sep=min_sep/2, max_sep=max_sep, nbins=nbins) + with assert_raises(ValueError): + nz2 += nz4 + nz5 = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep*2, nbins=nbins) + with assert_raises(ValueError): + nz2 += nz5 + nz6 = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins*2) + with assert_raises(ValueError): + nz2 += nz6 + with assert_raises(ValueError): + nz.process(cat1, cat2, patch_method='nonlocal') + + +@timer +def test_direct_spherical(): + # Repeat in spherical coords + + ngal = 100 + s = 10. + rng = np.random.RandomState(8675309) + x1 = rng.normal(0,s, (ngal,) ) + y1 = rng.normal(0,s, (ngal,) ) + 200 # Put everything at large y, so small angle on sky + z1 = rng.normal(0,s, (ngal,) ) + w1 = rng.random_sample(ngal) + + x2 = rng.normal(0,s, (ngal,) ) + y2 = rng.normal(0,s, (ngal,) ) + 200 + z2 = rng.normal(0,s, (ngal,) ) + w2 = rng.random_sample(ngal) + z12 = rng.normal(0,0.2, (ngal,) ) + z22 = rng.normal(0,0.2, (ngal,) ) + + ra1, dec1 = coord.CelestialCoord.xyz_to_radec(x1,y1,z1) + ra2, dec2 = coord.CelestialCoord.xyz_to_radec(x2,y2,z2) + + cat1 = treecorr.Catalog(ra=ra1, dec=dec1, ra_units='rad', dec_units='rad', w=w1) + cat2 = treecorr.Catalog(ra=ra2, dec=dec2, ra_units='rad', dec_units='rad', w=w2, z1=z12, z2=z22) + + min_sep = 1. + max_sep = 10. + nbins = 50 + bin_size = np.log(max_sep/min_sep) / nbins + nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, + sep_units='deg', brute=True) + nz.process(cat1, cat2) + + r1 = np.sqrt(x1**2 + y1**2 + z1**2) + r2 = np.sqrt(x2**2 + y2**2 + z2**2) + x1 /= r1; y1 /= r1; z1 /= r1 + x2 /= r2; y2 /= r2; z2 /= r2 + + true_npairs = np.zeros(nbins, dtype=int) + true_weight = np.zeros(nbins, dtype=float) + true_xi = np.zeros(nbins, dtype=complex) + + c1 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra1, dec1)] + c2 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra2, dec2)] + for i in range(ngal): + for j in range(ngal): + rsq = (x1[i]-x2[j])**2 + (y1[i]-y2[j])**2 + (z1[i]-z2[j])**2 + r = np.sqrt(rsq) + r *= coord.radians / coord.degrees + + index = np.floor(np.log(r/min_sep) / bin_size).astype(int) + if index < 0 or index >= nbins: + continue + + ww = w1[i] * w2[j] + xi = ww * (z12[j] + 1j * z22[j]) + + true_npairs[index] += 1 + true_weight[index] += ww + true_xi[index] += xi + + true_xi /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',nz.npairs - true_npairs) + np.testing.assert_array_equal(nz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',nz.weight - true_weight) + np.testing.assert_allclose(nz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xi = ',true_xi) + print('nz.xi = ',nz.xi) + print('nz.xi_im = ',nz.xi_im) + np.testing.assert_allclose(nz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. 
+ config = treecorr.config.read_config('configs/nz_direct_spherical.yaml') + try: + import fitsio + except ImportError: + pass + else: + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + treecorr.corr2(config) + data = fitsio.read(config['nz_file_name']) + np.testing.assert_allclose(data['r_nom'], nz.rnom) + np.testing.assert_allclose(data['npairs'], nz.npairs) + np.testing.assert_allclose(data['weight'], nz.weight) + np.testing.assert_allclose(data['z_real'], nz.xi) + np.testing.assert_allclose(data['z_imag'], nz.xi_im) + + # Repeat with binslop = 0 + # And don't do any top-level recursion so we actually test not going to the leaves. + nz = treecorr.NZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, + sep_units='deg', bin_slop=0, max_top=0) + nz.process(cat1, cat2) + np.testing.assert_array_equal(nz.npairs, true_npairs) + np.testing.assert_allclose(nz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi, true_xi.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(nz.xi_im, true_xi.imag, rtol=1.e-6, atol=1.e-8) + + +@timer +def test_single(): + # Use z(r) = z0 exp(-r^2/2r0^2) (1-r^2/2r0^2) around a single lens + + nsource = 300000 + z0 = 0.05 + 1j * 0.02 + r0 = 10. + L = 5. * r0 + rng = np.random.RandomState(8675309) + x = (rng.random_sample(nsource)-0.5) * L + y = (rng.random_sample(nsource)-0.5) * L + r2 = (x**2 + y**2) + r = np.sqrt(r2) + z = z0 * np.exp(-0.5*r2/r0**2) * (1.-0.5*r2/r0**2) + z1 = np.real(z) + z2 = np.imag(z) + + lens_cat = treecorr.Catalog(x=[0], y=[0], x_units='arcmin', y_units='arcmin') + source_cat = treecorr.Catalog(x=x, y=y, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin') + nz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin', + verbose=1) + nz.process(lens_cat, source_cat) + + # log(<R>) != <logR>, but it should be close: + print('meanlogr - log(meanr) = ',nz.meanlogr - np.log(nz.meanr)) + np.testing.assert_allclose(nz.meanlogr, np.log(nz.meanr), atol=1.e-3) + + r = nz.meanr + true_z = z0 * np.exp(-0.5*r**2/r0**2) * (1.-0.5*r**2/r0**2) + + print('nz.xi = ',nz.xi) + print('nz.xi_im = ',nz.xi_im) + print('true_z = ',true_z) + print('ratio = ',nz.xi / true_z) + print('diff = ',nz.xi - true_z) + print('max diff = ',max(abs(nz.xi - true_z))) + np.testing.assert_allclose(nz.xi, np.real(true_z), rtol=1.e-2, atol=1.e-4) + np.testing.assert_allclose(nz.xi_im, np.imag(true_z), rtol=1.e-2, atol=1.e-4) + + # Check that we get the same result using the corr2 function: + lens_cat.write(os.path.join('data','nz_single_lens.dat')) + source_cat.write(os.path.join('data','nz_single_source.dat')) + config = treecorr.read_config('configs/nz_single.yaml') + config['verbose'] = 0 + treecorr.corr2(config) + corr2_output = np.genfromtxt(os.path.join('output','nz_single.out'), names=True, + skip_header=1) + print('nz.xi = ',nz.xi) + print('from corr2 output = ',corr2_output['z_real']) + print('ratio = ',corr2_output['z_real']/nz.xi) + print('diff = ',corr2_output['z_real']-nz.xi) + np.testing.assert_allclose(corr2_output['z_real'], nz.xi, rtol=1.e-3) + np.testing.assert_allclose(corr2_output['z_imag'], nz.xi_im, rtol=1.e-3) + + +@timer +def test_nz(): + # Use z(r) = z0 exp(-r^2/2r0^2) (1-r^2/2r0^2) around a bunch of foreground lenses. + + nlens = 1000 + nsource = 100000 + z0 = 0.05 + 1j*0.02 + r0 = 10. + L = 100.
* r0 + rng = np.random.RandomState(8675309) + xl = (rng.random_sample(nlens)-0.5) * L + yl = (rng.random_sample(nlens)-0.5) * L + xs = (rng.random_sample(nsource)-0.5) * L + ys = (rng.random_sample(nsource)-0.5) * L + z1 = np.zeros( (nsource,) ) + z2 = np.zeros( (nsource,) ) + for x,y in zip(xl,yl): + dx = xs-x + dy = ys-y + r2 = dx**2 + dy**2 + zz = z0 * np.exp(-0.5*r2/r0**2) * (1.-0.5*r2/r0**2) + z1 += np.real(zz) + z2 += np.imag(zz) + + lens_cat = treecorr.Catalog(x=xl, y=yl, x_units='arcmin', y_units='arcmin') + source_cat = treecorr.Catalog(x=xs, y=ys, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin') + nz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin', + verbose=1) + nz.process(lens_cat, source_cat) + + # Using nbins=None rather than omitting nbins is equivalent. + nz2 = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., nbins=None, sep_units='arcmin') + nz2.process(lens_cat, source_cat, num_threads=1) + nz.process(lens_cat, source_cat, num_threads=1) + assert nz2 == nz + + r = nz.meanr + true_z = z0 * np.exp(-0.5*r**2/r0**2) * (1.-0.5*r**2/r0**2) + + print('nz.xi = ',nz.xi) + print('nz.xi_im = ',nz.xi_im) + print('true_z = ',true_z) + print('ratio = ',nz.xi / true_z) + print('diff = ',nz.xi - true_z) + print('max diff = ',max(abs(nz.xi - true_z))) + np.testing.assert_allclose(nz.xi, np.real(true_z), rtol=0.1, atol=2.e-3) + np.testing.assert_allclose(nz.xi_im, np.imag(true_z), rtol=0.1, atol=2.e-3) + + nrand = nlens * 10 + xr = (rng.random_sample(nrand)-0.5) * L + yr = (rng.random_sample(nrand)-0.5) * L + rand_cat = treecorr.Catalog(x=xr, y=yr, x_units='arcmin', y_units='arcmin') + rz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=20., sep_units='arcmin', + verbose=1) + rz.process(rand_cat, source_cat) + print('rz.xi = ',rz.xi) + xi, xi_im, varxi = nz.calculateXi(rz=rz) + print('compensated xi = ',xi) + print('compensated xi_im = ',xi_im) + print('true_z = ',true_z) + np.testing.assert_allclose(xi, np.real(true_z), rtol=0.05, atol=1.e-3) + np.testing.assert_allclose(xi_im, np.imag(true_z), rtol=0.05, atol=1.e-3) + + # Check that we get the same result using the corr2 function: + config = treecorr.read_config('configs/nz.yaml') + try: + import fitsio + except ImportError: + pass + else: + lens_cat.write(os.path.join('data','nz_lens.fits')) + source_cat.write(os.path.join('data','nz_source.fits')) + rand_cat.write(os.path.join('data','nz_rand.fits')) + config['verbose'] = 0 + config['precision'] = 8 + treecorr.corr2(config) + corr2_output = np.genfromtxt(os.path.join('output','nz.out'), names=True, skip_header=1) + print('nz.xi = ',nz.xi) + print('xi = ',xi) + print('from corr2 output = ',corr2_output['z_real']) + print('ratio = ',corr2_output['z_real']/xi) + print('diff = ',corr2_output['z_real']-xi) + np.testing.assert_allclose(corr2_output['z_real'], xi) + print('xi_im from corr2 output = ',corr2_output['z_imag']) + np.testing.assert_allclose(corr2_output['z_imag'], xi_im) + + # In the corr2 context, you can turn off the compensated bit, even if there are randoms + # (e.g. maybe you only want randoms for some nn calculation, but not nz.) 
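+ # ('simple' just uses the raw nz.xi values, while 'compensated' subtracts off the + # random-lens measurement, equivalent to calculateXi(rz=rz) above.)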
+ config['nz_statistic'] = 'simple' + treecorr.corr2(config) + corr2_output = np.genfromtxt(os.path.join('output','nz.out'), names=True, skip_header=1) + xi_simple, _, _ = nz.calculateXi() + np.testing.assert_equal(xi_simple, nz.xi) + np.testing.assert_allclose(corr2_output['z_real'], xi_simple) + + # Check the fits write option + out_file_name1 = os.path.join('output','nz_out1.fits') + nz.write(out_file_name1) + data = fitsio.read(out_file_name1) + np.testing.assert_almost_equal(data['r_nom'], np.exp(nz.logr)) + np.testing.assert_almost_equal(data['meanr'], nz.meanr) + np.testing.assert_almost_equal(data['meanlogr'], nz.meanlogr) + np.testing.assert_almost_equal(data['z_real'], nz.xi) + np.testing.assert_almost_equal(data['z_imag'], nz.xi_im) + np.testing.assert_almost_equal(data['sigma'], np.sqrt(nz.varxi)) + np.testing.assert_almost_equal(data['weight'], nz.weight) + np.testing.assert_almost_equal(data['npairs'], nz.npairs) + + out_file_name2 = os.path.join('output','nz_out2.fits') + nz.write(out_file_name2, rz=rz) + data = fitsio.read(out_file_name2) + np.testing.assert_almost_equal(data['r_nom'], np.exp(nz.logr)) + np.testing.assert_almost_equal(data['meanr'], nz.meanr) + np.testing.assert_almost_equal(data['meanlogr'], nz.meanlogr) + np.testing.assert_almost_equal(data['z_real'], xi) + np.testing.assert_almost_equal(data['z_imag'], xi_im) + np.testing.assert_almost_equal(data['sigma'], np.sqrt(varxi)) + np.testing.assert_almost_equal(data['weight'], nz.weight) + np.testing.assert_almost_equal(data['npairs'], nz.npairs) + + # Check the read function + nz2 = treecorr.NZCorrelation.from_file(out_file_name2) + np.testing.assert_almost_equal(nz2.logr, nz.logr) + np.testing.assert_almost_equal(nz2.meanr, nz.meanr) + np.testing.assert_almost_equal(nz2.meanlogr, nz.meanlogr) + np.testing.assert_almost_equal(nz2.xi, nz.xi) + np.testing.assert_almost_equal(nz2.xi_im, nz.xi_im) + np.testing.assert_almost_equal(nz2.varxi, nz.varxi) + np.testing.assert_almost_equal(nz2.weight, nz.weight) + np.testing.assert_almost_equal(nz2.npairs, nz.npairs) + assert nz2.coords == nz.coords + assert nz2.metric == nz.metric + assert nz2.sep_units == nz.sep_units + assert nz2.bin_type == nz.bin_type + + +@timer +def test_pieces(): + # Test that we can do the calculation in pieces and recombine the results + + try: + import fitsio + except ImportError: + print('Skip test_pieces, since fitsio not installed.') + return + + ncats = 3 + nlens = 1000 + nsource = 30000 + z0 = 0.05 + 1j*0.03 + r0 = 10. + L = 50. 
* r0 + rng = np.random.RandomState(8675309) + xl = (rng.random_sample(nlens)-0.5) * L + yl = (rng.random_sample(nlens)-0.5) * L + xs = (rng.random_sample( (nsource,ncats) )-0.5) * L + ys = (rng.random_sample( (nsource,ncats) )-0.5) * L + z1 = np.zeros( (nsource,ncats) ) + z2 = np.zeros( (nsource,ncats) ) + w = rng.random_sample( (nsource,ncats) ) + 0.5 + for x,y in zip(xl,yl): + dx = xs-x + dy = ys-y + r2 = dx**2 + dy**2 + zz = z0 * np.exp(-0.5*r2/r0**2) * (1.-r2/r0**2) + z1 += np.real(zz) + z2 += np.imag(zz) + + lens_cat = treecorr.Catalog(x=xl, y=yl, x_units='arcmin', y_units='arcmin') + source_cats = [ treecorr.Catalog(x=xs[:,k], y=ys[:,k], z1=z1[:,k], z2=z2[:,k], w=w[:,k], + x_units='arcmin', y_units='arcmin') for k in range(ncats) ] + full_source_cat = treecorr.Catalog(x=xs.flatten(), y=ys.flatten(), w=w.flatten(), + z1=z1.flatten(), z2=z2.flatten(), + x_units='arcmin', y_units='arcmin') + + t0 = time.time() + for k in range(ncats): + # These could each be done on different machines in a real world application. + nz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=25., sep_units='arcmin', + verbose=1) + # These should use process_cross, not process, since we don't want to call finalize. + nz.process_cross(lens_cat, source_cats[k]) + nz.write(os.path.join('output','nz_piece_%d.fits'%k)) + + pieces_nz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=25., sep_units='arcmin') + for k in range(ncats): + nz = pieces_nz.copy() + nz.read(os.path.join('output','nz_piece_%d.fits'%k)) + pieces_nz += nz + varz = treecorr.calculateVarZ(source_cats) + pieces_nz.finalize(varz) + t1 = time.time() + print('time for piece-wise processing (including I/O) = ',t1-t0) + + full_nz = treecorr.NZCorrelation(bin_size=0.1, min_sep=1., max_sep=25., sep_units='arcmin', + verbose=1) + full_nz.process(lens_cat, full_source_cat) + t2 = time.time() + print('time for full processing = ',t2-t1) + + print('max error in meanr = ',np.max(pieces_nz.meanr - full_nz.meanr),) + print(' max meanr = ',np.max(full_nz.meanr)) + print('max error in meanlogr = ',np.max(pieces_nz.meanlogr - full_nz.meanlogr),) + print(' max meanlogr = ',np.max(full_nz.meanlogr)) + print('max error in weight = ',np.max(pieces_nz.weight - full_nz.weight),) + print(' max weight = ',np.max(full_nz.weight)) + print('max error in xi = ',np.max(pieces_nz.xi - full_nz.xi),) + print(' max xi = ',np.max(full_nz.xi)) + print('max error in xi_im = ',np.max(pieces_nz.xi_im - full_nz.xi_im),) + print(' max xi_im = ',np.max(full_nz.xi_im)) + print('max error in varxi = ',np.max(pieces_nz.varxi - full_nz.varxi),) + print(' max varxi = ',np.max(full_nz.varxi)) + np.testing.assert_allclose(pieces_nz.meanr, full_nz.meanr, rtol=2.e-3) + np.testing.assert_allclose(pieces_nz.meanlogr, full_nz.meanlogr, atol=2.e-3) + np.testing.assert_allclose(pieces_nz.weight, full_nz.weight, rtol=3.e-2) + np.testing.assert_allclose(pieces_nz.xi, full_nz.xi, rtol=0.1) + np.testing.assert_allclose(pieces_nz.xi_im, full_nz.xi_im, atol=2.e-3) + np.testing.assert_allclose(pieces_nz.varxi, full_nz.varxi, rtol=3.e-2) + + # A different way to do this can produce results that are essentially identical to the + # full calculation. We can use wpos = w, but set w = 0 for the items in the pieces catalogs + # that we don't want to include. This will force the tree to be built identically in each + # case, but only use the subset of items in the calculation. The sum of all these should + # be identical to the full calculation aside from order of calculation differences. 
+ # However, we lose some speed, since there are a lot more wasted calculations along the + # way that have to be duplicated in each piece. + w2 = [ np.empty(w.shape) for k in range(ncats) ] + for k in range(ncats): + w2[k][:,:] = 0. + w2[k][:,k] = w[:,k] + source_cats2 = [ treecorr.Catalog(x=xs.flatten(), y=ys.flatten(), + z1=z1.flatten(), z2=z2.flatten(), + wpos=w.flatten(), w=w2[k].flatten(), + x_units='arcmin', y_units='arcmin') for k in range(ncats) ] + + t3 = time.time() + nz2 = [ full_nz.copy() for k in range(ncats) ] + for k in range(ncats): + nz2[k].clear() + nz2[k].process_cross(lens_cat, source_cats2[k]) + + pieces_nz2 = full_nz.copy() + pieces_nz2.clear() + for k in range(ncats): + pieces_nz2 += nz2[k] + pieces_nz2.finalize(varz) + t4 = time.time() + print('time for zero-weight piece-wise processing = ',t4-t3) + + print('max error in meanr = ',np.max(pieces_nz2.meanr - full_nz.meanr),) + print(' max meanr = ',np.max(full_nz.meanr)) + print('max error in meanlogr = ',np.max(pieces_nz2.meanlogr - full_nz.meanlogr),) + print(' max meanlogr = ',np.max(full_nz.meanlogr)) + print('max error in weight = ',np.max(pieces_nz2.weight - full_nz.weight),) + print(' max weight = ',np.max(full_nz.weight)) + print('max error in xi = ',np.max(pieces_nz2.xi - full_nz.xi),) + print(' max xi = ',np.max(full_nz.xi)) + print('max error in xi_im = ',np.max(pieces_nz2.xi_im - full_nz.xi_im),) + print(' max xi_im = ',np.max(full_nz.xi_im)) + print('max error in varxi = ',np.max(pieces_nz2.varxi - full_nz.varxi),) + print(' max varxi = ',np.max(full_nz.varxi)) + np.testing.assert_allclose(pieces_nz2.meanr, full_nz.meanr, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz2.meanlogr, full_nz.meanlogr, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz2.weight, full_nz.weight, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz2.xi, full_nz.xi, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz2.xi_im, full_nz.xi_im, atol=1.e-10) + np.testing.assert_allclose(pieces_nz2.varxi, full_nz.varxi, rtol=1.e-7) + + # Can also do this with initialize/finalize options + pieces_nz3 = full_nz.copy() + t3 = time.time() + for k in range(ncats): + pieces_nz3.process(lens_cat, source_cats2[k], initialize=(k==0), finalize=(k==ncats-1)) + t4 = time.time() + print('time for initialize/finalize processing = ',t4-t3) + + np.testing.assert_allclose(pieces_nz3.meanr, full_nz.meanr, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz3.meanlogr, full_nz.meanlogr, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz3.weight, full_nz.weight, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz3.xi, full_nz.xi, rtol=1.e-7) + np.testing.assert_allclose(pieces_nz3.xi_im, full_nz.xi_im, atol=1.e-10) + np.testing.assert_allclose(pieces_nz3.varxi, full_nz.varxi, rtol=1.e-7) + + # Try this with corr2 + lens_cat.write(os.path.join('data','nz_wpos_lens.fits')) + for i, sc in enumerate(source_cats2): + sc.write(os.path.join('data','nz_wpos_source%d.fits'%i)) + config = treecorr.read_config('configs/nz_wpos.yaml') + config['verbose'] = 0 + treecorr.corr2(config) + data = fitsio.read(config['nz_file_name']) + print('data.dtype = ',data.dtype) + np.testing.assert_allclose(data['meanr'], pieces_nz3.meanr) + np.testing.assert_allclose(data['meanlogr'], pieces_nz3.meanlogr) + np.testing.assert_allclose(data['weight'], pieces_nz3.weight) + np.testing.assert_allclose(data['z_real'], pieces_nz3.xi) + np.testing.assert_allclose(data['z_imag'], pieces_nz3.xi_im) + np.testing.assert_allclose(data['sigma']**2, pieces_nz3.varxi) + + +@timer +def test_varxi():
+ # Test that varxi is correct (or close) based on actual variance of many runs. + + z0 = 0.05 + 1j*0.05 + r0 = 10. + L = 10 * r0 + rng = np.random.RandomState(8675309) + + nsource = 1000 + nrand = 10 + nruns = 50000 + lens = treecorr.Catalog(x=[0], y=[0]) + + file_name = 'data/test_varxi_nz.npz' + print(file_name) + if not os.path.isfile(file_name): + all_nzs = [] + all_rzs = [] + for run in range(nruns): + print(f'{run}/{nruns}') + x2 = (rng.random_sample(nsource)-0.5) * L + y2 = (rng.random_sample(nsource)-0.5) * L + x3 = (rng.random_sample(nrand)-0.5) * L + y3 = (rng.random_sample(nrand)-0.5) * L + + r2 = (x2**2 + y2**2)/r0**2 + zz = z0 * np.exp(-r2/2.) * (1.-r2/2) + z1 = np.real(zz) + z2 = np.imag(zz) + # This time, add some shape noise (different each run). + z1 += rng.normal(0, 0.1, size=nsource) + z2 += rng.normal(0, 0.1, size=nsource) + # Varied weights are hard, but at least check that non-unit weights work correctly. + w = np.ones_like(x2) * 5 + + source = treecorr.Catalog(x=x2, y=y2, w=w, z1=z1, z2=z2) + rand = treecorr.Catalog(x=x3, y=y3) + nz = treecorr.NZCorrelation(bin_size=0.3, min_sep=6., max_sep=15., angle_slop=0.3) + rz = treecorr.NZCorrelation(bin_size=0.3, min_sep=6., max_sep=15., angle_slop=0.3) + nz.process(lens, source) + rz.process(rand, source) + all_nzs.append(nz) + all_rzs.append(rz) + + all_xis = [nz.calculateXi() for nz in all_nzs] + var_xi_1 = np.var([xi[0] for xi in all_xis], axis=0) + mean_varxi_1 = np.mean([xi[2] for xi in all_xis], axis=0) + + all_xis = [nz.calculateXi(rz=rz) for (nz,rz) in zip(all_nzs, all_rzs)] + var_xi_2 = np.var([xi[0] for xi in all_xis], axis=0) + mean_varxi_2 = np.mean([xi[2] for xi in all_xis], axis=0) + + np.savez(file_name, + var_xi_1=var_xi_1, mean_varxi_1=mean_varxi_1, + var_xi_2=var_xi_2, mean_varxi_2=mean_varxi_2) + + data = np.load(file_name) + mean_varxi_1 = data['mean_varxi_1'] + var_xi_1 = data['var_xi_1'] + mean_varxi_2 = data['mean_varxi_2'] + var_xi_2 = data['var_xi_2'] + + print('nruns = ',nruns) + print('Uncompensated:') + print('mean_varxi = ',mean_varxi_1) + print('var_xi = ',var_xi_1) + print('ratio = ',var_xi_1 / mean_varxi_1) + print('max relerr for xi = ',np.max(np.abs((var_xi_1 - mean_varxi_1)/var_xi_1))) + print('diff = ',var_xi_1 - mean_varxi_1) + np.testing.assert_allclose(mean_varxi_1, var_xi_1, rtol=0.02) + + print('Compensated:') + print('mean_varxi = ',mean_varxi_2) + print('var_xi = ',var_xi_2) + print('ratio = ',var_xi_2 / mean_varxi_2) + print('max relerr for xi = ',np.max(np.abs((var_xi_2 - mean_varxi_2)/var_xi_2))) + print('diff = ',var_xi_2 - mean_varxi_2) + np.testing.assert_allclose(mean_varxi_2, var_xi_2, rtol=0.04) + + # Now the actual test that's based on current code, not just from the saved file. + # There is a bit more noise on a single run, so the tolerance needs to be somewhat higher. + x2 = (rng.random_sample(nsource)-0.5) * L + y2 = (rng.random_sample(nsource)-0.5) * L + x3 = (rng.random_sample(nrand)-0.5) * L + y3 = (rng.random_sample(nrand)-0.5) * L + + r2 = (x2**2 + y2**2)/r0**2 + zz = z0 * np.exp(-r2/2.) * (1.-r2/2.)
+ z1 = np.real(zz) + z2 = np.imag(zz) + z1 += rng.normal(0, 0.1, size=nsource) + z2 += rng.normal(0, 0.1, size=nsource) + w = np.ones_like(x2) * 5 + + source = treecorr.Catalog(x=x2, y=y2, w=w, z1=z1, z2=z2) + rand = treecorr.Catalog(x=x3, y=y3) + nz = treecorr.NZCorrelation(bin_size=0.3, min_sep=6., max_sep=15., angle_slop=0.3) + rz = treecorr.NZCorrelation(bin_size=0.3, min_sep=6., max_sep=15., angle_slop=0.3) + nz.process(lens, source) + rz.process(rand, source) + + print('single run:') + print('Uncompensated') + print('ratio = ',nz.varxi / var_xi_1) + print('max relerr for xi = ',np.max(np.abs((nz.varxi - var_xi_1)/var_xi_1))) + np.testing.assert_allclose(nz.varxi, var_xi_1, rtol=0.6) + + xi, xi_im, varxi = nz.calculateXi(rz=rz) + print('Compensated') + print('ratio = ',varxi / var_xi_2) + print('max relerr for xi = ',np.max(np.abs((varxi - var_xi_2)/var_xi_2))) + np.testing.assert_allclose(varxi, var_xi_2, rtol=0.6) + +@timer +def test_jk(): + + # Similar to the profile we use above, but multiple "lenses". + z0 = 0.05 + 1j*0.05 + r0 = 30. + L = 30 * r0 + rng = np.random.RandomState(8675309) + + nsource = 100000 + nrand = 1000 + nlens = 300 + nruns = 1000 + npatch = 64 + + corr_params = dict(bin_size=0.3, min_sep=5, max_sep=30, bin_slop=0.1) + + def make_field(rng): + x1 = (rng.random(nlens)-0.5) * L + y1 = (rng.random(nlens)-0.5) * L + w = rng.random(nlens) + 10 + x2 = (rng.random(nsource)-0.5) * L + y2 = (rng.random(nsource)-0.5) * L + x3 = (rng.random(nrand)-0.5) * L + y3 = (rng.random(nrand)-0.5) * L + + # Start with just the noise + z1 = rng.normal(0, 0.1, size=nsource) + z2 = rng.normal(0, 0.1, size=nsource) + + # Add in the signal from all lenses + for i in range(nlens): + x2i = x2 - x1[i] + y2i = y2 - y1[i] + r2 = (x2i**2 + y2i**2)/r0**2 + zz = w[i] * z0 * np.exp(-r2/2.) 
* (1-r2/2) + z1 += np.real(zz) + z2 += np.imag(zz) + return x1, y1, w, x2, y2, z1, z2, x3, y3 + + file_name = 'data/test_nz_jk_{}.npz'.format(nruns) + print(file_name) + if not os.path.isfile(file_name): + all_nzs = [] + all_rzs = [] + rng = np.random.default_rng() + for run in range(nruns): + x1, y1, w, x2, y2, z1, z2, x3, y3 = make_field(rng) + print(run,': ',np.mean(z1),np.std(z1),np.min(z1),np.max(z1)) + cat1 = treecorr.Catalog(x=x1, y=y1, w=w) + cat2 = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2) + cat3 = treecorr.Catalog(x=x3, y=y3) + nz = treecorr.NZCorrelation(corr_params) + rz = treecorr.NZCorrelation(corr_params) + nz.process(cat1, cat2) + rz.process(cat3, cat2) + all_nzs.append(nz) + all_rzs.append(rz) + + mean_xi = np.mean([nz.xi for nz in all_nzs], axis=0) + var_xi = np.var([nz.xi for nz in all_nzs], axis=0) + mean_varxi = np.mean([nz.varxi for nz in all_nzs], axis=0) + + for nz, rz in zip(all_nzs, all_rzs): + nz.calculateXi(rz=rz) + + mean_xi_r = np.mean([nz.xi for nz in all_nzs], axis=0) + var_xi_r = np.var([nz.xi for nz in all_nzs], axis=0) + mean_varxi_r = np.mean([nz.varxi for nz in all_nzs], axis=0) + + np.savez(file_name, + mean_xi=mean_xi, var_xi=var_xi, mean_varxi=mean_varxi, + mean_xi_r=mean_xi_r, var_xi_r=var_xi_r, mean_varxi_r=mean_varxi_r) + + data = np.load(file_name) + mean_xi = data['mean_xi'] + mean_varxi = data['mean_varxi'] + var_xi = data['var_xi'] + + print('mean_xi = ',mean_xi) + print('mean_varxi = ',mean_varxi) + print('var_xi = ',var_xi) + print('ratio = ',var_xi / mean_varxi) + + rng = np.random.default_rng(1234) + x1, y1, w, x2, y2, z1, z2, x3, y3 = make_field(rng) + + cat1 = treecorr.Catalog(x=x1, y=y1, w=w) + cat2 = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2) + nz1 = treecorr.NZCorrelation(corr_params) + t0 = time.time() + nz1.process(cat1, cat2) + t1 = time.time() + print('Time for non-patch processing = ',t1-t0) + + print('weight = ',nz1.weight) + print('xi = ',nz1.xi) + print('varxi = ',nz1.varxi) + print('pullsq for xi = ',(nz1.xi-mean_xi)**2/var_xi) + print('max pull for xi = ',np.sqrt(np.max((nz1.xi-mean_xi)**2/var_xi))) + np.testing.assert_array_less((nz1.xi-mean_xi)**2, 9*var_xi) # < 3 sigma pull + np.testing.assert_allclose(nz1.varxi, mean_varxi, rtol=0.1) + + # Now run with patches, but still with shot variance. Should be basically the same answer. + try: + import fitsio + patch_dir = 'output' + low_mem = True + except ImportError: + # If we cannot write to a fits file, skip the save_patch_dir tests. + patch_dir = None + low_mem = False + cat2p = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2, npatch=npatch, save_patch_dir=patch_dir) + if low_mem: + cat2p.write_patches() # Force rewrite of any existing saved patches. + cat1p = treecorr.Catalog(x=x1, y=y1, w=w, patch_centers=cat2p.patch_centers) + print('tot w = ',np.sum(w)) + print('Patch\tNlens\tNsource') + for i in range(npatch): + print('%d\t%d\t%d'%(i,np.sum(cat1p.w[cat1p.patch==i]),np.sum(cat2p.w[cat2p.patch==i]))) + nz2 = treecorr.NZCorrelation(corr_params) + t0 = time.time() + nz2.process(cat1p, cat2p) + t1 = time.time() + print('Time for patch processing = ',t1-t0) + print('weight = ',nz2.weight) + print('xi = ',nz2.xi) + print('xi1 = ',nz1.xi) + print('varxi = ',nz2.varxi) + print('ratio = ',nz2.xi/nz1.xi) + np.testing.assert_allclose(nz2.weight, nz1.weight, rtol=1.e-2) + np.testing.assert_allclose(nz2.xi, nz1.xi, rtol=1.e-2) + np.testing.assert_allclose(nz2.varxi, nz1.varxi, rtol=1.e-2) + + # estimate_cov with var_method='shot' returns just the diagonal. 
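+ # (I.e. the result is a 1-d array that can be compared directly to varxi, + # not a full covariance matrix.)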
+ np.testing.assert_allclose(nz2.estimate_cov('shot'), nz2.varxi) + np.testing.assert_allclose(nz1.estimate_cov('shot'), nz1.varxi) + + # Now try jackknife variance estimate. + t0 = time.time() + cov2 = nz2.estimate_cov('jackknife') + t1 = time.time() + print('Time to calculate jackknife covariance = ',t1-t0) + print('varxi = ',np.diagonal(cov2)) + print('cf var_xi = ',var_xi) + np.testing.assert_allclose(np.diagonal(cov2), var_xi, rtol=0.5) + + # Check only using patches for one of the two catalogs. + # Not as good as using patches for both, but not much worse. + nz3 = treecorr.NZCorrelation(corr_params, var_method='jackknife') + t0 = time.time() + nz3.process(cat1p, cat2) + t1 = time.time() + print('Time for only patches for cat1 processing = ',t1-t0) + print('varxi = ',nz3.varxi) + np.testing.assert_allclose(nz3.weight, nz1.weight, rtol=1.e-2) + np.testing.assert_allclose(nz3.xi, nz1.xi, rtol=1.e-2) + np.testing.assert_allclose(nz3.varxi, var_xi, rtol=0.5) + + nz4 = treecorr.NZCorrelation(corr_params, var_method='jackknife', rng=rng) + t0 = time.time() + nz4.process(cat1, cat2p) + t1 = time.time() + print('Time for only patches for cat2 processing = ',t1-t0) + print('varxi = ',nz4.varxi) + np.testing.assert_allclose(nz4.weight, nz1.weight, rtol=1.e-2) + np.testing.assert_allclose(nz4.xi, nz1.xi, rtol=1.e-2) + np.testing.assert_allclose(nz4.varxi, var_xi, rtol=0.6) + + # Use initialize/finalize + nz5 = treecorr.NZCorrelation(corr_params) + for k1, p1 in enumerate(cat1p.get_patches()): + for k2, p2 in enumerate(cat2p.get_patches()): + nz5.process(p1, p2, initialize=(k1==k2==0), finalize=(k1==k2==npatch-1)) + np.testing.assert_allclose(nz5.xi, nz2.xi) + np.testing.assert_allclose(nz5.weight, nz2.weight) + np.testing.assert_allclose(nz5.varxi, nz2.varxi) + + # Check that these still work after roundtripping through a file. + try: + import fitsio + except ImportError: + pass + else: + file_name = os.path.join('output','test_write_results_nz.fits') + nz2.write(file_name, write_patch_results=True) + nz5 = treecorr.NZCorrelation.from_file(file_name) + cov5 = nz5.estimate_cov('jackknife') + np.testing.assert_allclose(cov5, cov2) + + # Use a random catalog + mean_xi_r = data['mean_xi_r'] + mean_varxi_r = data['mean_varxi_r'] + var_xi_r = data['var_xi_r'] + + print('mean_xi = ',mean_xi_r) + print('mean_varxi = ',mean_varxi_r) + print('var_xi = ',var_xi_r) + print('ratio = ',var_xi_r / mean_varxi_r) + + cat3 = treecorr.Catalog(x=x3, y=y3) + rz5 = treecorr.NZCorrelation(corr_params) + rz5.process(cat3, cat2) + nz5 = nz1.copy() + nz5.calculateXi(rz=rz5) + print('weight = ',nz5.weight) + print('xi = ',nz5.xi) + print('varxi = ',nz5.varxi) + print('ratio = ',nz5.varxi / var_xi_r) + print('pullsq for xi = ',(nz5.xi-mean_xi_r)**2/var_xi_r) + print('max pull for xi = ',np.sqrt(np.max((nz5.xi-mean_xi_r)**2/var_xi_r))) + np.testing.assert_array_less((nz5.xi-mean_xi_r)**2, 9*var_xi_r) # < 3 sigma pull + np.testing.assert_allclose(nz5.varxi, mean_varxi_r, rtol=0.1) + + # Repeat with patches + cat3p = treecorr.Catalog(x=x3, y=y3, patch_centers=cat2p.patch_centers) + rz6 = treecorr.NZCorrelation(corr_params) + rz6.process(cat3p, cat2p, low_mem=low_mem) + nz6 = nz2.copy() + nz6.calculateXi(rz=rz6) + cov6 = nz6.estimate_cov('jackknife') + np.testing.assert_allclose(np.diagonal(cov6), var_xi_r, rtol=0.5) + + # Use a random catalog without patches. 
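+ # (Only the source catalog has patches here, so the jackknife realizations come + # from the patched side alone, as in the nz4 case above.)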
+ rz7 = treecorr.NZCorrelation(corr_params) + rz7.process(cat3, cat2p) + nz7 = nz4.copy() + nz7.calculateXi(rz=rz7) + cov7 = nz7.estimate_cov('jackknife') + np.testing.assert_allclose(np.diagonal(cov7), var_xi_r, rtol=0.7) + + nz8 = nz2.copy() + nz8.calculateXi(rz=rz7) + cov8 = nz8.estimate_cov('jackknife') + np.testing.assert_allclose(np.diagonal(cov8), var_xi_r, rtol=0.5) + + # Check some invalid actions + # Bad var_method + with assert_raises(ValueError): + nz2.estimate_cov('invalid') + # Not run on patches, but need patches + with assert_raises(ValueError): + nz1.estimate_cov('jackknife') + with assert_raises(ValueError): + nz1.estimate_cov('sample') + with assert_raises(ValueError): + nz1.estimate_cov('marked_bootstrap') + with assert_raises(ValueError): + nz1.estimate_cov('bootstrap') + # rz also needs patches (at least for the z part). + with assert_raises(RuntimeError): + nz2.calculateXi(rz=nz1) + + cat1a = treecorr.Catalog(x=x1[:100], y=y1[:100], npatch=10) + cat2a = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=10) + cat1b = treecorr.Catalog(x=x1[:100], y=y1[:100], npatch=2) + cat2b = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=2) + nz6 = treecorr.NZCorrelation(corr_params) + nz7 = treecorr.NZCorrelation(corr_params) + # All catalogs need to have the same number of patches + with assert_raises(RuntimeError): + nz6.process(cat1a,cat2b) + with assert_raises(RuntimeError): + nz7.process(cat1b,cat2a) + + +if __name__ == '__main__': + test_direct() + test_direct_spherical() + test_single() + test_nz() + test_pieces() + test_varxi() + test_jk() diff --git a/tests/test_patch.py b/tests/test_patch.py index c35a6c0c..116417e3 100644 --- a/tests/test_patch.py +++ b/tests/test_patch.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -16,10 +16,7 @@ import coord import time import treecorr -try: - import cPickle as pickle -except ImportError: - import pickle +import pickle from test_helper import assert_raises, do_pickle, timer, get_from_wiki, CaptureLog, clear_save diff --git a/tests/test_patch3pt.py b/tests/test_patch3pt.py index b7e45be1..b6dd5bc6 100644 --- a/tests/test_patch3pt.py +++ b/tests/test_patch3pt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -16,10 +16,7 @@ import coord import time import treecorr -try: - import cPickle as pickle -except ImportError: - import pickle +import pickle from test_helper import assert_raises, do_pickle, timer, get_from_wiki, CaptureLog, clear_save from test_helper import profile @@ -511,7 +508,7 @@ def test_kkk_logruv_jk(): cov = kkkp.estimate_cov('jackknife') print(np.diagonal(cov)) print('max log(ratio) = ',np.max(np.abs(np.log(np.diagonal(cov))-np.log(var_kkk)))) - np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.3*tol_factor) + np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.5*tol_factor) print('sample:') cov = kkkp.estimate_cov('sample') @@ -541,7 +538,7 @@ def test_kkk_logruv_jk(): cov = kkkp.estimate_cov('jackknife') print(np.diagonal(cov)) print('max log(ratio) = 
',np.max(np.abs(np.log(np.diagonal(cov))-np.log(var_kkk)))) - np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.3*tol_factor) + np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.5*tol_factor) print('sample:') cov = kkkp.estimate_cov('sample') @@ -571,7 +568,7 @@ def test_kkk_logruv_jk(): cov = kkkp.estimate_cov('jackknife') print(np.diagonal(cov)) print('max log(ratio) = ',np.max(np.abs(np.log(np.diagonal(cov))-np.log(var_kkk)))) - np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.3*tol_factor) + np.testing.assert_allclose(np.log(np.diagonal(cov)), np.log(var_kkk), atol=0.5*tol_factor) print('sample:') cov = kkkp.estimate_cov('sample') diff --git a/tests/test_periodic.py b/tests/test_periodic.py index 6dd50d2c..7863cbb2 100644 --- a/tests/test_periodic.py +++ b/tests/test_periodic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -15,6 +15,7 @@ import time import os import treecorr +import warnings from test_helper import assert_raises, timer @@ -248,15 +249,14 @@ def test_halotools(): # Note: halotools as of version 0.6 use astropy.extern.six, which is deprecated. # Ignore the warning that is emitted about this. And in later astropy versions, it # now raises a ModuleNotFoundError. So put it inside this try block. - import warnings with warnings.catch_warnings(): warnings.simplefilter('ignore', category=AstropyWarning) from halotools.mock_observables import npairs_3d except ImportError: + warnings.warn("Skipping some tests because halotools or astropy is not installed.") print('Skipping test_halotools, since either halotools or astropy is not installed.') return - # Compare the Periodic metric with the same calculation in halotools # This first bit is directly from the documentation for halotools.npairs_3d # https://halotools.readthedocs.io/en/latest/api/halotools.mock_observables.npairs_3d.html diff --git a/tests/test_qq.py b/tests/test_qq.py index 9a391eb5..b90fef41 100644 --- a/tests/test_qq.py +++ b/tests/test_qq.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -198,6 +198,19 @@ def test_direct(): np.testing.assert_allclose(qq3b.xim, qq.xim) np.testing.assert_allclose(qq3b.xim_im, qq.xim_im) + # or using the Corr2 base class + with CaptureLog() as cl: + qq3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(qq3c.npairs, qq.npairs) + np.testing.assert_allclose(qq3c.weight, qq.weight) + np.testing.assert_allclose(qq3c.meanr, qq.meanr) + np.testing.assert_allclose(qq3c.meanlogr, qq.meanlogr) + np.testing.assert_allclose(qq3c.xip, qq.xip) + np.testing.assert_allclose(qq3c.xip_im, qq.xip_im) + np.testing.assert_allclose(qq3c.xim, qq.xim) + np.testing.assert_allclose(qq3c.xim_im, qq.xim_im) + try: import fitsio except ImportError: @@ -427,11 +440,6 @@ def test_qq(): verbose=1) qq.process(cat) - qq2 = treecorr.QQCorrelation(bin_size=0.1, min_sep=10., max_sep=100., sep_units='arcmin') - qq2.process(cat, num_threads=1) - qq.process(cat, num_threads=1) - assert qq2 == qq - # log(<R>) != <logR>, but it should be close: print('meanlogr - 
log(meanr) = ',qq.meanlogr - np.log(qq.meanr)) np.testing.assert_allclose(qq.meanlogr, np.log(qq.meanr), atol=1.e-3) diff --git a/tests/test_reader.py b/tests/test_reader.py index c5e9e878..0d24e28a 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -14,6 +14,7 @@ import os import sys import numpy as np +import warnings from unittest import mock from treecorr.reader import FitsReader, HdfReader, PandasReader, AsciiReader, ParquetReader @@ -26,6 +27,8 @@ def test_fits_reader(): try: import fitsio except ImportError: + # Emit a real warning just once, so it shows up when running pytest. + warnings.warn("Skipping some tests because fitsio is not installed.") print('Skip test_fits_reader, since fitsio not installed') return @@ -172,6 +175,7 @@ def test_hdf_reader(): try: import h5py # noqa: F401 except ImportError: + warnings.warn("Skipping some tests because h5py is not installed.") print('Skipping HdfReader tests, since h5py not installed.') return @@ -282,6 +286,7 @@ def test_parquet_reader(): import pandas # noqa: F401 import pyarrow # noqa: F401 except ImportError: + warnings.warn("Skipping some tests because pyarrow is not installed.") print('Skipping ParquetReader tests, since pandas or pyarrow not installed.') return @@ -547,6 +552,7 @@ def test_pandas_reader(): try: import pandas # noqa: F401 except ImportError: + warnings.warn("Skipping some tests because pandas is not installed.") print('Skipping PandasReader tests, since pandas not installed.') return diff --git a/tests/test_rperp.py b/tests/test_rperp.py index 241dd803..9a29ae60 100644 --- a/tests/test_rperp.py +++ b/tests/test_rperp.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/test_tt.py b/tests/test_tt.py index 810d6349..3d20a279 100644 --- a/tests/test_tt.py +++ b/tests/test_tt.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -197,6 +197,19 @@ def test_direct(): np.testing.assert_allclose(tt3b.xim, tt.xim) np.testing.assert_allclose(tt3b.xim_im, tt.xim_im) + # or using the Corr2 base class + with CaptureLog() as cl: + tt3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(tt3c.npairs, tt.npairs) + np.testing.assert_allclose(tt3c.weight, tt.weight) + np.testing.assert_allclose(tt3c.meanr, tt.meanr) + np.testing.assert_allclose(tt3c.meanlogr, tt.meanlogr) + np.testing.assert_allclose(tt3c.xip, tt.xip) + np.testing.assert_allclose(tt3c.xip_im, tt.xip_im) + np.testing.assert_allclose(tt3c.xim, tt.xim) + np.testing.assert_allclose(tt3c.xim_im, tt.xim_im) + try: import fitsio except ImportError: @@ -426,11 +439,6 @@ def test_tt(): verbose=1) tt.process(cat) - tt2 = treecorr.TTCorrelation(bin_size=0.1, min_sep=10., max_sep=100., sep_units='arcmin') - tt2.process(cat, num_threads=1) - tt.process(cat, num_threads=1) - assert tt2 == tt - # log(<R>) != <logR>, but it should be
close: print('meanlogr - log(meanr) = ',tt.meanlogr - np.log(tt.meanr)) np.testing.assert_allclose(tt.meanlogr, np.log(tt.meanr), atol=1.e-3) diff --git a/tests/test_twod.py b/tests/test_twod.py index 469a0b14..95d60c85 100644 --- a/tests/test_twod.py +++ b/tests/test_twod.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/tests/test_vv.py b/tests/test_vv.py index 8e3214e0..887094b6 100644 --- a/tests/test_vv.py +++ b/tests/test_vv.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -197,6 +197,19 @@ def test_direct(): np.testing.assert_allclose(vv3b.xim, vv.xim) np.testing.assert_allclose(vv3b.xim_im, vv.xim_im) + # or using the Corr2 base class + with CaptureLog() as cl: + vv3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(vv3c.npairs, vv.npairs) + np.testing.assert_allclose(vv3c.weight, vv.weight) + np.testing.assert_allclose(vv3c.meanr, vv.meanr) + np.testing.assert_allclose(vv3c.meanlogr, vv.meanlogr) + np.testing.assert_allclose(vv3c.xip, vv.xip) + np.testing.assert_allclose(vv3c.xip_im, vv.xip_im) + np.testing.assert_allclose(vv3c.xim, vv.xim) + np.testing.assert_allclose(vv3c.xim_im, vv.xim_im) + try: import fitsio except ImportError: @@ -428,11 +441,6 @@ def test_vv(): verbose=1) vv.process(cat) - vv2 = treecorr.VVCorrelation(bin_size=0.1, min_sep=1., max_sep=100., sep_units='arcmin') - vv2.process(cat, num_threads=1) - vv.process(cat, num_threads=1) - assert vv2 == vv - # log(<R>) != <logR>, but it should be close: print('meanlogr - log(meanr) = ',vv.meanlogr - np.log(vv.meanr)) np.testing.assert_allclose(vv.meanlogr, np.log(vv.meanr), atol=1.e-3) diff --git a/tests/test_zz.py b/tests/test_zz.py new file mode 100644 index 00000000..db26e828 --- /dev/null +++ b/tests/test_zz.py @@ -0,0 +1,882 @@ +# Copyright (c) 2003-2024 by Mike Jarvis +# +# TreeCorr is free software: redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions, and the disclaimer given in the accompanying LICENSE +# file. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the disclaimer given in the documentation +# and/or other materials provided with the distribution. + +import numpy as np +import os +import time +import coord +import treecorr + +from test_helper import get_from_wiki, do_pickle, CaptureLog +from test_helper import assert_raises, timer, assert_warns + + +@timer +def test_direct(): + # If the catalogs are small enough, we can do a direct calculation to see if it comes out right. + # This should exactly match the treecorr result if brute=True + + ngal = 200 + s = 10.
+ rng = np.random.RandomState(8675309) + x1 = rng.normal(0,s, (ngal,) ) + y1 = rng.normal(0,s, (ngal,) ) + w1 = rng.random_sample(ngal) + z11 = rng.normal(0,0.2, (ngal,) ) + z21 = rng.normal(0,0.2, (ngal,) ) + + x2 = rng.normal(0,s, (ngal,) ) + y2 = rng.normal(0,s, (ngal,) ) + w2 = rng.random_sample(ngal) + z12 = rng.normal(0,0.2, (ngal,) ) + z22 = rng.normal(0,0.2, (ngal,) ) + + cat1 = treecorr.Catalog(x=x1, y=y1, w=w1, z1=z11, z2=z21) + cat2 = treecorr.Catalog(x=x2, y=y2, w=w2, z1=z12, z2=z22) + + min_sep = 1. + max_sep = 50. + nbins = 50 + bin_size = np.log(max_sep/min_sep) / nbins + zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, brute=True) + zz.process(cat1, cat2) + + true_npairs = np.zeros(nbins, dtype=int) + true_weight = np.zeros(nbins, dtype=float) + true_xip = np.zeros(nbins, dtype=complex) + true_xim = np.zeros(nbins, dtype=complex) + for i in range(ngal): + # It's hard to do all the pairs at once with numpy operations (although maybe possible). + # But we can at least do all the pairs for each entry in cat1 at once with arrays. + rsq = (x1[i]-x2)**2 + (y1[i]-y2)**2 + r = np.sqrt(rsq) + + ww = w1[i] * w2 + xip = ww * (z11[i] + 1j*z21[i]) * (z12 - 1j*z22) + xim = ww * (z11[i] + 1j*z21[i]) * (z12 + 1j*z22) + + index = np.floor(np.log(r/min_sep) / bin_size).astype(int) + mask = (index >= 0) & (index < nbins) + np.add.at(true_npairs, index[mask], 1) + np.add.at(true_weight, index[mask], ww[mask]) + np.add.at(true_xip, index[mask], xip[mask]) + np.add.at(true_xim, index[mask], xim[mask]) + + true_xip /= true_weight + true_xim /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',zz.npairs - true_npairs) + np.testing.assert_array_equal(zz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',zz.weight - true_weight) + np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xip = ',true_xip) + print('zz.xip = ',zz.xip) + print('zz.xip_im = ',zz.xip_im) + np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=1.e-8) + print('true_xim = ',true_xim) + print('zz.xim = ',zz.xim) + print('zz.xim_im = ',zz.xim_im) + np.testing.assert_allclose(zz.xim, true_xim.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xim_im, true_xim.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. + config = treecorr.config.read_config('configs/zz_direct.yaml') + try: + import fitsio + except ImportError: + pass + else: + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + treecorr.corr2(config) + data = fitsio.read(config['zz_file_name']) + np.testing.assert_allclose(data['r_nom'], zz.rnom) + np.testing.assert_allclose(data['npairs'], zz.npairs) + np.testing.assert_allclose(data['weight'], zz.weight) + np.testing.assert_allclose(data['xip'], zz.xip) + np.testing.assert_allclose(data['xip_im'], zz.xip_im) + np.testing.assert_allclose(data['xim'], zz.xim) + np.testing.assert_allclose(data['xim_im'], zz.xim_im) + + # Repeat with binslop = 0. + # And don't do any top-level recursion so we actually test not going to the leaves. 
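+    # (Restating the intent of the comment above: bin_slop=0 forces every pair into
+    #  its exact separation bin, while max_top=0 skips the initial top-level splits
+    #  of the tree, so the traversal can stop at interior cells instead of always
+    #  descending to the leaves.)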
+ zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + max_top=0) + zz.process(cat1, cat2) + np.testing.assert_array_equal(zz.npairs, true_npairs) + np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=1.e-8) + print('true_xim = ',true_xim) + print('zz.xim = ',zz.xim) + print('zz.xim_im = ',zz.xim_im) + print('diff = ',zz.xim - true_xim.real) + print('max diff = ',np.max(np.abs(zz.xim - true_xim.real))) + print('rel diff = ',(zz.xim - true_xim.real)/true_xim.real) + np.testing.assert_allclose(zz.xim, true_xim.real, atol=3.e-4) + np.testing.assert_allclose(zz.xim_im, true_xim.imag, atol=1.e-4) + + # With angle_slop = 0, it goes back to being basically exact (to single precision). + zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_slop=0, + angle_slop=0, max_top=0) + zz.process(cat1, cat2) + np.testing.assert_array_equal(zz.npairs, true_npairs) + np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xim, true_xim.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xim_im, true_xim.imag, rtol=1.e-6, atol=1.e-8) + + # Check a few basic operations with a ZZCorrelation object. + do_pickle(zz) + + zz2 = zz.copy() + zz2 += zz + np.testing.assert_allclose(zz2.npairs, 2*zz.npairs) + np.testing.assert_allclose(zz2.weight, 2*zz.weight) + np.testing.assert_allclose(zz2.meanr, 2*zz.meanr) + np.testing.assert_allclose(zz2.meanlogr, 2*zz.meanlogr) + np.testing.assert_allclose(zz2.xip, 2*zz.xip) + np.testing.assert_allclose(zz2.xip_im, 2*zz.xip_im) + np.testing.assert_allclose(zz2.xim, 2*zz.xim) + np.testing.assert_allclose(zz2.xim_im, 2*zz.xim_im) + + zz2.clear() + zz2 += zz + np.testing.assert_allclose(zz2.npairs, zz.npairs) + np.testing.assert_allclose(zz2.weight, zz.weight) + np.testing.assert_allclose(zz2.meanr, zz.meanr) + np.testing.assert_allclose(zz2.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz2.xip, zz.xip) + np.testing.assert_allclose(zz2.xip_im, zz.xip_im) + np.testing.assert_allclose(zz2.xim, zz.xim) + np.testing.assert_allclose(zz2.xim_im, zz.xim_im) + + ascii_name = 'output/zz_ascii.txt' + zz.write(ascii_name, precision=16) + zz3 = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, bin_type='Log') + zz3.read(ascii_name) + np.testing.assert_allclose(zz3.npairs, zz.npairs) + np.testing.assert_allclose(zz3.weight, zz.weight) + np.testing.assert_allclose(zz3.meanr, zz.meanr) + np.testing.assert_allclose(zz3.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz3.xip, zz.xip) + np.testing.assert_allclose(zz3.xip_im, zz.xip_im) + np.testing.assert_allclose(zz3.xim, zz.xim) + np.testing.assert_allclose(zz3.xim_im, zz.xim_im) + + # Check that the repr is minimal + assert repr(zz3) == f'ZZCorrelation(min_sep={min_sep}, max_sep={max_sep}, nbins={nbins})' + + # Simpler API using from_file: + with CaptureLog() as cl: + zz3b = treecorr.ZZCorrelation.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(zz3b.npairs, zz.npairs) + np.testing.assert_allclose(zz3b.weight, zz.weight) + np.testing.assert_allclose(zz3b.meanr, zz.meanr) + np.testing.assert_allclose(zz3b.meanlogr, zz.meanlogr) 
+ np.testing.assert_allclose(zz3b.xip, zz.xip) + np.testing.assert_allclose(zz3b.xip_im, zz.xip_im) + np.testing.assert_allclose(zz3b.xim, zz.xim) + np.testing.assert_allclose(zz3b.xim_im, zz.xim_im) + + # or using the Corr2 base class + with CaptureLog() as cl: + zz3c = treecorr.Corr2.from_file(ascii_name, logger=cl.logger) + assert ascii_name in cl.output + np.testing.assert_allclose(zz3c.npairs, zz.npairs) + np.testing.assert_allclose(zz3c.weight, zz.weight) + np.testing.assert_allclose(zz3c.meanr, zz.meanr) + np.testing.assert_allclose(zz3c.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz3c.xip, zz.xip) + np.testing.assert_allclose(zz3c.xip_im, zz.xip_im) + np.testing.assert_allclose(zz3c.xim, zz.xim) + np.testing.assert_allclose(zz3c.xim_im, zz.xim_im) + + try: + import fitsio + except ImportError: + pass + else: + fits_name = 'output/zz_fits.fits' + zz.write(fits_name) + zz4 = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins) + zz4.read(fits_name) + np.testing.assert_allclose(zz4.npairs, zz.npairs) + np.testing.assert_allclose(zz4.weight, zz.weight) + np.testing.assert_allclose(zz4.meanr, zz.meanr) + np.testing.assert_allclose(zz4.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz4.xip, zz.xip) + np.testing.assert_allclose(zz4.xip_im, zz.xip_im) + np.testing.assert_allclose(zz4.xim, zz.xim) + np.testing.assert_allclose(zz4.xim_im, zz.xim_im) + + zz4b = treecorr.ZZCorrelation.from_file(fits_name) + np.testing.assert_allclose(zz4b.npairs, zz.npairs) + np.testing.assert_allclose(zz4b.weight, zz.weight) + np.testing.assert_allclose(zz4b.meanr, zz.meanr) + np.testing.assert_allclose(zz4b.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz4b.xip, zz.xip) + np.testing.assert_allclose(zz4b.xip_im, zz.xip_im) + np.testing.assert_allclose(zz4b.xim, zz.xim) + np.testing.assert_allclose(zz4b.xim_im, zz.xim_im) + + with assert_raises(TypeError): + zz2 += config + zz4 = treecorr.ZZCorrelation(min_sep=min_sep/2, max_sep=max_sep, nbins=nbins) + with assert_raises(ValueError): + zz2 += zz4 + zz5 = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep*2, nbins=nbins) + with assert_raises(ValueError): + zz2 += zz5 + zz6 = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins*2) + with assert_raises(ValueError): + zz2 += zz6 + with assert_raises(ValueError): + zz.process(cat1, cat2, patch_method='nonlocal') + +@timer +def test_direct_spherical(): + # Repeat in spherical coords + + ngal = 100 + s = 10. + rng = np.random.RandomState(8675309) + x1 = rng.normal(0,s, (ngal,) ) + y1 = rng.normal(0,s, (ngal,) ) + 200 # Put everything at large y, so small angle on sky + z1 = rng.normal(0,s, (ngal,) ) + w1 = rng.random_sample(ngal) + z11 = rng.normal(0,0.2, (ngal,) ) + z21 = rng.normal(0,0.2, (ngal,) ) + + x2 = rng.normal(0,s, (ngal,) ) + y2 = rng.normal(0,s, (ngal,) ) + 200 + z2 = rng.normal(0,s, (ngal,) ) + w2 = rng.random_sample(ngal) + z12 = rng.normal(0,0.2, (ngal,) ) + z22 = rng.normal(0,0.2, (ngal,) ) + + ra1, dec1 = coord.CelestialCoord.xyz_to_radec(x1,y1,z1) + ra2, dec2 = coord.CelestialCoord.xyz_to_radec(x2,y2,z2) + + cat1 = treecorr.Catalog(ra=ra1, dec=dec1, ra_units='rad', dec_units='rad', w=w1, z1=z11, z2=z21) + cat2 = treecorr.Catalog(ra=ra2, dec=dec2, ra_units='rad', dec_units='rad', w=w2, z1=z12, z2=z22) + + min_sep = 1. + max_sep = 10. 
+ nbins = 50 + bin_size = np.log(max_sep/min_sep) / nbins + zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins, + sep_units='deg', brute=True) + zz.process(cat1, cat2) + + r1 = np.sqrt(x1**2 + y1**2 + z1**2) + r2 = np.sqrt(x2**2 + y2**2 + z2**2) + x1 /= r1; y1 /= r1; z1 /= r1 + x2 /= r2; y2 /= r2; z2 /= r2 + + north_pole = coord.CelestialCoord(0*coord.radians, 90*coord.degrees) + + true_npairs = np.zeros(nbins, dtype=int) + true_weight = np.zeros(nbins, dtype=float) + true_xip = np.zeros(nbins, dtype=complex) + true_xim = np.zeros(nbins, dtype=complex) + + rad_min_sep = min_sep * coord.degrees / coord.radians + c1 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra1, dec1)] + c2 = [coord.CelestialCoord(r*coord.radians, d*coord.radians) for (r,d) in zip(ra2, dec2)] + for i in range(ngal): + for j in range(ngal): + rsq = (x1[i]-x2[j])**2 + (y1[i]-y2[j])**2 + (z1[i]-z2[j])**2 + r = np.sqrt(rsq) + + index = np.floor(np.log(r/rad_min_sep) / bin_size).astype(int) + if index < 0 or index >= nbins: + continue + + zz1 = z11[i] + 1j * z21[i] + zz2 = z12[j] + 1j * z22[j] + + ww = w1[i] * w2[j] + xip = ww * zz1 * np.conjugate(zz2) + xim = ww * zz1 * zz2 + + true_npairs[index] += 1 + true_weight[index] += ww + true_xip[index] += xip + true_xim[index] += xim + + true_xip /= true_weight + true_xim /= true_weight + + print('true_npairs = ',true_npairs) + print('diff = ',zz.npairs - true_npairs) + np.testing.assert_array_equal(zz.npairs, true_npairs) + + print('true_weight = ',true_weight) + print('diff = ',zz.weight - true_weight) + np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8) + + print('true_xip = ',true_xip) + print('zz.xip = ',zz.xip) + print('zz.xip_im = ',zz.xip_im) + np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=1.e-8) + print('true_xim = ',true_xim) + print('zz.xim = ',zz.xim) + print('zz.xim_im = ',zz.xim_im) + np.testing.assert_allclose(zz.xim, true_xim.real, rtol=1.e-6, atol=1.e-8) + np.testing.assert_allclose(zz.xim_im, true_xim.imag, rtol=1.e-6, atol=1.e-8) + + # Check that running via the corr2 script works correctly. + try: + import fitsio + except ImportError: + pass + else: + config = treecorr.config.read_config('configs/zz_direct_spherical.yaml') + cat1.write(config['file_name']) + cat2.write(config['file_name2']) + treecorr.corr2(config) + data = fitsio.read(config['zz_file_name']) + np.testing.assert_allclose(data['r_nom'], zz.rnom) + np.testing.assert_allclose(data['npairs'], zz.npairs) + np.testing.assert_allclose(data['weight'], zz.weight) + np.testing.assert_allclose(data['xip'], zz.xip) + np.testing.assert_allclose(data['xip_im'], zz.xip_im) + np.testing.assert_allclose(data['xim'], zz.xim) + np.testing.assert_allclose(data['xim_im'], zz.xim_im) + + # Repeat with binslop = 0 + # And don't do any top-level recursion so we actually test not going to the leaves. 
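+    # (As in the flat-sky test above: the looser xim tolerances after this run come
+    #  from the remaining slop; the angle_slop=0 run that follows recovers single
+    #  precision.)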
+    zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins,
+                                sep_units='deg', bin_slop=0, max_top=0)
+    zz.process(cat1, cat2)
+    np.testing.assert_array_equal(zz.npairs, true_npairs)
+    np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=3.e-7)
+    np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=2.e-7)
+    np.testing.assert_allclose(zz.xim, true_xim.real, atol=1.e-4)
+    np.testing.assert_allclose(zz.xim_im, true_xim.imag, atol=1.e-4)
+
+    # With angle_slop = 0, it goes back to being basically exact (to single precision).
+    zz = treecorr.ZZCorrelation(min_sep=min_sep, max_sep=max_sep, nbins=nbins,
+                                sep_units='deg', bin_slop=0, angle_slop=0, max_top=0)
+    zz.process(cat1, cat2)
+    np.testing.assert_array_equal(zz.npairs, true_npairs)
+    np.testing.assert_allclose(zz.weight, true_weight, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(zz.xip, true_xip.real, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(zz.xip_im, true_xip.imag, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(zz.xim, true_xim.real, rtol=1.e-6, atol=1.e-8)
+    np.testing.assert_allclose(zz.xim_im, true_xim.imag, rtol=1.e-6, atol=1.e-8)
+
+
+@timer
+def test_zz():
+    # Similar to the math in test_gg(), but use a complex scalar (spin-0) field,
+    # rather than a shear pattern.
+    # Also, since z is spin-0, the xi- integral here uses J0, not J4.
+
+    # Use z(r) = z0 exp(-r^2/2r0^2)
+    #
+    # The Fourier transform is: z~(k) = 2 pi z0 r0^2 exp(-r0^2 k^2/2) / L^2
+    # P(k) = (1/2pi) <|z~(k)|^2> = 2 pi |z0|^2 (r0/L)^4 exp(-r0^2 k^2)
+    # xi+(r) = (1/2pi) int( dk k P(k) J0(kr) )
+    #        = pi |z0|^2 (r0/L)^2 exp(-r^2/4r0^2)
+    # xi-(r) = (1/2pi) int( dk k P(k) J0(kr) )
+    #        = pi z0^2 (r0/L)^2 exp(-r^2/4r0^2)
+
+    z0 = 0.05 + 1j * 0.03
+    r0 = 10.
+    if __name__ == "__main__":
+        ngal = 1000000
+        L = 50.*r0  # Not infinity, so this introduces some error.  Our integrals were to infinity.
+        tol_factor = 1
+    else:
+        ngal = 100000
+        L = 50.*r0
+        # Rather than have a single set tolerance, we tune the tolerances for the above
+        # __main__ setup, but scale up by a factor of 5 for the quicker run.
+        tol_factor = 5
+    rng = np.random.RandomState(8675309)
+    x = (rng.random_sample(ngal)-0.5) * L
+    y = (rng.random_sample(ngal)-0.5) * L
+    r2 = (x**2 + y**2)/r0**2
+    zz = z0 * np.exp(-r2/2.)
+    z1 = np.real(zz)
+    z2 = np.imag(zz)
+
+    cat = treecorr.Catalog(x=x, y=y, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
+    zz = treecorr.ZZCorrelation(bin_size=0.1, min_sep=1., max_sep=100., sep_units='arcmin',
+                                verbose=1)
+    zz.process(cat)
+
+    # meanlogr != log(meanr), but it should be close:
+    print('meanlogr - log(meanr) = ',zz.meanlogr - np.log(zz.meanr))
+    np.testing.assert_allclose(zz.meanlogr, np.log(zz.meanr), atol=1.e-3)
+
+    r = zz.meanr
+    temp = np.pi * (r0/L)**2 * np.exp(-0.25*r**2/r0**2)
+    true_xip = temp * np.abs(z0**2)
+    true_xim = temp * z0**2
+
+    print('zz.xip = ',zz.xip)
+    print('true_xip = ',true_xip)
+    print('ratio = ',zz.xip / true_xip)
+    print('diff = ',zz.xip - true_xip)
+    print('max diff = ',max(abs(zz.xip - true_xip)))
+    # It's within 10% everywhere except at the zero crossings.
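+    # (The Gaussian form of true_xip and true_xim follows from the Hankel-transform
+    #  identity int_0^inf k exp(-a k^2) J0(k r) dk = exp(-r^2/(4a)) / (2a), with a = r0^2.)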
+    np.testing.assert_allclose(zz.xip, true_xip, rtol=0.1 * tol_factor, atol=1.e-7 * tol_factor)
+    print('xip_im = ',zz.xip_im)
+    np.testing.assert_allclose(zz.xip_im, 0, atol=2.e-7 * tol_factor)
+
+    print('zz.xim = ',zz.xim)
+    print('true_xim = ',true_xim)
+    print('ratio = ',zz.xim / true_xim)
+    print('diff = ',zz.xim - true_xim)
+    print('max diff = ',max(abs(zz.xim - true_xim)))
+    np.testing.assert_allclose(zz.xim, np.real(true_xim), rtol=0.1 * tol_factor, atol=2.e-7 * tol_factor)
+    print('xim_im = ',zz.xim_im)
+    np.testing.assert_allclose(zz.xim_im, np.imag(true_xim), rtol=0.1 * tol_factor, atol=2.e-7 * tol_factor)
+
+    # Should also work as a cross-correlation with itself
+    zz.process(cat,cat)
+    np.testing.assert_allclose(zz.meanlogr, np.log(zz.meanr), atol=1.e-3)
+    assert max(abs(zz.xip - true_xip)) < 3.e-7 * tol_factor
+    assert max(abs(zz.xip_im)) < 2.e-7 * tol_factor
+    assert max(abs(zz.xim - np.real(true_xim))) < 3.e-7 * tol_factor
+    assert max(abs(zz.xim_im - np.imag(true_xim))) < 3.e-7 * tol_factor
+
+    # Check that we get the same result using the corr2 function:
+    cat.write(os.path.join('data','zz.dat'))
+    config = treecorr.read_config('configs/zz.yaml')
+    config['verbose'] = 0
+    config['precision'] = 8
+    treecorr.corr2(config)
+    corr2_output = np.genfromtxt(os.path.join('output','zz.out'), names=True, skip_header=1)
+    np.testing.assert_allclose(corr2_output['xip'], zz.xip, rtol=1.e-4)
+    np.testing.assert_allclose(corr2_output['xim'], zz.xim, rtol=1.e-4)
+    np.testing.assert_allclose(corr2_output['xip_im'], zz.xip_im, rtol=1.e-4, atol=1.e-12)
+    np.testing.assert_allclose(corr2_output['xim_im'], zz.xim_im, rtol=1.e-4)
+
+    # Check the write function
+    out_file_name = os.path.join('output','zz_out.dat')
+    zz.write(out_file_name, precision=16)
+    data = np.genfromtxt(out_file_name, names=True, skip_header=1)
+    np.testing.assert_allclose(data['r_nom'], np.exp(zz.logr))
+    np.testing.assert_allclose(data['meanr'], zz.meanr)
+    np.testing.assert_allclose(data['meanlogr'], zz.meanlogr)
+    np.testing.assert_allclose(data['xip'], zz.xip)
+    np.testing.assert_allclose(data['xim'], zz.xim)
+    np.testing.assert_allclose(data['xip_im'], zz.xip_im)
+    np.testing.assert_allclose(data['xim_im'], zz.xim_im)
+    np.testing.assert_allclose(data['sigma_xip'], np.sqrt(zz.varxip))
+    np.testing.assert_allclose(data['sigma_xim'], np.sqrt(zz.varxim))
+    np.testing.assert_allclose(data['weight'], zz.weight)
+    np.testing.assert_allclose(data['npairs'], zz.npairs)
+
+    # Check the read function
+    zz2 = treecorr.ZZCorrelation.from_file(out_file_name)
+    np.testing.assert_allclose(zz2.logr, zz.logr)
+    np.testing.assert_allclose(zz2.meanr, zz.meanr)
+    np.testing.assert_allclose(zz2.meanlogr, zz.meanlogr)
+    np.testing.assert_allclose(zz2.xip, zz.xip)
+    np.testing.assert_allclose(zz2.xim, zz.xim)
+    np.testing.assert_allclose(zz2.xip_im, zz.xip_im)
+    np.testing.assert_allclose(zz2.xim_im, zz.xim_im)
+    np.testing.assert_allclose(zz2.varxip, zz.varxip)
+    np.testing.assert_allclose(zz2.varxim, zz.varxim)
+    np.testing.assert_allclose(zz2.weight, zz.weight)
+    np.testing.assert_allclose(zz2.npairs, zz.npairs)
+    assert zz2.coords == zz.coords
+    assert zz2.metric == zz.metric
+    assert zz2.sep_units == zz.sep_units
+    assert zz2.bin_type == zz.bin_type
+
+
+@timer
+def test_varxi():
+    # Test that varxip, varxim are correct (or close) based on actual variance of many runs.
+
+    # Same z pattern as in test_zz().  Although the signal doesn't actually matter at all here.
+    z0 = 0.05 + 1j*0.05
+    r0 = 10.
+    L = 50.*r0
+    rng = np.random.RandomState(8675309)
+
+    # Note: to get a good estimate of var(xi), you need a lot of runs.  The number of
+    # runs matters much more than the number of galaxies for getting this to pass.
+    ngal = 1000
+    nruns = 50000
+
+    file_name = 'data/test_varxi_zz.npz'
+    print(file_name)
+    if not os.path.isfile(file_name):
+        all_zzs = []
+
+        for run in range(nruns):
+            print(f'{run}/{nruns}')
+            # In addition to the shape noise below, there is shot noise from the random x,y positions.
+            x = (rng.random_sample(ngal)-0.5) * L
+            y = (rng.random_sample(ngal)-0.5) * L
+            # Varied weights are hard, but at least check that non-unit weights work correctly.
+            w = np.ones_like(x) * 5
+            r2 = (x**2 + y**2)/r0**2
+            zz = z0 * np.exp(-r2/2.)
+            z1 = np.real(zz)
+            z2 = np.imag(zz)
+            # This time, add some shape noise (different each run).
+            z1 += rng.normal(0, 0.3, size=ngal)
+            z2 += rng.normal(0, 0.3, size=ngal)
+
+            cat = treecorr.Catalog(x=x, y=y, w=w, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
+            zz = treecorr.ZZCorrelation(bin_size=0.1, min_sep=10., max_sep=100., sep_units='arcmin',
+                                        verbose=1)
+            zz.process(cat)
+            all_zzs.append(zz)
+
+        mean_xip = np.mean([zz.xip for zz in all_zzs], axis=0)
+        var_xip = np.var([zz.xip for zz in all_zzs], axis=0)
+        mean_xim = np.mean([zz.xim for zz in all_zzs], axis=0)
+        var_xim = np.var([zz.xim for zz in all_zzs], axis=0)
+        mean_varxip = np.mean([zz.varxip for zz in all_zzs], axis=0)
+        mean_varxim = np.mean([zz.varxim for zz in all_zzs], axis=0)
+
+        np.savez(file_name,
+                 mean_xip=mean_xip, var_xip=var_xip, mean_varxip=mean_varxip,
+                 mean_xim=mean_xim, var_xim=var_xim, mean_varxim=mean_varxim)
+
+    data = np.load(file_name)
+    mean_xip = data['mean_xip']
+    var_xip = data['var_xip']
+    mean_varxip = data['mean_varxip']
+    mean_xim = data['mean_xim']
+    var_xim = data['var_xim']
+    mean_varxim = data['mean_varxim']
+
+    print('nruns = ',nruns)
+    print('mean_xip = ',mean_xip)
+    print('mean_xim = ',mean_xim)
+    print('mean_varxip = ',mean_varxip)
+    print('mean_varxim = ',mean_varxim)
+    print('var_xip = ',var_xip)
+    print('ratio = ',var_xip / mean_varxip)
+    print('var_xim = ',var_xim)
+    print('ratio = ',var_xim / mean_varxim)
+    print('max relerr for xip = ',np.max(np.abs((var_xip - mean_varxip)/var_xip)))
+    print('max relerr for xim = ',np.max(np.abs((var_xim - mean_varxim)/var_xim)))
+    np.testing.assert_allclose(mean_varxip, var_xip, rtol=0.02)
+    np.testing.assert_allclose(mean_varxim, var_xim, rtol=0.02)
+
+    # Now the actual test that's based on current code, not just from the saved file.
+    # There is a bit more noise on a single run, so the tolerance needs to be somewhat higher.
+    x = (rng.random_sample(ngal)-0.5) * L
+    y = (rng.random_sample(ngal)-0.5) * L
+    # Varied weights are hard, but at least check that non-unit weights work correctly.
+    w = np.ones_like(x) * 5
+    r2 = (x**2 + y**2)/r0**2
+    zz = z0 * np.exp(-r2/2.)
+    z1 = np.real(zz)
+    z2 = np.imag(zz)
+    # This time, add some shape noise (different each run).
+    z1 += rng.normal(0, 0.3, size=ngal)
+    z2 += rng.normal(0, 0.3, size=ngal)
+
+    cat = treecorr.Catalog(x=x, y=y, w=w, z1=z1, z2=z2, x_units='arcmin', y_units='arcmin')
+    zz = treecorr.ZZCorrelation(bin_size=0.1, min_sep=10., max_sep=100., sep_units='arcmin',
+                                verbose=1)
+    zz.process(cat)
+    print('single run:')
+    print('max relerr for xip = ',np.max(np.abs((zz.varxip - var_xip)/var_xip)))
+    print('max relerr for xim = ',np.max(np.abs((zz.varxim - var_xim)/var_xim)))
+    np.testing.assert_allclose(zz.varxip, var_xip, rtol=0.3)
+    np.testing.assert_allclose(zz.varxim, var_xim, rtol=0.3)
+
+@timer
+def test_jk():
+
+    # Same multi-lens field we used for NV patch test
+    r0 = 30.
+    L = 30 * r0
+    rng = np.random.RandomState(8675309)
+
+    nsource = 100000
+    nlens = 300
+    nruns = 1000
+    npatch = 64
+
+    corr_params = dict(bin_size=0.3, min_sep=20, max_sep=50, bin_slop=0.1)
+
+    def make_field(rng):
+        x1 = (rng.random(nlens)-0.5) * L
+        y1 = (rng.random(nlens)-0.5) * L
+        w = rng.random(nlens) + 10
+        x2 = (rng.random(nsource)-0.5) * L
+        y2 = (rng.random(nsource)-0.5) * L
+
+        # Start with just the noise
+        z1 = rng.normal(0, 0.2, size=nsource)
+        z2 = rng.normal(0, 0.2, size=nsource)
+
+        # Add in the signal from all lenses
+        for i in range(nlens):
+            x2i = x2 - x1[i]
+            y2i = y2 - y1[i]
+            r2 = (x2i**2 + y2i**2)/r0**2
+            z0 = rng.normal(0, 0.03) + 1j * rng.normal(0, 0.03)
+            zz = w[i] * z0 * np.exp(-r2/2.)
+            z1 += np.real(zz)
+            z2 += np.imag(zz)
+        return x1, y1, w, x2, y2, z1, z2
+
+    file_name = 'data/test_zz_jk_{}.npz'.format(nruns)
+    print(file_name)
+    if not os.path.isfile(file_name):
+        all_zzs = []
+        rng = np.random.default_rng()
+        for run in range(nruns):
+            x1, y1, w, x2, y2, z1, z2 = make_field(rng)
+            print(run,': ',np.mean(z1),np.std(z1),np.min(z1),np.max(z1))
+            cat = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2)
+            zz = treecorr.ZZCorrelation(corr_params)
+            zz.process(cat)
+            all_zzs.append(zz)
+
+        print('xip = ',np.array([zz.xip for zz in all_zzs]))
+        mean_xip = np.mean([zz.xip for zz in all_zzs], axis=0)
+        mean_xim = np.mean([zz.xim for zz in all_zzs], axis=0)
+        var_xip = np.var([zz.xip for zz in all_zzs], axis=0)
+        var_xim = np.var([zz.xim for zz in all_zzs], axis=0)
+        mean_varxip = np.mean([zz.varxip for zz in all_zzs], axis=0)
+        mean_varxim = np.mean([zz.varxim for zz in all_zzs], axis=0)
+
+        np.savez(file_name,
+                 mean_xip=mean_xip, var_xip=var_xip, mean_varxip=mean_varxip,
+                 mean_xim=mean_xim, var_xim=var_xim, mean_varxim=mean_varxim)
+
+    data = np.load(file_name)
+    mean_xip = data['mean_xip']
+    mean_xim = data['mean_xim']
+    mean_varxip = data['mean_varxip']
+    mean_varxim = data['mean_varxim']
+    var_xip = data['var_xip']
+    var_xim = data['var_xim']
+
+    print('mean_xip = ',mean_xip)
+    print('mean_varxip = ',mean_varxip)
+    print('var_xip = ',var_xip)
+    print('ratio = ',var_xip / mean_varxip)
+    print('mean_xim = ',mean_xim)
+    print('mean_varxim = ',mean_varxim)
+    print('var_xim = ',var_xim)
+    print('ratio = ',var_xim / mean_varxim)
+
+    rng = np.random.default_rng(1234)
+    x1, y1, w, x2, y2, z1, z2 = make_field(rng)
+
+    cat = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2)
+    zz1 = treecorr.ZZCorrelation(corr_params)
+    t0 = time.time()
+    zz1.process(cat)
+    t1 = time.time()
+    print('Time for non-patch processing = ',t1-t0)
+
+    print('weight = ',zz1.weight)
+    print('xip = ',zz1.xip)
+    print('varxip = ',zz1.varxip)
+    print('pullsq for xip = ',(zz1.xip-mean_xip)**2/var_xip)
+    print('max pull for xip = ',np.sqrt(np.max((zz1.xip-mean_xip)**2/var_xip)))
+    print('max pull for xim = 
',np.sqrt(np.max((zz1.xim-mean_xim)**2/var_xim))) + np.testing.assert_array_less((zz1.xip-mean_xip)**2, 9*var_xip) # < 3 sigma pull + np.testing.assert_array_less((zz1.xim-mean_xim)**2, 9*var_xim) # < 3 sigma pull + np.testing.assert_allclose(zz1.varxip, mean_varxip, rtol=0.1) + np.testing.assert_allclose(zz1.varxim, mean_varxim, rtol=0.1) + + # Now run with patches, but still with shot variance. Should be basically the same answer. + catp = treecorr.Catalog(x=x2, y=y2, z1=z1, z2=z2, npatch=npatch) + print('tot w = ',np.sum(w)) + print('Patch\tNsource') + for i in range(npatch): + print('%d\t%d'%(i,np.sum(catp.w[catp.patch==i]))) + zz2 = treecorr.ZZCorrelation(corr_params) + t0 = time.time() + zz2.process(catp) + t1 = time.time() + print('Time for patch processing = ',t1-t0) + print('weight = ',zz2.weight) + print('xip = ',zz2.xip) + print('xip1 = ',zz1.xip) + print('varxip = ',zz2.varxip) + print('xim = ',zz2.xim) + print('xim1 = ',zz1.xim) + print('varxim = ',zz2.varxim) + np.testing.assert_allclose(zz2.weight, zz1.weight, rtol=1.e-2) + np.testing.assert_allclose(zz2.xip, zz1.xip, rtol=1.e-2) + np.testing.assert_allclose(zz2.xim, zz1.xim, rtol=1.e-2) + np.testing.assert_allclose(zz2.varxip, zz1.varxip, rtol=1.e-2) + np.testing.assert_allclose(zz2.varxim, zz1.varxim, rtol=1.e-2) + + # Now try jackknife variance estimate. + t0 = time.time() + cov2 = zz2.estimate_cov('jackknife') + t1 = time.time() + print('Time to calculate jackknife covariance = ',t1-t0) + print('cov.diag = ',np.diagonal(cov2)) + print('cf var_xip = ',var_xip) + print('cf var_xim = ',var_xim) + np.testing.assert_allclose(np.diagonal(cov2)[:4], var_xip, rtol=0.3) + np.testing.assert_allclose(np.diagonal(cov2)[4:], var_xim, rtol=0.5) + + # Use initialize/finalize + zz3 = treecorr.ZZCorrelation(corr_params) + for k1, p1 in enumerate(catp.get_patches()): + zz3.process(p1, initialize=(k1==0), finalize=(k1==npatch-1)) + for k2, p2 in enumerate(catp.get_patches()): + if k2 <= k1: continue + zz3.process(p1, p2, initialize=False, finalize=False) + np.testing.assert_allclose(zz3.xip, zz2.xip) + np.testing.assert_allclose(zz3.xim, zz2.xim) + np.testing.assert_allclose(zz3.weight, zz2.weight) + + # Check that these still work after roundtripping through a file. 
+    try:
+        import fitsio
+    except ImportError:
+        pass
+    else:
+        file_name = os.path.join('output','test_write_results_zz.fits')
+        zz2.write(file_name, write_patch_results=True)
+        zz3 = treecorr.ZZCorrelation.from_file(file_name)
+        cov3 = zz3.estimate_cov('jackknife')
+        np.testing.assert_allclose(cov3, cov2)
+
+    # Check some invalid actions
+    # Bad var_method
+    with assert_raises(ValueError):
+        zz2.estimate_cov('invalid')
+    # Not run on patches, but need patches
+    with assert_raises(ValueError):
+        zz1.estimate_cov('jackknife')
+    with assert_raises(ValueError):
+        zz1.estimate_cov('sample')
+    with assert_raises(ValueError):
+        zz1.estimate_cov('marked_bootstrap')
+    with assert_raises(ValueError):
+        zz1.estimate_cov('bootstrap')
+
+    cata = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=10)
+    catb = treecorr.Catalog(x=x2[:100], y=y2[:100], z1=z1[:100], z2=z2[:100], npatch=2)
+    zz4 = treecorr.ZZCorrelation(corr_params)
+    zz5 = treecorr.ZZCorrelation(corr_params)
+    # All catalogs need to have the same number of patches
+    with assert_raises(RuntimeError):
+        zz4.process(cata,catb)
+    with assert_raises(RuntimeError):
+        zz5.process(catb,cata)
+
+@timer
+def test_twod():
+    from test_twod import corr2d
+    try:
+        from scipy.spatial.distance import pdist, squareform
+    except ImportError:
+        print('Skipping test_twod, since it uses scipy, and scipy is not installed.')
+        return
+
+    # N random points in 2 dimensions
+    rng = np.random.RandomState(8675309)
+    N = 200
+    x = rng.uniform(-20, 20, N)
+    y = rng.uniform(-20, 20, N)
+
+    # Give the points a multivariate Gaussian random field for z
+    L1 = [[0.33, 0.09], [-0.01, 0.26]]  # Some arbitrary correlation matrix
+    invL1 = np.linalg.inv(L1)
+    dists = pdist(np.array([x,y]).T, metric='mahalanobis', VI=invL1)
+    K = np.exp(-0.5 * dists**2)
+    K = squareform(K)
+    np.fill_diagonal(K, 1.)
+
+    A = 2.3
+    sigma = A/10.
+
+    # Make z
+    z1 = rng.multivariate_normal(np.zeros(N), K*(A**2))
+    z1 += rng.normal(scale=sigma, size=N)
+    z2 = rng.multivariate_normal(np.zeros(N), K*(A**2))
+    z2 += rng.normal(scale=sigma, size=N)
+    z = z1 + 1j * z2
+
+    # Calculate the 2D correlation using brute force
+    max_sep = 21.
+ nbins = 21 + xi_brut = corr2d(x, y, z, np.conj(z), rmax=max_sep, bins=nbins) + + # And using TreeCorr + cat = treecorr.Catalog(x=x, y=y, z1=z1, z2=z2) + zz = treecorr.ZZCorrelation(max_sep=max_sep, bin_size=2., bin_type='TwoD', brute=True) + zz.process(cat) + print('max abs diff = ',np.max(np.abs(zz.xip - xi_brut))) + print('max rel diff = ',np.max(np.abs(zz.xip - xi_brut)/np.abs(zz.xip))) + np.testing.assert_allclose(zz.xip, xi_brut, atol=2.e-7) + + zz = treecorr.ZZCorrelation(max_sep=max_sep, bin_size=2., bin_type='TwoD', bin_slop=0.05) + zz.process(cat) + print('max abs diff = ',np.max(np.abs(zz.xip - xi_brut))) + print('max rel diff = ',np.max(np.abs(zz.xip - xi_brut)/np.abs(zz.xip))) + np.testing.assert_allclose(zz.xip, xi_brut, atol=2.e-7) + + # Check I/O + try: + import fitsio + except ImportError: + pass + else: + fits_name = 'output/zz_twod.fits' + zz.write(fits_name) + zz2 = treecorr.ZZCorrelation.from_file(fits_name) + np.testing.assert_allclose(zz2.npairs, zz.npairs) + np.testing.assert_allclose(zz2.weight, zz.weight) + np.testing.assert_allclose(zz2.meanr, zz.meanr) + np.testing.assert_allclose(zz2.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz2.xip, zz.xip) + np.testing.assert_allclose(zz2.xip_im, zz.xip_im) + np.testing.assert_allclose(zz2.xim, zz.xim) + np.testing.assert_allclose(zz2.xim_im, zz.xim_im) + + ascii_name = 'output/zz_twod.txt' + zz.write(ascii_name, precision=16) + zz3 = treecorr.ZZCorrelation.from_file(ascii_name) + np.testing.assert_allclose(zz3.npairs, zz.npairs) + np.testing.assert_allclose(zz3.weight, zz.weight) + np.testing.assert_allclose(zz3.meanr, zz.meanr) + np.testing.assert_allclose(zz3.meanlogr, zz.meanlogr) + np.testing.assert_allclose(zz3.xip, zz.xip) + np.testing.assert_allclose(zz3.xip_im, zz.xip_im) + np.testing.assert_allclose(zz3.xim, zz.xim) + np.testing.assert_allclose(zz3.xim_im, zz.xim_im) + + +if __name__ == '__main__': + test_direct() + test_direct_spherical() + test_zz() + test_varxi() + test_jk() + test_twod() diff --git a/treecorr/__init__.py b/treecorr/__init__.py index 96fd847f..3d580255 100644 --- a/treecorr/__init__.py +++ b/treecorr/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -32,16 +32,21 @@ from .util import set_omp_threads, get_omp_threads, set_max_omp_threads from .catalog import Catalog, read_catalogs -from .catalog import calculateVarK, calculateVarG, calculateVarV, calculateVarT, calculateVarQ +from .catalog import calculateVarK, calculateVarG, calculateVarZ +from .catalog import calculateVarV, calculateVarT, calculateVarQ from .corr2base import Corr2, estimate_multi_cov, build_multi_cov_design_matrix from .corr3base import Corr3 -from .field import Field, NField, KField, VField, GField +from .field import Field, NField, KField, ZField, VField, GField, TField, QField from .nncorrelation import NNCorrelation from .nkcorrelation import NKCorrelation from .kkcorrelation import KKCorrelation +from .nzcorrelation import NZCorrelation, BaseNZCorrelation +from .kzcorrelation import KZCorrelation, BaseKZCorrelation +from .zzcorrelation import ZZCorrelation, BaseZZCorrelation + from .nvcorrelation import NVCorrelation from .kvcorrelation import KVCorrelation from .vvcorrelation import VVCorrelation diff --git a/treecorr/_version.py b/treecorr/_version.py index b22824a9..162f20df 100644 --- 
a/treecorr/_version.py +++ b/treecorr/_version.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/catalog.py b/treecorr/catalog.py index 5df1b36b..85b767e9 100644 --- a/treecorr/catalog.py +++ b/treecorr/catalog.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -25,7 +25,7 @@ from .reader import FitsReader, HdfReader, AsciiReader, PandasReader, ParquetReader from .config import merge_config, setup_logger, get, get_from_list from .util import parse_file_type, LRU_Cache, make_writer, make_reader, set_omp_threads -from .field import NField, KField, VField, GField, TField, QField +from .field import NField, KField, ZField, VField, GField, TField, QField class Catalog(object): r"""A set of input data (positions and other quantities) to be correlated. @@ -130,10 +130,12 @@ class Catalog(object): wpos: The weights for position centroiding, as a numpy array, if given. (None otherwise, which means that implicitly wpos = w.) k: The scalar field, kappa, if defined, as a numpy array. (None otherwise) - v1: The v1 component of the vector, if defined, as a numpy array. (None otherwise) - v2: The v2 component of the vector, if defined, as a numpy array. (None otherwise) - g1: The g1 component of the shear, if defined, as a numpy array. (None otherwise) - g2: The g2 component of the shear, if defined, as a numpy array. (None otherwise) + z1: The z1 component of a complex scalar, if defined, as a numpy array. (None otherwise) + z2: The z2 component of a complex scalar, if defined, as a numpy array. (None otherwise) + v1: The v1 component of a vector, if defined, as a numpy array. (None otherwise) + v2: The v2 component of a vector, if defined, as a numpy array. (None otherwise) + g1: The g1 component of a shear, if defined, as a numpy array. (None otherwise) + g2: The g2 component of a shear, if defined, as a numpy array. (None otherwise) t1: The 1st component of a spin-3 field, if defined, as a numpy array. (None otherwise) t2: The 2nd component of a spin-3 field, if defined, as a numpy array. (None otherwise) q1: The 1st component of a spin-4 field, if defined, as a numpy array. (None otherwise) @@ -155,6 +157,16 @@ class Catalog(object): value divided by the total weight per bin, so this is the right quantity to use for that. + varz: The variance per component of the complex scalar field (0 if z1,z2 are not defined) + + .. note:: + + If there are weights, this is really + :math:`\sum(w^2 |z - \langle z \rangle|^2)/\sum(w)`, + which is more like :math:`\langle w \rangle \mathrm{Var}(z)`. + As for ``vark``, this is the right quantity to use for the ``'shot'`` + noise estimate. + varv: The variance per component of the vector field (0 if v1,v2 are not defined) .. note:: @@ -210,7 +222,7 @@ class Catalog(object): field: If any of the `get?Field ` methods have been called to construct a field from this catalog (either explicitly or implicitly via a `corr.process() - ` command, then this attribute will hold the most recent + ` command, then this attribute will hold the most recent field to have been constructed. .. note:: @@ -259,6 +271,8 @@ class Catalog(object): of 0. 
(default: None) k (array): The kappa values to use for scalar correlations. (This may represent any scalar field.) (default: None) + z1 (array): The z1 values to use for complex scalar correlations. (default: None) + z2 (array): The z2 values to use for complex scalar correlations. (default: None) v1 (array): The v1 values to use for vector correlations. (default: None) v2 (array): The v2 values to use for vector correlations. (default: None) g1 (array): The g1 values to use for shear correlations. (g1,g2 may represent any @@ -356,6 +370,10 @@ class Catalog(object): k_col (str or int): The column to use for the kappa values. An integer is only allowed for ASCII files. (default: '0', which means not to read in this column.) + z1_col (str or int): The column to use for the z1 values. An integer is only allowed for + ASCII files. (default: '0', which means not to read in this column.) + z2_col (str or int): The column to use for the z2 values. An integer is only allowed for + ASCII files. (default: '0', which means not to read in this column.) v1_col (str or int): The column to use for the v1 values. An integer is only allowed for ASCII files. (default: '0', which means not to read in this column.) v2_col (str or int): The column to use for the v2 values. An integer is only allowed for @@ -393,6 +411,8 @@ class Catalog(object): can save time to input them, rather than calculate them using trig functions. (default: False) + flip_z1 (bool): Whether to flip the sign of the input z1 values. (default: False) + flip_z2 (bool): Whether to flip the sign of the input z2 values. (default: False) flip_v1 (bool): Whether to flip the sign of the input v1 values. (default: False) flip_v2 (bool): Whether to flip the sign of the input v2 values. (default: False) flip_g1 (bool): Whether to flip the sign of the input g1 values. (default: False) @@ -419,6 +439,8 @@ class Catalog(object): dec_ext (int/str): Which extension to use for the dec values. (default: ext) r_ext (int/str): Which extension to use for the r values. (default: ext) k_ext (int/str): Which extension to use for the k values. (default: ext) + z1_ext (int/str): Which extension to use for the z1 values. (default: ext) + z2_ext (int/str): Which extension to use for the z2 values. (default: ext) v1_ext (int/str): Which extension to use for the v1 values. (default: ext) v2_ext (int/str): Which extension to use for the v2 values. (default: ext) g1_ext (int/str): Which extension to use for the g1 values. (default: ext) @@ -439,6 +461,8 @@ class Catalog(object): dec_eval (str): An eval string to use for the dec values. (default: None) r_eval (str): An eval string to use for the r values. (default: None) k_eval (str): An eval string to use for the k values. (default: None) + z1_eval (str): An eval string to use for the z1 values. (default: None) + z2_eval (str): An eval string to use for the z2 values. (default: None) v1_eval (str): An eval string to use for the v1 values. (default: None) v2_eval (str): An eval string to use for the v2 values. (default: None) g1_eval (str): An eval string to use for the g1 values. (default: None) @@ -535,6 +559,10 @@ class Catalog(object): 'The units of dec values. Required when using dec_col.'), 'k_col' : (str, True, '0', None, 'Which column to use for kappa. Should be an integer for ASCII catalogs. '), + 'z1_col' : (str, True, '0', None, + 'Which column to use for z1. Should be an integer for ASCII catalogs.'), + 'z2_col' : (str, True, '0', None, + 'Which column to use for z2. 
Should be an integer for ASCII catalogs.'), 'v1_col' : (str, True, '0', None, 'Which column to use for v1. Should be an integer for ASCII catalogs.'), 'v2_col' : (str, True, '0', None, @@ -582,6 +610,10 @@ class Catalog(object): 'Which extension to use for the r_col. default is the global ext value.'), 'k_ext': (str, True, None, None, 'Which extension to use for the k_col. default is the global ext value.'), + 'z1_ext': (str, True, None, None, + 'Which extension to use for the z1_col. default is the global ext value.'), + 'z2_ext': (str, True, None, None, + 'Which extension to use for the z2_col. default is the global ext value.'), 'v1_ext': (str, True, None, None, 'Which extension to use for the v1_col. default is the global ext value.'), 'v2_ext': (str, True, None, None, @@ -620,6 +652,10 @@ class Catalog(object): 'An eval string to use for r.'), 'k_eval' : (str, True, None, None, 'An eval string to use for kappa.'), + 'z1_eval' : (str, True, None, None, + 'An eval string to use for z1.'), + 'z2_eval' : (str, True, None, None, + 'An eval string to use for z2.'), 'v1_eval' : (str, True, None, None, 'An eval string to use for v1.'), 'v2_eval' : (str, True, None, None, @@ -646,6 +682,10 @@ class Catalog(object): 'An eval string to use for flag.'), 'extra_cols': (list, False, None, None, 'A list of extra column names to read for the eval calculations'), + 'flip_z1' : (bool, True, False, None, + 'Whether to flip the sign of z1'), + 'flip_z2' : (bool, True, False, None, + 'Whether to flip the sign of z2'), 'flip_v1' : (bool, True, False, None, 'Whether to flip the sign of v1'), 'flip_v2' : (bool, True, False, None, @@ -664,6 +704,8 @@ class Catalog(object): 'Whether to flip the sign of q2'), 'vark': (float, True, None, None, 'Variance of k values to use, rather than calculate directly'), + 'varz': (float, True, None, None, + 'Variance of z values to use, rather than calculate directly'), 'varv': (float, True, None, None, 'Variance of v values to use, rather than calculate directly'), 'varg': (float, True, None, None, @@ -702,7 +744,8 @@ class Catalog(object): def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=False, x=None, y=None, z=None, ra=None, dec=None, r=None, w=None, wpos=None, flag=None, - k=None, v1=None, v2=None, g1=None, g2=None, t1=None, t2=None, q1=None, q2=None, + k=None, z1=None, z2=None, v1=None, v2=None, g1=None, g2=None, + t1=None, t2=None, q1=None, q2=None, patch=None, patch_centers=None, rng=None, **kwargs): self.config = merge_config(config, kwargs, Catalog._valid_params, Catalog._aliases) @@ -731,6 +774,8 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F self._wpos = None self._flag = None self._k = None + self._z1 = None + self._z2 = None self._v1 = None self._v2 = None self._g1 = None @@ -748,6 +793,7 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F self._sumw = None self._sumw2 = None self._vark = None + self._varz = None self._varv = None self._varg = None self._vart = None @@ -814,7 +860,7 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F # First style -- read from a file if file_name is not None: if any([v is not None - for v in [x,y,z,ra,dec,r,k,v1,v2,g1,g2,t1,t2,q1,q2,patch,w,wpos,flag]]): + for v in [x,y,z,ra,dec,r,k,z1,z2,v1,v2,g1,g2,t1,t2,q1,q2,patch,w,wpos,flag]]): raise TypeError("Vectors may not be provided when file_name is provided.") self.file_name = file_name self.name = file_name @@ -860,6 +906,9 @@ def __init__(self, 
file_name=None, config=None, *, num=0, logger=None, is_rand=F if ra is not None or dec is not None: if ra is None or dec is None: raise TypeError("ra and dec must both be provided") + if z1 is not None or z2 is not None: + if z1 is None or z2 is None: + raise TypeError("z1 and z2 must both be provided") if v1 is not None or v2 is not None: if v1 is None or v2 is None: raise TypeError("v1 and v2 must both be provided") @@ -886,6 +935,8 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F self._wpos = self.makeArray(wpos,'wpos') self._flag = self.makeArray(flag,'flag',int) self._k = self.makeArray(k,'k') + self._z1 = self.makeArray(z1,'z1') + self._z2 = self.makeArray(z2,'z2') self._v1 = self.makeArray(v1,'v1') self._v2 = self.makeArray(v2,'v2') self._g1 = self.makeArray(g1,'g1') @@ -921,6 +972,10 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F raise ValueError("wpos has the wrong numbers of elements") if self._k is not None and len(self._k) != ntot: raise ValueError("k has the wrong numbers of elements") + if self._z1 is not None and len(self._z1) != ntot: + raise ValueError("z1 has the wrong numbers of elements") + if self._z2 is not None and len(self._z2) != ntot: + raise ValueError("z2 has the wrong numbers of elements") if self._v1 is not None and len(self._v1) != ntot: raise ValueError("v1 has the wrong numbers of elements") if self._v2 is not None and len(self._v2) != ntot: @@ -969,6 +1024,10 @@ def __init__(self, file_name=None, config=None, *, num=0, logger=None, is_rand=F self._vark = self.config['vark'] self._meank = 0. self._altmeank = 0. + if self.config.get('varz', None) is not None: + self._varz = self._varz1 = self._varz2 = self.config['varz'] + self._meanz1 = self._meanz2 = 0. + self._altmeanz1 = self._altmeanz2 = 0. if self.config.get('varv', None) is not None: self._varv = self._varv1 = self._varv2 = self.config['varv'] self._meanv1 = self._meanv2 = 0. @@ -1043,6 +1102,16 @@ def k(self): self.load() return self._k + @property + def z1(self): + self.load() + return self._z1 + + @property + def z2(self): + self.load() + return self._z2 + @property def v1(self): self.load() @@ -1127,6 +1196,14 @@ def vark(self): self._meank, self._altmeank, self._vark = self._calculate_weighted_var(self.k) return self._vark + @property + def varz(self): + if self._varz is None: + self._meanz1, self._altmeanz1, self._varz1 = self._calculate_weighted_var(self.z1) + self._meanz2, self._altmeanz2, self._varz2 = self._calculate_weighted_var(self.z2) + self._varz = (self._varz1 + self._varz2)/2 + return self._varz + @property def varv(self): if self._varv is None: @@ -1222,6 +1299,15 @@ def _finish_input(self): # Finish processing the data based on given inputs. 
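+        # (As with g1/g2 for shears, some input catalogs define z1/z2 with the
+        # opposite sign convention; the flip options below accommodate that.)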
# Apply flips if requested + flip_z1 = get_from_list(self.config,'flip_z1',self._num,bool,False) + flip_z2 = get_from_list(self.config,'flip_z2',self._num,bool,False) + if flip_z1: + self.logger.info(" Flipping sign of z1.") + self._z1 = -self._z1 + if flip_z2: + self.logger.info(" Flipping sign of z2.") + self._z2 = -self._z2 + flip_v1 = get_from_list(self.config,'flip_v1',self._num,bool,False) flip_v2 = get_from_list(self.config,'flip_v2',self._num,bool,False) if flip_v1: @@ -1281,6 +1367,8 @@ def _finish_input(self): self.checkForNaN(self._dec,'dec') self.checkForNaN(self._r,'r') self.checkForNaN(self._k,'k') + self.checkForNaN(self._z1,'z1') + self.checkForNaN(self._z2,'z2') self.checkForNaN(self._v1,'v1') self.checkForNaN(self._v2,'v2') self.checkForNaN(self._g1,'g1') @@ -1389,7 +1477,7 @@ def combine(cls, cat_list, *, mask_list=None, low_mem=False): kwargs['allow_xyz'] = True keys = [] for key in ['x', 'y', 'z', 'ra', 'dec', 'r', 'w', 'wpos', - 'k', 'v1', 'v2', 'g1', 'g2', 't1', 't2', 'q1', 'q2']: + 'k', 'z1', 'z2', 'v1', 'v2', 'g1', 'g2', 't1', 't2', 'q1', 'q2']: if getattr(cat, key) is not None: a = getattr(cat,key) if mask_list is not None: @@ -1514,6 +1602,8 @@ def select(self, indx): self._w = self._w[indx] if self._w is not None else None self._wpos = self._wpos[indx] if self._wpos is not None else None self._k = self._k[indx] if self._k is not None else None + self._z1 = self._z1[indx] if self._z1 is not None else None + self._z2 = self._z2[indx] if self._z2 is not None else None self._v1 = self._v1[indx] if self._v1 is not None else None self._v2 = self._v2[indx] if self._v2 is not None else None self._g1 = self._g1[indx] if self._g1 is not None else None @@ -1581,6 +1671,8 @@ def _check_file(self, file_name, reader, num=0, is_rand=False): wpos_col = get_from_list(self.config,'wpos_col',num,str,'0') flag_col = get_from_list(self.config,'flag_col',num,str,'0') k_col = get_from_list(self.config,'k_col',num,str,'0') + z1_col = get_from_list(self.config,'z1_col',num,str,'0') + z2_col = get_from_list(self.config,'z2_col',num,str,'0') v1_col = get_from_list(self.config,'v1_col',num,str,'0') v2_col = get_from_list(self.config,'v2_col',num,str,'0') g1_col = get_from_list(self.config,'g1_col',num,str,'0') @@ -1602,6 +1694,8 @@ def _check_file(self, file_name, reader, num=0, is_rand=False): wpos_eval = get_from_list(self.config,'wpos_eval',num,str,None) flag_eval = get_from_list(self.config,'flag_eval',num,str,None) k_eval = get_from_list(self.config,'k_eval',num,str,None) + z1_eval = get_from_list(self.config,'z1_eval',num,str,None) + z2_eval = get_from_list(self.config,'z2_eval',num,str,None) v1_eval = get_from_list(self.config,'v1_eval',num,str,None) v2_eval = get_from_list(self.config,'v2_eval',num,str,None) g1_eval = get_from_list(self.config,'g1_eval',num,str,None) @@ -1634,6 +1728,10 @@ def _check_file(self, file_name, reader, num=0, is_rand=False): if k_col == '0' and k_eval is None and isKColRequired(self.orig_config,num): raise ValueError("k_col is missing for file %s"%file_name) + if z1_col == '0' and z1_eval is None and isZColRequired(self.orig_config,num): + raise ValueError("z1_col is missing for file %s"%file_name) + if z2_col == '0' and z2_eval is None and isZColRequired(self.orig_config,num): + raise ValueError("z2_col is missing for file %s"%file_name) if v1_col == '0' and v1_eval is None and isVColRequired(self.orig_config,num): raise ValueError("v1_col is missing for file %s"%file_name) if v2_col == '0' and v2_eval is None and isVColRequired(self.orig_config,num): 
@@ -1652,6 +1750,9 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
                raise ValueError("q2_col is missing for file %s"%file_name)

        # Either both should be 0 or both != 0.
+        if (z1_col == '0' and z1_eval is None) != (z2_col == '0' and z2_eval is None):
+            raise ValueError("z1_col, z2_col=(%s, %s) are invalid for file %s"%(
+                z1_col,z2_col,file_name))
        if (v1_col == '0' and v1_eval is None) != (v2_col == '0' and v1_eval is None):
            raise ValueError("v1_col, v2_col=(%s, %s) are invalid for file %s"%(
                v1_col,v2_col,file_name))
@@ -1725,7 +1826,7 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
        if k_col != '0':
            k_ext = get_from_list(self.config, 'k_ext', num, str, ext)
            if k_col not in reader.names(ext=k_ext):
-                if isKColRequired(self.orig_config,num):
+                if isKColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                    raise ValueError("k_col=%s is invalid for file %s"%(k_col, file_name))
                else:
                    self.logger.warning(
@@ -1733,12 +1834,27 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
                        k_col, file_name, num) +
                        "because it is invalid, but unneeded.")

+        if z1_col != '0':
+            z1_ext = get_from_list(self.config, 'z1_ext', num, str, ext)
+            z2_ext = get_from_list(self.config, 'z2_ext', num, str, ext)
+            if (z1_col not in reader.names(ext=z1_ext) or
+                z2_col not in reader.names(ext=z2_ext)):
+                if isZColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
+                    raise ValueError(
+                        "z1_col, z2_col=(%s, %s) are invalid for file %s"%(
+                        z1_col, z2_col, file_name))
+                else:
+                    self.logger.warning(
+                        "Warning: skipping z1_col, z2_col=(%s, %s) for %s, num=%d "%(
+                        z1_col, z2_col, file_name, num) +
+                        "because they are invalid, but unneeded.")
+
        if v1_col != '0':
            v1_ext = get_from_list(self.config, 'v1_ext', num, str, ext)
            v2_ext = get_from_list(self.config, 'v2_ext', num, str, ext)
            if (v1_col not in reader.names(ext=v1_ext) or
                v2_col not in reader.names(ext=v2_ext)):
-                if isVColRequired(self.orig_config,num):
+                if isVColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                    raise ValueError(
                        "v1_col, v2_col=(%s, %s) are invalid for file %s"%(
                        v1_col, v2_col, file_name))
@@ -1753,7 +1869,7 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
            g2_ext = get_from_list(self.config, 'g2_ext', num, str, ext)
            if (g1_col not in reader.names(ext=g1_ext) or
                g2_col not in reader.names(ext=g2_ext)):
-                if isGColRequired(self.orig_config,num):
+                if isGColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                    raise ValueError(
                        "g1_col, g2_col=(%s, %s) are invalid for file %s"%(
                        g1_col, g2_col, file_name))
@@ -1768,7 +1884,7 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
            t2_ext = get_from_list(self.config, 't2_ext', num, str, ext)
            if (t1_col not in reader.names(ext=t1_ext) or
                t2_col not in reader.names(ext=t2_ext)):
-                if isTColRequired(self.orig_config,num):
+                if isTColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                    raise ValueError(
                        "t1_col, t2_col=(%s, %s) are invalid for file %s"%(
                        t1_col, t2_col, file_name))
@@ -1783,7 +1899,7 @@ def _check_file(self, file_name, reader, num=0, is_rand=False):
            q2_ext = get_from_list(self.config, 'q2_ext', num, str, ext)
            if (q1_col not in reader.names(ext=q1_ext) or
                q2_col not in reader.names(ext=q2_ext)):
-                if isQColRequired(self.orig_config,num):
+                if isQColRequired(self.orig_config,num) or 'corr2' not in self.orig_config:
                    raise ValueError(
                        "q1_col, q2_col=(%s, %s) are invalid for file %s"%(
                        q1_col, q2_col, file_name))
@@ -1863,6 +1979,8 @@ def
set_patch(data, patch_col): wpos_col = get_from_list(self.config,'wpos_col',num,str,'0') flag_col = get_from_list(self.config,'flag_col',num,str,'0') k_col = get_from_list(self.config,'k_col',num,str,'0') + z1_col = get_from_list(self.config,'z1_col',num,str,'0') + z2_col = get_from_list(self.config,'z2_col',num,str,'0') v1_col = get_from_list(self.config,'v1_col',num,str,'0') v2_col = get_from_list(self.config,'v2_col',num,str,'0') g1_col = get_from_list(self.config,'g1_col',num,str,'0') @@ -1883,6 +2001,8 @@ def set_patch(data, patch_col): wpos_eval = get_from_list(self.config,'wpos_eval',num,str,None) flag_eval = get_from_list(self.config,'flag_eval',num,str,None) k_eval = get_from_list(self.config,'k_eval',num,str,None) + z1_eval = get_from_list(self.config,'z1_eval',num,str,None) + z2_eval = get_from_list(self.config,'z2_eval',num,str,None) v1_eval = get_from_list(self.config,'v1_eval',num,str,None) v2_eval = get_from_list(self.config,'v2_eval',num,str,None) g1_eval = get_from_list(self.config,'g1_eval',num,str,None) @@ -1907,7 +2027,7 @@ def set_patch(data, patch_col): ra_col, dec_col, r_col, patch_col, w_col, wpos_col, flag_col, - k_col, v1_col, v2_col, g1_col, g2_col, + k_col, z1_col, z2_col, v1_col, v2_col, g1_col, g2_col, t1_col, t2_col, q1_col, q2_col] # It's faster in FITS to read in all the columns in one read, rather than individually. @@ -1928,6 +2048,8 @@ def set_patch(data, patch_col): wpos_ext = get_from_list(self.config, 'wpos_ext', num, str, ext) flag_ext = get_from_list(self.config, 'flag_ext', num, str, ext) k_ext = get_from_list(self.config, 'k_ext', num, str, ext) + z1_ext = get_from_list(self.config, 'z1_ext', num, str, ext) + z2_ext = get_from_list(self.config, 'z2_ext', num, str, ext) v1_ext = get_from_list(self.config, 'v1_ext', num, str, ext) v2_ext = get_from_list(self.config, 'v2_ext', num, str, ext) g1_ext = get_from_list(self.config, 'g1_ext', num, str, ext) @@ -1940,7 +2062,7 @@ def set_patch(data, patch_col): ra_ext, dec_ext, r_ext, patch_ext, w_ext, wpos_ext, flag_ext, - k_ext, v1_ext, v2_ext, g1_ext, g2_ext, + k_ext, z1_ext, z2_ext, v1_ext, v2_ext, g1_ext, g2_ext, t1_ext, t2_ext, q1_ext, q2_ext] col_by_ext = dict(zip(all_cols,all_exts)) all_exts = set(all_exts + [ext]) @@ -2023,6 +2145,13 @@ def set_patch(data, patch_col): self._k = parse_value(data, k_col, k_eval) self.logger.debug('read k') + # Set z1,z2 + if z1_col in reader.names(ext=z1_ext) or z1_eval is not None: + self._z1 = parse_value(data, z1_col, z1_eval) + self.logger.debug('read z1') + self._z2 = parse_value(data, z2_col, z2_eval) + self.logger.debug('read z2') + # Set v1,v2 if v1_col in reader.names(ext=v1_ext) or v1_eval is not None: self._v1 = parse_value(data, v1_col, v1_eval) @@ -2079,6 +2208,18 @@ def get_kfield(min_size, max_size, split_method, brute, min_top, max_top, coords self._kfields = LRU_Cache(get_kfield, 1) return self._kfields + @property + def zfields(self): + if not hasattr(self, '_zfields'): + def get_zfield(min_size, max_size, split_method, brute, min_top, max_top, coords, + rng, logger=None): + return ZField(self, min_size=min_size, max_size=max_size, + split_method=split_method, brute=brute, + min_top=min_top, max_top=max_top, coords=coords, + rng=rng, logger=logger) + self._zfields = LRU_Cache(get_zfield, 1) + return self._zfields + @property def vfields(self): if not hasattr(self, '_vfields'): @@ -2166,6 +2307,7 @@ def resize_cache(self, maxsize): >>> cat.nfields.resize(maxsize) >>> cat.kfields.resize(maxsize) + >>> cat.zfields.resize(maxsize) >>> 
cat.vfields.resize(maxsize) >>> cat.gfields.resize(maxsize) >>> cat.tfields.resize(maxsize) @@ -2176,6 +2318,7 @@ def resize_cache(self, maxsize): """ if hasattr(self, '_nfields'): self.nfields.resize(maxsize) if hasattr(self, '_kfields'): self.kfields.resize(maxsize) + if hasattr(self, '_zfields'): self.zfields.resize(maxsize) if hasattr(self, '_vfields'): self.vfields.resize(maxsize) if hasattr(self, '_gfields'): self.gfields.resize(maxsize) if hasattr(self, '_tfields'): self.tfields.resize(maxsize) @@ -2205,6 +2348,7 @@ def clear_cache(self): >>> cat.nfields.clear() >>> cat.kfields.clear() + >>> cat.zfields.clear() >>> cat.vfields.clear() >>> cat.gfields.clear() >>> cat.tfields.clear() @@ -2212,6 +2356,7 @@ def clear_cache(self): """ if hasattr(self, '_nfields'): self.nfields.clear() if hasattr(self, '_kfields'): self.kfields.clear() + if hasattr(self, '_zfields'): self.zfields.clear() if hasattr(self, '_vfields'): self.vfields.clear() if hasattr(self, '_gfields'): self.gfields.clear() if hasattr(self, '_tfields'): self.tfields.clear() @@ -2294,6 +2439,41 @@ def getKField(self, *, min_size=0, max_size=None, split_method=None, brute=False self._field = weakref.ref(field) return field + def getZField(self, *, min_size=0, max_size=None, split_method=None, brute=False, + min_top=None, max_top=10, coords=None, logger=None): + """Return a `ZField` based on the z1,z2 values in this catalog. + + The `ZField` object is cached, so this is efficient to call multiple times. + cf. `resize_cache` and `clear_cache`. + + Parameters: + min_size (float): The minimum radius cell required (usually min_sep). (default: 0) + max_size (float): The maximum radius cell required (usually max_sep). (default: None) + split_method (str): Which split method to use ('mean', 'median', 'middle', or 'random') + (default: 'mean'; this value can also be given in the Catalog + constructor in the config dict.) + brute (bool): Whether to force traversal to the leaves. (default: False) + min_top (int): The minimum number of top layers to use when setting up the + field. (default: :math:`\\max(3, \\log_2(N_{\\rm cpu}))`) + max_top (int): The maximum number of top layers to use when setting up the + field. (default: 10) + coords (str): The kind of coordinate system to use. (default: self.coords) + logger: A Logger object if desired (default: self.logger) + + Returns: + A `ZField` object + """ + if split_method is None: + split_method = get(self.config,'split_method',str,'mean') + if self.z1 is None or self.z2 is None: + raise TypeError("z1,z2 are not defined.") + if logger is None: + logger = self.logger + field = self.zfields(min_size, max_size, split_method, brute, min_top, max_top, coords, + rng=self._rng, logger=logger) + self._field = weakref.ref(field) + return field + def getVField(self, *, min_size=0, max_size=None, split_method=None, brute=False, min_top=None, max_top=10, coords=None, logger=None): """Return a `VField` based on the v1,v2 values in this catalog. @@ -2591,6 +2771,8 @@ def unload(self): self._w = None self._wpos = None self._k = None + self._z1 = None + self._z2 = None self._v1 = None self._v2 = None self._g1 = None @@ -2625,7 +2807,7 @@ def write_patches(self, save_patch_dir=None): """Write the patches to disk as separate files. This can be used in conjunction with ``low_mem=True`` option of `get_patches` (and - implicitly by the various `process ` methods) to only keep + implicitly by the various `process ` methods) to only keep at most two patches in memory at a time.
Parameters: @@ -2697,6 +2879,10 @@ def read_patches(self, save_patch_dir=None): kwargs['wpos_col'] = 'wpos' if self._k is not None or self.config.get('k_col','0') != '0': kwargs['k_col'] = 'k' + if self._z1 is not None or self.config.get('z1_col','0') != '0': + kwargs['z1_col'] = 'z1' + if self._z2 is not None or self.config.get('z2_col','0') != '0': + kwargs['z2_col'] = 'z2' if self._v1 is not None or self.config.get('v1_col','0') != '0': kwargs['v1_col'] = 'v1' if self._v2 is not None or self.config.get('v2_col','0') != '0': @@ -2786,6 +2972,8 @@ def get_patches(self, *, low_mem=False): w=self.w[indx] if self.nontrivial_w else None wpos=self.wpos[indx] if self.wpos is not None else None k=self.k[indx] if self.k is not None else None + z1=self.z1[indx] if self.z1 is not None else None + z2=self.z2[indx] if self.z2 is not None else None v1=self.v1[indx] if self.v1 is not None else None v2=self.v2[indx] if self.v2 is not None else None g1=self.g1[indx] if self.g1 is not None else None @@ -2801,7 +2989,8 @@ def get_patches(self, *, low_mem=False): kwargs['dec_units'] = 'rad' kwargs['allow_xyz'] = True p = Catalog(x=x, y=y, z=z, ra=ra, dec=dec, r=r, w=w, wpos=wpos, - k=k, v1=v1, v2=v2, g1=g1, g2=g2, t1=t1, t2=t2, q1=q1, q2=q2, + k=k, z1=z1, z2=z2, v1=v1, v2=v2, g1=g1, g2=g2, + t1=t1, t2=t2, q1=q1, q2=q2, patch=i, npatch=self.npatch, **kwargs) self._patches.append(p) @@ -2842,6 +3031,8 @@ def write(self, file_name, *, file_type=None, precision=None): w self.w if not None and self.nontrivial_w wpos self.wpos if not None k self.k if not None + z1 self.z1 if not None + z2 self.z2 if not None v1 self.v1 if not None v2 self.v2 if not None g1 self.g1 if not None @@ -2892,6 +3083,12 @@ def write(self, file_name, *, file_type=None, precision=None): if self.k is not None: col_names.append('k') columns.append(self.k) + if self.z1 is not None: + col_names.append('z1') + columns.append(self.z1) + if self.z2 is not None: + col_names.append('z2') + columns.append(self.z2) if self.v1 is not None: col_names.append('v1') columns.append(self.v1) @@ -2939,6 +3136,7 @@ def __getstate__(self): d.pop('_field',None) d.pop('_nfields',None) d.pop('_kfields',None) + d.pop('_zfields',None) d.pop('_vfields',None) d.pop('_gfields',None) d.pop('_tfields',None) @@ -2964,6 +3162,8 @@ def __repr__(self): if self.nontrivial_w: s += 'w='+repr(self.w)+',' if self.wpos is not None: s += 'wpos='+repr(self.wpos)+',' if self.k is not None: s += 'k='+repr(self.k)+',' + if self.z1 is not None: s += 'z1='+repr(self.z1)+',' + if self.z2 is not None: s += 'z2='+repr(self.z2)+',' if self.v1 is not None: s += 'v1='+repr(self.v1)+',' if self.v2 is not None: s += 'v2='+repr(self.v2)+',' if self.g1 is not None: s += 'g1='+repr(self.g1)+',' @@ -2995,6 +3195,8 @@ def __eq__(self, other): np.array_equal(self.w, other.w) and np.array_equal(self.wpos, other.wpos) and np.array_equal(self.k, other.k) and + np.array_equal(self.z1, other.z1) and + np.array_equal(self.z2, other.z2) and np.array_equal(self.v1, other.v1) and np.array_equal(self.v2, other.v2) and np.array_equal(self.g1, other.g1) and @@ -3114,6 +3316,29 @@ def calculateVarK(cat_list, *, low_mem=False): # unit tests have small enough N that this matters. return _compute_var_multi_cat(cat_list, 'k', low_mem) +def calculateVarZ(cat_list, *, low_mem=False): + """Calculate the overall variance of the complex scalar field from a list of catalogs. 
+ + The catalogs are assumed to be equivalent, so this is just the average + variance (per component) weighted by the number of objects in each catalog. + + Parameters: + cat_list: A Catalog or a list of Catalogs for which to calculate the variance of the + complex scalar field. + low_mem: Whether to try to conserve memory when cat is a list by unloading each + catalog after getting its individual varz. [default: False] + + Returns: + The variance per component of the complex scalar field. + """ + if isinstance(cat_list, Catalog): + return cat_list.varz + elif len(cat_list) == 1: + return cat_list[0].varz + else: + varz1 = _compute_var_multi_cat(cat_list, 'z1', low_mem) + varz2 = _compute_var_multi_cat(cat_list, 'z2', low_mem) + return (varz1 + varz2)/2. + def calculateVarV(cat_list, *, low_mem=False): """Calculate the overall variance of the vector field from a list of catalogs. @@ -3221,12 +3446,31 @@ def isKColRequired(config, num): False if not. """ - return config and ( 'kk_file_name' in config - or (num==0 and 'kv_file_name' in config) - or (num==0 and 'kg_file_name' in config) - or (num==0 and 'kt_file_name' in config) - or (num==0 and 'kq_file_name' in config) - or (num==1 and 'nk_file_name' in config) ) + return config and ('kk_file_name' in config + or (num==0 and 'kv_file_name' in config) + or (num==0 and 'kg_file_name' in config) + or (num==0 and 'kt_file_name' in config) + or (num==0 and 'kq_file_name' in config) + or (num==1 and 'nk_file_name' in config)) + +def isZColRequired(config, num): + """A quick helper function that checks whether we need to bother reading the z1,z2 columns. + + The logic here is the same as for `isGColRequired`, but we check for output files that require + the z1,z2 columns rather than g1,g2. + + Parameters: + config (dict): The configuration file to check. + num (int): Which number catalog are we working on. + + Returns: + True if some output file requires this catalog to have valid z1/z2 columns, + False if not. + """ + return config and ('zz_file_name' in config + or (num==1 and 'nz_file_name' in config) + or (num==1 and 'kz_file_name' in config)) def isVColRequired(config, num): """A quick helper function that checks whether we need to bother reading the v1,v2 columns. @@ -3243,9 +3487,9 @@ def isVColRequired(config, num): False if not. """ - return config and ( 'vv_file_name' in config - or (num==1 and 'nv_file_name' in config) - or (num==1 and 'kv_file_name' in config) ) + return config and ('vv_file_name' in config + or (num==1 and 'nv_file_name' in config) + or (num==1 and 'kv_file_name' in config)) def isGColRequired(config, num): """A quick helper function that checks whether we need to bother reading the g1,g2 columns. @@ -3271,12 +3515,12 @@ def isGColRequired(config, num): False if not. """ - return config and ( 'gg_file_name' in config - or 'm2_file_name' in config - or (num==1 and 'norm_file_name' in config) - or (num==1 and 'ng_file_name' in config) - or (num==1 and 'nm_file_name' in config) - or (num==1 and 'kg_file_name' in config) ) + return config and ('gg_file_name' in config + or 'm2_file_name' in config + or (num==1 and 'norm_file_name' in config) + or (num==1 and 'ng_file_name' in config) + or (num==1 and 'nm_file_name' in config) + or (num==1 and 'kg_file_name' in config)) def isTColRequired(config, num): """A quick helper function that checks whether we need to bother reading the t1,t2 columns. @@ -3292,9 +3536,9 @@ def isTColRequired(config, num): True if some output file requires this catalog to have valid t1/t2 columns, False if not.
""" - return config and ( 'tt_file_name' in config - or (num==1 and 'nt_file_name' in config) - or (num==1 and 'kt_file_name' in config) ) + return config and ('tt_file_name' in config + or (num==1 and 'nt_file_name' in config) + or (num==1 and 'kt_file_name' in config)) def isQColRequired(config, num): """A quick helper function that checks whether we need to bother reading the q1,q2 columns. @@ -3310,6 +3554,6 @@ def isQColRequired(config, num): True if some output file requires this catalog to have valid q1/q2 columns, False if not. """ - return config and ( 'qq_file_name' in config - or (num==1 and 'nq_file_name' in config) - or (num==1 and 'kq_file_name' in config) ) + return config and ('qq_file_name' in config + or (num==1 and 'nq_file_name' in config) + or (num==1 and 'kq_file_name' in config)) diff --git a/treecorr/config.py b/treecorr/config.py index 71b95332..cadaf843 100644 --- a/treecorr/config.py +++ b/treecorr/config.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/corr2base.py b/treecorr/corr2base.py index 24a38476..bf372d39 100644 --- a/treecorr/corr2base.py +++ b/treecorr/corr2base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -25,6 +25,7 @@ from . import _treecorr from .config import merge_config, setup_logger, get, make_minimal_config from .util import parse_metric, metric_enum, coord_enum, set_omp_threads, lazy_property +from .util import make_reader from .catalog import Catalog class Namespace(object): @@ -229,6 +230,9 @@ class Corr2(object): # this is unnecessary, so we override it in those classes. _default_angle_slop = 0.1 + # A dict pointing from _letters to cls. E.g. 
_lookup_dict['GG'] = GGCorrelation + _lookup_dict = {} + _valid_params = { 'nbins' : (int, False, None, None, 'The number of output bins to use.'), @@ -506,6 +510,21 @@ def __init__(self, config=None, *, logger=None, rng=None, **kwargs): self.npatch1 = self.npatch2 = 1 self._rng = rng + self.meanr = np.zeros_like(self.rnom, dtype=float) + self.meanlogr = np.zeros_like(self.rnom, dtype=float) + self.weight = np.zeros_like(self.rnom, dtype=float) + self.npairs = np.zeros_like(self.rnom, dtype=float) + self._varxi = None + self._cov = None + self._var_num = 0 + self._processed_cats1 = [] + self._processed_cats2 = [] + + def __init_subclass__(cls): + super().__init_subclass__() + if hasattr(cls, '_letters'): + Corr2._lookup_dict[cls._letters] = cls + @property def rng(self): if self._rng is None: @@ -617,6 +636,65 @@ def cov_diag(self): else: return self._cov.diagonal() + @property + def corr(self): + if self._corr is None: + x = np.array([]) + self._corr = self._builder(self._bintype, self._min_sep, self._max_sep, self._nbins, + self._bin_size, self.b, self.angle_slop, + self.min_rpar, self.max_rpar, + self.xperiod, self.yperiod, self.zperiod, + self._xi1, self._xi2, self._xi3, self._xi4, + self.meanr, self.meanlogr, self.weight, self.npairs) + return self._corr + + def __eq__(self, other): + """Return whether two Correlation instances are equal""" + return (isinstance(other, self.__class__) and + self.nbins == other.nbins and + self.bin_size == other.bin_size and + self.min_sep == other.min_sep and + self.max_sep == other.max_sep and + self.sep_units == other.sep_units and + self.coords == other.coords and + self.bin_type == other.bin_type and + self.bin_slop == other.bin_slop and + self.angle_slop == other.angle_slop and + self.min_rpar == other.min_rpar and + self.max_rpar == other.max_rpar and + self.xperiod == other.xperiod and + self.yperiod == other.yperiod and + self.zperiod == other.zperiod and + np.array_equal(self.meanr, other.meanr) and + np.array_equal(self.meanlogr, other.meanlogr) and + np.array_equal(self.weight, other.weight) and + np.array_equal(self.npairs, other.npairs) and + np.array_equal(self._xi1, other._xi1) and + np.array_equal(self._xi2, other._xi2) and + np.array_equal(self._xi3, other._xi3) and + np.array_equal(self._xi4, other._xi4)) + + def copy(self): + """Make a copy""" + ret = self.__class__.__new__(self.__class__) + for key, item in self.__dict__.items(): + if isinstance(item, np.ndarray): + # Only items that might change need to be deep copied. + ret.__dict__[key] = item.copy() + else: + # For everything else, shallow copy is fine. + # In particular don't deep copy config or logger + # Most of the rest are scalars, which copy fine this way. + # And the read-only things are all in _ro. + # The results dict is trickier. We rely on it being copied in places, but we + # never add more to it after the copy, so shallow copy is fine. + ret.__dict__[key] = item + ret._corr = None # We'll want to make a new one of these if we need it. + return ret + + def __repr__(self): + return f'{self._cls}({self._repr_kwargs})' + def __getstate__(self): d = self.__dict__.copy() d.pop('_corr',None) @@ -937,6 +1015,222 @@ def getWeight(self): """ return self.weight.ravel() + def _process_auto(self, cat, metric=None, num_threads=None): + # This is only valid for some classes, but it is common enough that we do the implementation + # here and only when appropriate define the non-underscore version.
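+ # The field type is looked up dynamically below from the subclass's _letter1 + # attribute, e.g. _letter1 == 'G' dispatches to cat.getGField.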
+ if cat.name == '': + self.logger.info(f'Starting process {self._letters} auto-correlations') + else: + self.logger.info(f'Starting process {self._letters} auto-correlations for cat %s.', + cat.name) + + self._set_metric(metric, cat.coords) + self._set_num_threads(num_threads) + min_size, max_size = self._get_minmax_size() + + getField = getattr(cat, f"get{self._letter1}Field") + field = getField(min_size=min_size, max_size=max_size, + split_method=self.split_method, brute=bool(self.brute), + min_top=self.min_top, max_top=self.max_top, + coords=self.coords) + + self.logger.info('Starting %d jobs.',field.nTopLevelNodes) + self.corr.processAuto(field.data, self.output_dots, self._metric) + + def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): + """Process a single pair of catalogs, accumulating the cross-correlation. + + This accumulates the weighted sums into the bins, but does not finalize + the calculation by dividing by the total weight at the end. After + calling this function as often as desired, the ``finalize`` command will + finish the calculation. + + Parameters: + cat1 (Catalog): The first catalog to process + cat2 (Catalog): The second catalog to process + metric (str): Which metric to use. See `Metrics` for details. + (default: 'Euclidean'; this value can also be given in the + constructor in the config dict.) + num_threads (int): How many OpenMP threads to use during the calculation. + (default: use the number of cpu cores; this value can also be given + in the constructor in the config dict.) + """ + if cat1.name == '' and cat2.name == '': + self.logger.info('Starting process %s%s cross-correlations', + self._letter1, self._letter2) + else: + self.logger.info('Starting process %s%s cross-correlations for cats %s, %s.', + self._letter1, self._letter2, cat1.name, cat2.name) + + self._set_metric(metric, cat1.coords, cat2.coords) + self._set_num_threads(num_threads) + min_size, max_size = self._get_minmax_size() + + getField1 = getattr(cat1, f"get{self._letter1}Field") + f1 = getField1(min_size=min_size, max_size=max_size, + split_method=self.split_method, + brute=self.brute is True or self.brute == 1, + min_top=self.min_top, max_top=self.max_top, + coords=self.coords) + getField2 = getattr(cat2, f"get{self._letter2}Field") + f2 = getField2(min_size=min_size, max_size=max_size, + split_method=self.split_method, + brute=self.brute is True or self.brute == 2, + min_top=self.min_top, max_top=self.max_top, + coords=self.coords) + + self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) + self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) + + def process(self, cat1, cat2=None, metric=None, num_threads=None, comm=None, low_mem=False, + initialize=True, finalize=True, patch_method='global'): + """Compute the correlation function. + + - If only 1 argument is given, then compute an auto-correlation function. + - If 2 arguments are given, then compute a cross-correlation function. + + Both arguments may be lists, in which case all items in the list are used + for that element of the correlation. + + Parameters: + cat1 (Catalog): A catalog or list of catalogs for the first field. + cat2 (Catalog): A catalog or list of catalogs for the second field, if any. + (default: None) + metric (str): Which metric to use. See `Metrics` for details. + (default: 'Euclidean'; this value can also be given in the + constructor in the config dict.) + num_threads (int): How many OpenMP threads to use during the calculation. 
+ (default: use the number of cpu cores; this value can also be given + in the constructor in the config dict.) + comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between + processes. If used, the rank=0 process will have the final + computation. This only works if using patches. (default: None) + low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. + This only works if using patches. (default: False) + initialize (bool): Whether to begin the calculation with a call to + `Corr2.clear`. (default: True) + finalize (bool): Whether to complete the calculation with a call to finalize. + (default: True) + patch_method (str): Which patch method to use. (default: 'global') + """ + import math + + if self._letter1 != self._letter2 and cat2 is None: + raise TypeError(f"cat2 is required for {self._cls}.process") + if initialize: + self.clear() + self._processed_cats1.clear() + self._processed_cats2.clear() + + if patch_method not in ['local', 'global']: + raise ValueError("Invalid patch_method %s"%patch_method) + local = patch_method == 'local' + + if not isinstance(cat1,list): + cat1 = cat1.get_patches(low_mem=low_mem) + if cat2 is not None and not isinstance(cat2,list): + cat2 = cat2.get_patches(low_mem=low_mem) + + if cat2 is None: + self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) + else: + self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) + + self._processed_cats1.extend(cat1) + if cat2 is not None: + self._processed_cats2.extend(cat2) + if finalize: + if cat2 is None: + var1 = var2 = self._calculateVar1(self._processed_cats1, low_mem=low_mem) + if var1 is not None: + self.logger.info(f"var%s = %f: {self._sig1} = %f", + self._letter1.lower(), var1, math.sqrt(var1)) + else: + var1 = self._calculateVar1(self._processed_cats1, low_mem=low_mem) + var2 = self._calculateVar2(self._processed_cats2, low_mem=low_mem) + if self._letter1 == self._letter2: + if var1 is not None: + self.logger.info(f"var%s1 = %f: {self._sig1} = %f", + self._letter1, var1, math.sqrt(var1)) + self.logger.info(f"var%s2 = %f: {self._sig2} = %f", + self._letter2, var2, math.sqrt(var2)) + else: + if var1 is not None: + self.logger.info(f"var%s = %f: {self._sig1} = %f", + self._letter1, var1, math.sqrt(var1)) + self.logger.info(f"var%s = %f: {self._sig2} = %f", + self._letter2, var2, math.sqrt(var2)) + if var1 is None: + if var2 is None: + self.finalize() + else: + self.finalize(var2) + else: + self.finalize(var1, var2) + self._processed_cats1.clear() + self._processed_cats2.clear() + + def _finalize(self): + mask1 = self.weight != 0 + mask2 = self.weight == 0 + + if len(self._xi1) > 0: + self._xi1[mask1] /= self.weight[mask1] + if len(self._xi2) > 0: + self._xi2[mask1] /= self.weight[mask1] + if len(self._xi3) > 0: + self._xi3[mask1] /= self.weight[mask1] + self._xi4[mask1] /= self.weight[mask1] + + self.meanr[mask1] /= self.weight[mask1] + self.meanlogr[mask1] /= self.weight[mask1] + + # Update the units of meanr, meanlogr + self._apply_units(mask1) + + # Use meanr, meanlogr when available, but set to nominal when no pairs in bin.
+ self.meanr[mask2] = self.rnom[mask2] + self.meanlogr[mask2] = self.logr[mask2] + + def _clear(self): + """Clear the data vectors + """ + self._xi1.ravel()[:] = 0 + self._xi2.ravel()[:] = 0 + self._xi3.ravel()[:] = 0 + self._xi4.ravel()[:] = 0 + self.meanr.ravel()[:] = 0 + self.meanlogr.ravel()[:] = 0 + self.weight.ravel()[:] = 0 + self.npairs.ravel()[:] = 0 + self._cov = None + + def __iadd__(self, other): + """Add a second Correlation object's data to this one. + + .. note:: + + For this to make sense, both objects should not have had ``finalize`` called yet. + Then, after adding them together, you should call ``finalize`` on the sum. + """ + if not isinstance(other, self.__class__): + raise TypeError(f"Can only add another {self._cls} object") + if not (self._nbins == other._nbins and + self.min_sep == other.min_sep and + self.max_sep == other.max_sep): + raise ValueError(f"{self._cls} to be added is not compatible with this one.") + + self._set_metric(other.metric, other.coords, other.coords) + self._xi1[:] += other._xi1 + self._xi2[:] += other._xi2 + self._xi3[:] += other._xi3 + self._xi4[:] += other._xi4 + self.meanr[:] += other.meanr + self.meanlogr[:] += other.meanlogr + self.weight[:] += other.weight + self.npairs[:] += other.npairs + return self + def estimate_cov(self, method, *, func=None, comm=None): """Estimate the covariance matrix based on the data @@ -1399,6 +1693,14 @@ def make_gen(self): def _bootstrap_pairs(self, index): return self.BootstrapPairIterator(self.results, self.npatch1, self.npatch2, index, self._ok) + @property + def _write_params(self): + params = make_minimal_config(self.config, Corr2._valid_params) + # Add in a couple other things we want to preserve that aren't construction kwargs. + params['coords'] = self.coords + params['metric'] = self.metric + return params + def _write(self, writer, name, write_patch_results, write_cov=False, zero_tot=False): if name is None and (write_patch_results or write_cov): # HDF doesn't work right with multiple groups unless they all have non-empty names. @@ -1407,6 +1709,7 @@ def _write(self, writer, name, write_patch_results, write_cov=False, zero_tot=Fa col_names = self._write_col_names data = self._write_data params = self._write_params + params['corr'] = self._letters if write_patch_results: # Note: Only include npatch1, npatch2 in serialization if we are also serializing @@ -1478,6 +1781,91 @@ def _read(self, reader, name=None, params=None): cov_shape = eval(cov_shape) self._cov = reader.read_array(cov_shape, ext='cov') + def _read_from_data(self, data, params): + s = self.logr.shape + self.meanr = data['meanr'].reshape(s) + self.meanlogr = data['meanlogr'].reshape(s) + self.npairs = data['npairs'].reshape(s) + self.coords = params['coords'].strip() + self.metric = params['metric'].strip() + self.npatch1 = params.get('npatch1', 1) + self.npatch2 = params.get('npatch2', 1) + + def read(self, file_name, *, file_type=None): + """Read in values from a file. + + This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so + there is no loss of information. + + .. warning:: + + The current object should be constructed with the same configuration parameters as + the one being read, e.g. the same min_sep, max_sep, etc. This is not checked by + the read function. + + Parameters: + file_name (str): The name of the file to read in. + file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine the type + automatically from the extension of file_name.)
+ """ + self.logger.info(f'Reading {self._letters} correlations from %s',file_name) + with make_reader(file_name, file_type, self.logger) as reader: + self._read(reader) + + @classmethod + def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): + """Create a new instance from an output file. + + This should be a file that was written by TreeCorr. + + .. note:: + + This classmethod may be called either using the base class or the class type that + wrote the file. E.g. if the file was written by `GGCorrelation`, then either + of the following would work and be equivalent: + + >>> gg = treecorr.GGCorrelation.from_file(file_name) + >>> gg = treecorr.Corr2.from_file(file_name) + + Parameters: + file_name (str): The name of the file to read in. + file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine + the type automatically from the extension of file_name.) + logger (Logger): If desired, a logger object to use for logging. (default: None) + rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap + random number generation. (default: None) + + Returns: + corr: A Correlation object, constructed from the information in the file. + """ + if cls is Corr2: + # Then need to figure out what class to make first. + with make_reader(file_name, file_type, logger) as reader: + name = 'main' if 'main' in reader else None + params = reader.read_params(ext=name) + letters = params.get('corr', None) + if letters not in Corr2._lookup_dict: + raise OSError("%s does not seem to be a valid treecorr output file."%file_name) + cls = Corr2._lookup_dict[letters] + return cls.from_file(file_name, file_type=file_type, logger=logger, rng=rng) + if logger: + logger.info(f'Building {cls._cls} from %s', file_name) + with make_reader(file_name, file_type, logger) as reader: + name = 'main' if 'main' in reader else None + params = reader.read_params(ext=name) + letters = params.get('corr', None) + if letters not in Corr2._lookup_dict: + raise OSError("%s does not seem to be a valid treecorr output file."%file_name) + if params['corr'] != cls._letters: + raise OSError("Trying to read a %sCorrelation output file with %s"%( + params['corr'], cls.__name__)) + kwargs = make_minimal_config(params, Corr2._valid_params) + corr = cls(**kwargs, logger=logger, rng=rng) + corr.logger.info(f'Reading {cls._letters} correlations from %s', file_name) + corr._read(reader, name=name, params=params) + return corr + + def estimate_multi_cov(corrs, method, *, func=None, comm=None): """Estimate the covariance matrix of multiple statistics. 
diff --git a/treecorr/corr3base.py b/treecorr/corr3base.py index 225f404d..a391f920 100644 --- a/treecorr/corr3base.py +++ b/treecorr/corr3base.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/exec_corr2.py b/treecorr/exec_corr2.py index 9b5183d6..a635cd3c 100644 --- a/treecorr/exec_corr2.py +++ b/treecorr/exec_corr2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -22,6 +22,9 @@ from .nncorrelation import NNCorrelation from .nkcorrelation import NKCorrelation from .kkcorrelation import KKCorrelation +from .nzcorrelation import NZCorrelation +from .kzcorrelation import KZCorrelation +from .zzcorrelation import ZZCorrelation from .nvcorrelation import NVCorrelation from .kvcorrelation import KVCorrelation from .vvcorrelation import VVCorrelation @@ -72,15 +75,25 @@ 'nk_file_name' : (str, False, None, None, 'The output filename for count-scalar correlation function.'), 'nk_statistic' : (str, False, None, ['compensated', 'simple'], - 'Which statistic to use for the xi estimator of the NK correlation function. ', + 'Which statistic to use for the estimator of the NK correlation function. ', 'The default is compensated if rand_files is given, otherwise simple'), 'kk_file_name' : (str, False, None, None, 'The output filename for scalar-scalar correlation function.'), + 'nz_file_name' : (str, False, None, None, + 'The output filename for point-spin-0 correlation function.'), + 'nz_statistic' : (str, False, None, ['compensated', 'simple'], + 'Which statistic to use for the estimator of the NZ correlation function. ', + 'The default is compensated if rand_files is given, otherwise simple'), + 'kz_file_name' : (str, False, None, None, + 'The output filename for scalar-spin-0 correlation function.'), + 'zz_file_name' : (str, False, None, None, + 'The output filename for spin-0-spin-0 correlation function.'), + 'nv_file_name' : (str, False, None, None, 'The output filename for point-vector correlation function.'), 'nv_statistic' : (str, False, None, ['compensated', 'simple'], - 'Which statistic to use for the mean vector estimator of the NV correlation function. ', + 'Which statistic to use for the estimator of the NV correlation function. ', 'The default is compensated if rand_files is given, otherwise simple'), 'kv_file_name' : (str, False, None, None, 'The output filename for scalar-vector correlation function.'), @@ -90,7 +103,7 @@ 'ng_file_name' : (str, False, None, None, 'The output filename for point-shear correlation function.'), 'ng_statistic' : (str, False, None, ['compensated', 'simple'], - 'Which statistic to use for the mean shear estimator of the NG correlation function. ', + 'Which statistic to use for the estimator of the NG correlation function. 
', 'The default is compensated if rand_files is given, otherwise simple'), 'kg_file_name' : (str, False, None, None, 'The output filename for scalar-shear correlation function.'), @@ -98,24 +111,24 @@ 'The output filename for shear-shear correlation function.'), 'nt_file_name' : (str, False, None, None, - 'The output filename for point-shear correlation function.'), + 'The output filename for point-spin-3 correlation function.'), 'nt_statistic' : (str, False, None, ['compensated', 'simple'], - 'Which statistic to use for the mean shear estimator of the NG correlation function. ', + 'Which statistic to use for the estimator of the NT correlation function. ', 'The default is compensated if rand_files is given, otherwise simple'), 'kt_file_name' : (str, False, None, None, - 'The output filename for scalar-shear correlation function.'), + 'The output filename for scalar-spin-3 correlation function.'), 'tt_file_name' : (str, False, None, None, - 'The output filename for shear-shear correlation function.'), + 'The output filename for spin-3-spin-3 correlation function.'), 'nq_file_name' : (str, False, None, None, - 'The output filename for point-shear correlation function.'), + 'The output filename for point-spin-4 correlation function.'), 'nq_statistic' : (str, False, None, ['compensated', 'simple'], - 'Which statistic to use for the mean shear estimator of the NG correlation function. ', + 'Which statistic to use for the estimator of the NQ correlation function. ', 'The default is compensated if rand_files is given, otherwise simple'), 'kq_file_name' : (str, False, None, None, - 'The output filename for scalar-shear correlation function.'), + 'The output filename for scalar-spin-4 correlation function.'), 'qq_file_name' : (str, False, None, None, - 'The output filename for shear-shear correlation function.'), + 'The output filename for spin-4-spin-4 correlation function.'), # Derived output quantities @@ -157,6 +170,9 @@ def corr2(config, logger=None): # Also convert the given parameters to the correct type, etc. config = check_config(config, corr2_valid_params, corr2_aliases, logger) + # Mark that we are running the corr2 function. + config['corr2'] = True + import pprint logger.debug('Using configuration dict:\n%s',pprint.pformat(config)) @@ -327,6 +343,48 @@ def corr2(config, logger=None): gg.writeMapSq(config['m2_file_name'], m2_uform=config['m2_uform']) logger.warning("Wrote Mapsq values to %s",config['m2_file_name']) + # Do NZ correlation function if necessary + if 'nz_file_name' in config: + if cat2 is None: + raise TypeError("file_name2 is required for nz correlation") + logger.warning("Performing NZ calculations...") + nz = NZCorrelation(config, logger=logger) + nz.process(cat1,cat2) + logger.info("Done NZ calculation.") + + # The default nz_statistic is compensated _iff_ rand files are given. 
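+ # With rand files, the RZ correlation computed below is passed to nz.write, + # which uses it for the compensated estimator; without rand files we fall back + # to the simple estimator unless 'compensated' was explicitly requested.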
+ rz = None + if rand1 is None: + if config.get('nz_statistic',None) == 'compensated': + raise TypeError("rand_files is required for nz_statistic = compensated") + elif config.get('nz_statistic','compensated') == 'compensated': + rz = NZCorrelation(config, logger=logger) + rz.process(rand1,cat2) + logger.info("Done RZ calculation.") + + nz.write(config['nz_file_name'], rz=rz) + logger.warning("Wrote NZ correlation to %s",config['nz_file_name']) + + # Do KZ correlation function if necessary + if 'kz_file_name' in config: + if cat2 is None: + raise TypeError("file_name2 is required for kz correlation") + logger.warning("Performing KZ calculations...") + kz = KZCorrelation(config, logger=logger) + kz.process(cat1,cat2) + logger.info("Done KZ calculation.") + kz.write(config['kz_file_name']) + logger.warning("Wrote KZ correlation to %s",config['kz_file_name']) + + # Do ZZ correlation function if necessary + if 'zz_file_name' in config: + logger.warning("Performing ZZ calculations...") + zz = ZZCorrelation(config, logger=logger) + zz.process(cat1,cat2) + logger.info("Done ZZ calculations.") + zz.write(config['zz_file_name']) + logger.warning("Wrote ZZ correlation to %s",config['zz_file_name']) + # Do NV correlation function if necessary if 'nv_file_name' in config: if cat2 is None: diff --git a/treecorr/exec_corr3.py b/treecorr/exec_corr3.py index 1a8ce000..549e2792 100644 --- a/treecorr/exec_corr3.py +++ b/treecorr/exec_corr3.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/field.py b/treecorr/field.py index a5feec10..ff349956 100644 --- a/treecorr/field.py +++ b/treecorr/field.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -76,6 +76,8 @@ class Field(object): - `KField` describes a field of points sampling a scalar field (e.g. kappa in the weak lensing context). In addition to the above values, cells keep track of the mean value in the given region. + - `ZField` describes a complex scalar (spin-0) field. In addition to the above values, + cells keep track of the mean (complex) value in the given region. - `VField` describes a field of points sampling a vector (spin-1) field. In addition to the above values, cells keep track of the mean (complex) vector in the given region. - `GField` describes a field of points sampling a spinor (spin-2) field (e.g. gamma in the @@ -633,6 +635,69 @@ def __init__(self, cat, *, min_size=0, max_size=None, split_method='mean', brute logger.debug('Finished building KField (%s)',self.coords) +class ZField(Field): + r"""This class stores the values of a complex scalar (spin-0) field in a tree structure + from which it is efficient to compute correlation functions. + + A ZField is typically created from a Catalog object using + + >>> zfield = cat.getZField(min_size, max_size, b) + + Parameters: + cat (Catalog): The catalog from which to make the field. + min_size (float): The minimum radius cell required (usually min_sep). (default: 0) + max_size (float): The maximum radius cell required (usually max_sep). (default: None) + split_method (str): Which split method to use ('mean', 'median', 'middle', or 'random'). 
+ (default: 'mean') + brute (bool): Whether to force traversal to the leaves for this field. + (default: False) + min_top (int): The minimum number of top layers to use when setting up the field. + (default: :math:`\max(3, \log_2(N_{\rm cpu}))`) + max_top (int): The maximum number of top layers to use when setting up the field. + (default: 10) + coords (str): The kind of coordinate system to use. (default: cat.coords) + rng (RandomState): If desired, a numpy.random.RandomState instance to use for random + number generation. (default: None) + logger (Logger): A logger file if desired. (default: None) + """ + def __init__(self, cat, *, min_size=0, max_size=None, split_method='mean', brute=False, + min_top=None, max_top=10, coords=None, rng=None, logger=None): + if logger: + if cat.name != '': + logger.info('Building ZField from cat %s',cat.name) + else: + logger.info('Building ZField') + + self._cat = weakref.ref(cat) + self.ntot = cat.ntot + self.min_size = float(min_size) if not brute else 0. + self.max_size = float(max_size) if max_size is not None else np.inf + self.split_method = split_method + self._sm = _parse_split_method(split_method) + self.brute = bool(brute) + self.min_top, self.max_top = self._determine_top(min_top, max_top) + self.coords = coords if coords is not None else cat.coords + self._coords = coord_enum(self.coords) # These are the C++-layer enums + seed = 0 if rng is None else int(rng.random_sample() * 2**63) + + zx = cat.z if cat.z is not None else np.array([]) + wpx = cat.wpos if cat.wpos is not None else np.array([]) + if self._coords == _treecorr.Flat: + self.data = _treecorr.ZFieldFlat(cat.x, cat.y, zx, cat.z1, cat.z2, cat.w, wpx, + self.min_size, self.max_size, self._sm, seed, + self.brute, self.min_top, self.max_top) + elif self._coords == _treecorr.Sphere: + self.data = _treecorr.ZFieldSphere(cat.x, cat.y, zx, cat.z1, cat.z2, cat.w, wpx, + self.min_size, self.max_size, self._sm, seed, + self.brute, self.min_top, self.max_top) + else: + self.data = _treecorr.ZFieldThreeD(cat.x, cat.y, zx, cat.z1, cat.z2, cat.w, wpx, + self.min_size, self.max_size, self._sm, seed, + self.brute, self.min_top, self.max_top) + if logger: + logger.debug('Finished building ZField (%s)',self.coords) + + class VField(Field): r"""This class stores the values of a vector field in a tree structure from which it is efficient to compute correlation functions. diff --git a/treecorr/ggcorrelation.py b/treecorr/ggcorrelation.py index 41fe33ee..6aa20871 100644 --- a/treecorr/ggcorrelation.py +++ b/treecorr/ggcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarG -from .corr2base import Corr2 +from .zzcorrelation import BaseZZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class GGCorrelation(Corr2): +class GGCorrelation(BaseZZCorrelation): r"""This class handles the calculation and storage of a 2-point shear-shear correlation function. @@ -73,9 +73,9 @@ class GGCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_auto` and/or - `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until - the `finalize` function is called. 
+ If you separate out the steps of the `Corr2.process` command and use + `BaseZZCorrelation.process_auto` and/or `Corr2.process_cross`, then the units will not be + applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. The typical usage pattern is as follows: @@ -96,484 +96,31 @@ class GGCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'GGCorrelation' + _letter1 = 'G' + _letter2 = 'G' + _letters = 'GG' + _builder = _treecorr.GGCorr + _calculateVar1 = staticmethod(calculateVarG) + _calculateVar2 = staticmethod(calculateVarG) + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `GGCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xip = np.zeros_like(self.rnom, dtype=float) - self.xim = np.zeros_like(self.rnom, dtype=float) - self.xip_im = np.zeros_like(self.rnom, dtype=float) - self.xim_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxip = None - self._varxim = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building GGCorr') - - @property - def corr(self): - if self._corr is None: - self._corr = _treecorr.GGCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xip, self.xip_im, self.xim, self.xim_im, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `GGCorrelation` instances are equal""" - return (isinstance(other, GGCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xip, other.xip) and - np.array_equal(self.xim, other.xim) and - np.array_equal(self.xip_im, other.xip_im) and - np.array_equal(self.xim_im, other.xim_im) and - np.array_equal(self.varxip, other.varxip) and - np.array_equal(self.varxim, other.varxim) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = GGCorrelation.__new__(GGCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. 
We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'GGCorrelation({self._repr_kwargs})' - - def process_auto(self, cat, *, metric=None, num_threads=None): - """Process a single catalog, accumulating the auto-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat (Catalog): The catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat.name == '': - self.logger.info('Starting process GG auto-correlations') - else: - self.logger.info('Starting process GG auto-correlations for cat %s.',cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getGField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) - - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) 
- """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process GG cross-correlations') - else: - self.logger.info('Starting process GG cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getGField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getGField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def getStat(self): - """The standard statistic for the current correlation object as a 1-d array. - - In this case, this is the concatenation of self.xip and self.xim (raveled if necessary). - """ - return np.concatenate([self.xip.ravel(), self.xim.ravel()]) - - def getWeight(self): - """The weight array for the current correlation object as a 1-d array. - - This is the weight array corresponding to `getStat`. In this case, the weight is - duplicated to account for both xip and xim returned as part of getStat(). - """ - return np.concatenate([self.weight.ravel(), self.weight.ravel()]) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xip[mask1] /= self.weight[mask1] - self.xim[mask1] /= self.weight[mask1] - self.xip_im[mask1] /= self.weight[mask1] - self.xim_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varg1, varg2): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, - so they can be called multiple times if appropriate. Afterwards, this command + The `BaseZZCorrelation.process_auto` and `Corr2.process_cross` commands accumulate values + in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varg1 (float): The variance per component of the first shear field. varg2 (float): The variance per component of the second shear field. """ - self._finalize() - self._var_num = 2. 
* varg1 * varg2 - - @property - def varxip(self): - if self._varxip is None: - self._varxip = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxip.ravel()[:] = self.cov_diag[:self._nbins] - return self._varxip - - @property - def varxim(self): - if self._varxim is None: - self._varxim = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxim.ravel()[:] = self.cov_diag[self._nbins:] - return self._varxim - - def _clear(self): - """Clear the data vectors - """ - self.xip.ravel()[:] = 0 - self.xim.ravel()[:] = 0 - self.xip_im.ravel()[:] = 0 - self.xim_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxip = None - self._varxim = None - self._cov = None - - def __iadd__(self, other): - """Add a second `GGCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `GGCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, GGCorrelation): - raise TypeError("Can only add another GGCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("GGCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xip.ravel()[:] += other.xip.ravel()[:] - self.xim.ravel()[:] += other.xim.ravel()[:] - self.xip_im.ravel()[:] += other.xip_im.ravel()[:] - self.xim_im.ravel()[:] += other.xim_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xip for c in others], axis=0, out=self.xip) - np.sum([c.xim for c in others], axis=0, out=self.xim) - np.sum([c.xip_im for c in others], axis=0, out=self.xip_im) - np.sum([c.xim_im for c in others], axis=0, out=self.xim_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first G field. - cat2 (Catalog): A catalog or list of catalogs for the second G field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) 
- comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. (default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - if cat2 is not None: - self._processed_cats2.extend(cat2) - if finalize: - if cat2 is None: - varg1 = calculateVarG(self._processed_cats1, low_mem=low_mem) - varg2 = varg1 - self.logger.info("varg = %f: sig_sn (per component) = %f",varg1,math.sqrt(varg1)) - else: - varg1 = calculateVarG(self._processed_cats1, low_mem=low_mem) - varg2 = calculateVarG(self._processed_cats2, low_mem=low_mem) - self.logger.info("varg1 = %f: sig_sn (per component) = %f",varg1,math.sqrt(varg1)) - self.logger.info("varg2 = %f: sig_sn (per component) = %f",varg2,math.sqrt(varg2)) - self.finalize(varg1,varg2) - self._processed_cats1.clear() - self._processed_cats2.clear() - - - def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, - write_cov=False): - r"""Write the correlation function to the file, file_name. - - The output file will include the following columns: - - ========= ======================================================== - Column Description - ========= ======================================================== - r_nom The nominal center of the bin in r - meanr The mean value :math:`\langle r \rangle` of pairs that - fell into each bin - meanlogr The mean value :math:`\langle \log(r) \rangle` of pairs - that fell into each bin - xip The real part of the :math:`\xi_+` correlation function - xim The real part of the :math:`\xi_-` correlation function - xip_im The imag part of the :math:`\xi_+` correlation function - xim_im The imag part of the :math:`\xi_-` correlation function - sigma_xip The sqrt of the variance estimate of :math:`\xi_+` - sigma_xim The sqrt of the variance estimate of :math:`\xi_-` - weight The total weight contributing to each bin - npairs The total number of pairs in each bin - ========= ======================================================== - - If ``sep_units`` was given at construction, then the distances will all be in these units. - Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or - radians (for spherical coordinates). - - Parameters: - file_name (str): The name of the file to write to. - file_type (str): The type of file to write ('ASCII' or 'FITS'). (default: determine - the type automatically from the extension of file_name.) 
- precision (int): For ASCII output catalogs, the desired precision. (default: 4; - this value can also be given in the constructor in the config dict.) - write_patch_results (bool): Whether to write the patch-based results as well. - (default: False) - write_cov (bool): Whether to write the covariance matrix as well. (default: False) - """ - self.logger.info('Writing GG correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom', 'meanr', 'meanlogr', 'xip', 'xim', 'xip_im', 'xim_im', - 'sigma_xip', 'sigma_xim', 'weight', 'npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xip, self.xim, self.xip_im, self.xim_im, - np.sqrt(self.varxip), np.sqrt(self.varxim), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a GGCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A GGCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building GGCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading GG correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `GGCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) 
- """ - self.logger.info('Reading GG correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - # Helper function used by _read - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xip = data['xip'].reshape(s) - self.xim = data['xim'].reshape(s) - self.xip_im = data['xip_im'].reshape(s) - self.xim_im = data['xim_im'].reshape(s) - self._varxip = data['sigma_xip'].reshape(s)**2 - self._varxim = data['sigma_xim'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().finalize(varg1, varg2) def calculateMapSq(self, *, R=None, m2_uform=None): r"""Calculate the aperture mass statistics from the correlation function. @@ -684,7 +231,6 @@ def calculateMapSq(self, *, R=None, m2_uform=None): return mapsq, mapsq_im, mxsq, mxsq_im, varmapsq - def calculateGamSq(self, *, R=None, eb=False): r"""Calculate the tophat shear variance from the correlation function. @@ -765,7 +311,6 @@ def calculateGamSq(self, *, R=None, eb=False): return gamsq, vargamsq, gamsq_e, gamsq_b, vargamsq_e - def writeMapSq(self, file_name, *, R=None, m2_uform=None, file_type=None, precision=None): r"""Write the aperture mass statistics based on the correlation function to the file, file_name. diff --git a/treecorr/gggcorrelation.py b/treecorr/gggcorrelation.py index 9a28f88e..e66eea26 100644 --- a/treecorr/gggcorrelation.py +++ b/treecorr/gggcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/kgcorrelation.py b/treecorr/kgcorrelation.py index e8658e86..9aa36700 100644 --- a/treecorr/kgcorrelation.py +++ b/treecorr/kgcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarG, calculateVarK -from .corr2base import Corr2 +from .kzcorrelation import BaseKZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class KGCorrelation(Corr2): +class KGCorrelation(BaseKZCorrelation): r"""This class handles the calculation and storage of a 2-point scalar-shear correlation function. @@ -74,7 +74,7 @@ class KGCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -96,144 +96,25 @@ class KGCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. 
""" + _cls = 'KGCorrelation' + _letter1 = 'K' + _letter2 = 'G' + _letters = 'KG' + _builder = _treecorr.KGCorr + _calculateVar1 = staticmethod(calculateVarK) + _calculateVar2 = staticmethod(calculateVarG) + _xireal = 'kgamT' + _xiimag = 'kgamX' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `KGCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building KGCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.KGCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xi, self.xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `KGCorrelation` instances are equal""" - return (isinstance(other, KGCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = KGCorrelation.__new__(KGCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'KGCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. 
-
-        Parameters:
-            cat1 (Catalog):    The first catalog to process
-            cat2 (Catalog):    The second catalog to process
-            metric (str):      Which metric to use.  See `Metrics` for details.
-                               (default: 'Euclidean'; this value can also be given in the
-                               constructor in the config dict.)
-            num_threads (int): How many OpenMP threads to use during the calculation.
-                               (default: use the number of cpu cores; this value can also be given
-                               in the constructor in the config dict.)
-        """
-        if cat1.name == '' and cat2.name == '':
-            self.logger.info('Starting process KG cross-correlations')
-        else:
-            self.logger.info('Starting process KG cross-correlations for cats %s, %s.',
-                             cat1.name, cat2.name)
-
-        self._set_metric(metric, cat1.coords, cat2.coords)
-        self._set_num_threads(num_threads)
-        min_size, max_size = self._get_minmax_size()
-
-        f1 = cat1.getKField(min_size=min_size, max_size=max_size,
-                            split_method=self.split_method,
-                            brute=self.brute is True or self.brute == 1,
-                            min_top=self.min_top, max_top=self.max_top,
-                            coords=self.coords)
-        f2 = cat2.getGField(min_size=min_size, max_size=max_size,
-                            split_method=self.split_method,
-                            brute=self.brute is True or self.brute == 2,
-                            min_top=self.min_top, max_top=self.max_top,
-                            coords=self.coords)
-
-        self.logger.info('Starting %d jobs.',f1.nTopLevelNodes)
-        self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric)
-
-    def _finalize(self):
-        mask1 = self.weight != 0
-        mask2 = self.weight == 0
-
-        self.xi[mask1] /= self.weight[mask1]
-        self.xi_im[mask1] /= self.weight[mask1]
-        self.meanr[mask1] /= self.weight[mask1]
-        self.meanlogr[mask1] /= self.weight[mask1]
-
-        # Update the units of meanr, meanlogr
-        self._apply_units(mask1)
-
-        # Use meanr, meanlogr when available, but set to nominal when no pairs in bin.
-        self.meanr[mask2] = self.rnom[mask2]
-        self.meanlogr[mask2] = self.logr[mask2]
+        super().__init__(config, logger=logger, **kwargs)

    def finalize(self, vark, varg):
        """Finalize the calculation of the correlation function.

-        The `process_cross` command accumulates values in each bin, so it can be called
+        The `Corr2.process_cross` command accumulates values in each bin, so it can be called
        multiple times if appropriate.  Afterwards, this command finishes the calculation
        by dividing each column by the total weight.

@@ -241,120 +122,7 @@ def finalize(self, vark, varg):
        vark (float):  The variance of the scalar field.
        varg (float):  The variance per component of the shear field.
        """
-        self._finalize()
-        self._var_num = vark * varg
-
-    @property
-    def varxi(self):
-        if self._varxi is None:
-            self._varxi = np.zeros_like(self.rnom, dtype=float)
-            if self._var_num != 0:
-                self._varxi.ravel()[:] = self.cov_diag
-        return self._varxi
-
-    def _clear(self):
-        """Clear the data vectors
-        """
-        self.xi.ravel()[:] = 0
-        self.xi_im.ravel()[:] = 0
-        self.meanr.ravel()[:] = 0
-        self.meanlogr.ravel()[:] = 0
-        self.weight.ravel()[:] = 0
-        self.npairs.ravel()[:] = 0
-        self._varxi = None
-        self._cov = None
-
-    def __iadd__(self, other):
-        """Add a second `KGCorrelation`'s data to this one.
-
-        .. note::
-
-            For this to make sense, both `KGCorrelation` objects should not have had `finalize`
-            called yet.  Then, after adding them together, you should call `finalize` on the sum.
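Every `_finalize` body deleted in this patch implements the same normalization step, which now lives in the base class: divide each accumulated column by the total weight where pairs landed, and fall back to the nominal bin value for empty bins. As a standalone numpy sketch of that step (made-up bin values)::

    import numpy as np

    # Accumulated (unnormalized) sums for 5 bins; the middle bin got no pairs.
    xi     = np.array([1.2, 0.8, 0.0, 0.4, 0.2])
    meanr  = np.array([10., 18., 0., 55., 90.])
    weight = np.array([4., 2., 0., 2., 1.])
    rnom   = np.array([3., 9., 27., 81., 243.])   # nominal bin centers

    mask = weight != 0
    xi[mask] /= weight[mask]        # weighted mean where there is data
    meanr[mask] /= weight[mask]
    meanr[~mask] = rnom[~mask]      # empty bins fall back to the nominal r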
- """ - if not isinstance(other, KGCorrelation): - raise TypeError("Can only add another KGCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("KGCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xi.ravel()[:] += other.xi.ravel()[:] - self.xi_im.ravel()[:] += other.xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xi for c in others], axis=0, out=self.xi) - np.sum([c.xi_im for c in others], axis=0, out=self.xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the K field. - cat2 (Catalog): A catalog or list of catalogs for the G field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - self._processed_cats2.extend(cat2) - if finalize: - vark = calculateVarK(self._processed_cats1, low_mem=low_mem) - varg = calculateVarG(self._processed_cats2, low_mem=low_mem) - self.logger.info("vark = %f: sig_k = %f",vark,math.sqrt(vark)) - self.logger.info("varg = %f: sig_sn (per component) = %f",varg,math.sqrt(varg)) - self.finalize(vark,varg) - self._processed_cats1.clear() - self._processed_cats2.clear() + super().finalize(vark, varg) def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -392,90 +160,4 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result (default: False) write_cov (bool): Whether to write the covariance matrix as well. (default: False) """ - self.logger.info('Writing KG correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','kgamT','kgamX','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a KGCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A KGCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building KGCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading KG correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. 
-
-        This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so
-        there is no loss of information.
-
-        .. warning::
-
-            The `KGCorrelation` object should be constructed with the same configuration
-            parameters as the one being read.  e.g. the same min_sep, max_sep, etc.  This is not
-            checked by the read function.
-
-        Parameters:
-            file_name (str):   The name of the file to read in.
-            file_type (str):   The type of file ('ASCII' or 'FITS').  (default: determine the type
-                               automatically from the extension of file_name.)
-        """
-        self.logger.info('Reading KG correlations from %s',file_name)
-        with make_reader(file_name, file_type, self.logger) as reader:
-            self._read(reader)
-
-    def _read_from_data(self, data, params):
-        s = self.logr.shape
-        self.meanr = data['meanr'].reshape(s)
-        self.meanlogr = data['meanlogr'].reshape(s)
-        self.xi = data['kgamT'].reshape(s)
-        self.xi_im = data['kgamX'].reshape(s)
-        self._varxi = data['sigma'].reshape(s)**2
-        self.weight = data['weight'].reshape(s)
-        self.npairs = data['npairs'].reshape(s)
-        self.coords = params['coords'].strip()
-        self.metric = params['metric'].strip()
-        self.npatch1 = params.get('npatch1', 1)
-        self.npatch2 = params.get('npatch2', 1)
+        super().write(file_name, file_type=file_type, precision=precision,
+                      write_patch_results=write_patch_results, write_cov=write_cov)
diff --git a/treecorr/kkcorrelation.py b/treecorr/kkcorrelation.py
index bc7cad55..318360f7 100644
--- a/treecorr/kkcorrelation.py
+++ b/treecorr/kkcorrelation.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -20,7 +20,7 @@
 from . import _treecorr
 from .catalog import calculateVarK
 from .corr2base import Corr2
-from .util import make_writer, make_reader
+from .util import make_writer
 from .config import make_minimal_config
@@ -73,9 +73,9 @@ class KKCorrelation(Corr2):
     .. note::
-        If you separate out the steps of the `process` command and use `process_auto` and/or
-        `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until
-        the `finalize` function is called.
+        If you separate out the steps of the `Corr2.process` command and use `process_auto`
+        and/or `Corr2.process_cross`, then the units will not be applied to ``meanr`` or
+        ``meanlogr`` until the `finalize` function is called.
     The typical usage pattern is as follows:
@@ -96,82 +96,31 @@ class KKCorrelation(Corr2):
         **kwargs:           See the documentation for `Corr2` for the list of allowed keyword
                             arguments, which may be passed either directly or in the config dict.
     """
+    _cls = 'KKCorrelation'
+    _letter1 = 'K'
+    _letter2 = 'K'
+    _letters = 'KK'
+    _builder = _treecorr.KKCorr
+    _calculateVar1 = staticmethod(calculateVarK)
+    _calculateVar2 = staticmethod(calculateVarK)
+    _sig1 = 'sig_k'
+    _sig2 = 'sig_k'
     # The angles are not important for accuracy of KK correlations.
     _default_angle_slop = 1
     def __init__(self, config=None, *, logger=None, **kwargs):
         """Initialize `KKCorrelation`.  See class doc for details.
""" - Corr2.__init__(self, config, logger=logger, **kwargs) + super().__init__(config, logger=logger, **kwargs) - self.xi = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) + self._xi1 = np.zeros_like(self.rnom, dtype=float) + self._xi2 = self._xi3 = self._xi4 = np.array([]) self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] self.logger.debug('Finished building KKCorr') @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.KKCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xi, x, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `KKCorrelation` instances are equal""" - return (isinstance(other, KKCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = KKCorrelation.__new__(KKCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'KKCorrelation({self._repr_kwargs})' + def xi(self): + return self._xi1 def process_auto(self, cat, *, metric=None, num_threads=None): """Process a single catalog, accumulating the auto-correlation. @@ -190,84 +139,12 @@ def process_auto(self, cat, *, metric=None, num_threads=None): (default: use the number of cpu cores; this value can also be given in the constructor in the config dict.) 
""" - if cat.name == '': - self.logger.info('Starting process KK auto-correlations') - else: - self.logger.info('Starting process KK auto-correlations for cat %s.', cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process KK cross-correlations') - else: - self.logger.info('Starting process KK cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xi[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanlogr - self._apply_units(mask1) - - # Use meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super()._process_auto(cat, metric, num_threads) def finalize(self, vark1, vark2): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, + The `process_auto` and `Corr2.process_cross` commands accumulate values in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. 
@@ -289,36 +166,8 @@ def varxi(self): def _clear(self): """Clear the data vectors """ - self.xi.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 + super()._clear() self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `KKCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `KKCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, KKCorrelation): - raise TypeError("Can only add another KKCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("KKCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xi.ravel()[:] += other.xi.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self def _sum(self, others): # Equivalent to the operation of: @@ -326,79 +175,13 @@ def _sum(self, others): # for other in others: # self += other # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xi for c in others], axis=0, out=self.xi) + np.sum([c._xi1 for c in others], axis=0, out=self._xi1) np.sum([c.meanr for c in others], axis=0, out=self.meanr) np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) np.sum([c.weight for c in others], axis=0, out=self.weight) np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first K field. - cat2 (Catalog): A catalog or list of catalogs for the second K field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - if cat2 is not None: - self._processed_cats2.extend(cat2) - if finalize: - if cat2 is None: - vark1 = calculateVarK(self._processed_cats1, low_mem=low_mem) - vark2 = vark1 - self.logger.info("vark = %f: sig_k = %f",vark1,math.sqrt(vark1)) - else: - vark1 = calculateVarK(self._processed_cats1, low_mem=low_mem) - vark2 = calculateVarK(self._processed_cats2, low_mem=low_mem) - self.logger.info("vark1 = %f: sig_k = %f",vark1,math.sqrt(vark1)) - self.logger.info("vark2 = %f: sig_k = %f",vark2,math.sqrt(vark2)) - self.finalize(vark1,vark2) - self._processed_cats1.clear() - self._processed_cats2.clear() + self._varxi = None + self._cov = None def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -450,72 +233,9 @@ def _write_data(self): data = [ col.flatten() for col in data ] return data - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a KKCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A KKCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building KKCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading KK correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `KKCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). 
(default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading KK correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - def _read_from_data(self, data, params): + super()._read_from_data(data, params) s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['xi'].reshape(s) - self._varxi = data['sigma_xi'].reshape(s)**2 self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + self._xi1 = data['xi'].reshape(s) + self._varxi = data['sigma_xi'].reshape(s)**2 diff --git a/treecorr/kkkcorrelation.py b/treecorr/kkkcorrelation.py index d09be0e4..e5005c86 100644 --- a/treecorr/kkkcorrelation.py +++ b/treecorr/kkkcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/kqcorrelation.py b/treecorr/kqcorrelation.py index f7999d9f..5dfcc13f 100644 --- a/treecorr/kqcorrelation.py +++ b/treecorr/kqcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarQ, calculateVarK -from .corr2base import Corr2 +from .kzcorrelation import BaseKZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class KQCorrelation(Corr2): +class KQCorrelation(BaseKZCorrelation): r"""This class handles the calculation and storage of a 2-point scalar-spin-4 correlation function. @@ -46,7 +46,7 @@ class KQCorrelation(Corr2): If there are no pairs in a bin, then exp(logr) will be used instead. meanlogr: The (weighted) mean value of log(r) for the pairs in each bin. If there are no pairs in a bin, then logr will be used instead. - xi: The correlation function, :math:`\xi(r) = \langle \kappa\, \q_R\rangle`. + xi: The correlation function, :math:`\xi(r) = \langle \kappa\, q_R\rangle`. xi_im: The imaginary part of :math:`\xi(r)`. varxi: An estimate of the variance of :math:`\xi` weight: The total weight in each bin. @@ -67,7 +67,7 @@ class KQCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -89,144 +89,25 @@ class KQCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'KQCorrelation' + _letter1 = 'K' + _letter2 = 'Q' + _letters = 'KQ' + _builder = _treecorr.KQCorr + _calculateVar1 = staticmethod(calculateVarK) + _calculateVar2 = staticmethod(calculateVarQ) + _xireal = 'xi' + _xiimag = 'xi_im' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `KQCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building KQCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.KQCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xi, self.xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `KQCorrelation` instances are equal""" - return (isinstance(other, KQCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = KQCorrelation.__new__(KQCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'KQCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. 
-                                (default: use the number of cpu cores; this value can also be given
-                                in the constructor in the config dict.)
-        """
-        if cat1.name == '' and cat2.name == '':
-            self.logger.info('Starting process KQ cross-correlations')
-        else:
-            self.logger.info('Starting process KQ cross-correlations for cats %s, %s.',
-                             cat1.name, cat2.name)
-
-        self._set_metric(metric, cat1.coords, cat2.coords)
-        self._set_num_threads(num_threads)
-        min_size, max_size = self._get_minmax_size()
-
-        f1 = cat1.getKField(min_size=min_size, max_size=max_size,
-                            split_method=self.split_method,
-                            brute=self.brute is True or self.brute == 1,
-                            min_top=self.min_top, max_top=self.max_top,
-                            coords=self.coords)
-        f2 = cat2.getQField(min_size=min_size, max_size=max_size,
-                            split_method=self.split_method,
-                            brute=self.brute is True or self.brute == 2,
-                            min_top=self.min_top, max_top=self.max_top,
-                            coords=self.coords)
-
-        self.logger.info('Starting %d jobs.',f1.nTopLevelNodes)
-        self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric)
-
-    def _finalize(self):
-        mask1 = self.weight != 0
-        mask2 = self.weight == 0
-
-        self.xi[mask1] /= self.weight[mask1]
-        self.xi_im[mask1] /= self.weight[mask1]
-        self.meanr[mask1] /= self.weight[mask1]
-        self.meanlogr[mask1] /= self.weight[mask1]
-
-        # Update the units of meanr, meanlogr
-        self._apply_units(mask1)
-
-        # Use meanr, meanlogr when available, but set to nominal when no pairs in bin.
-        self.meanr[mask2] = self.rnom[mask2]
-        self.meanlogr[mask2] = self.logr[mask2]
+        super().__init__(config, logger=logger, **kwargs)

    def finalize(self, vark, varq):
        """Finalize the calculation of the correlation function.

-        The `process_cross` command accumulates values in each bin, so it can be called
+        The `Corr2.process_cross` command accumulates values in each bin, so it can be called
        multiple times if appropriate.  Afterwards, this command finishes the calculation
        by dividing each column by the total weight.

@@ -234,120 +115,7 @@ def finalize(self, vark, varq):
        vark (float):  The variance of the scalar field.
        varq (float):  The variance per component of the spin-4 field.
        """
-        self._finalize()
-        self._var_num = vark * varq
-
-    @property
-    def varxi(self):
-        if self._varxi is None:
-            self._varxi = np.zeros_like(self.rnom, dtype=float)
-            if self._var_num != 0:
-                self._varxi.ravel()[:] = self.cov_diag
-        return self._varxi
-
-    def _clear(self):
-        """Clear the data vectors
-        """
-        self.xi.ravel()[:] = 0
-        self.xi_im.ravel()[:] = 0
-        self.meanr.ravel()[:] = 0
-        self.meanlogr.ravel()[:] = 0
-        self.weight.ravel()[:] = 0
-        self.npairs.ravel()[:] = 0
-        self._varxi = None
-        self._cov = None
-
-    def __iadd__(self, other):
-        """Add a second `KQCorrelation`'s data to this one.
-
-        .. note::
-
-            For this to make sense, both `KQCorrelation` objects should not have had `finalize`
-            called yet.  Then, after adding them together, you should call `finalize` on the sum.
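The note above is the contract that makes patch- and MPI-based runs work: `+=` combines raw, unfinalized sums, and `finalize` is applied once to the total. A sketch of that flow, with made-up catalogs and assuming the `q1`/`q2` spin-4 catalog fields these classes consume::

    import numpy as np
    import treecorr

    rng = np.random.default_rng(5)

    def make_cats(n=300):
        x, y = rng.uniform(0, 100, size=(2, n))
        cat_k = treecorr.Catalog(x=x, y=y, k=rng.normal(0, 0.1, size=n))
        cat_q = treecorr.Catalog(x=x, y=y,
                                 q1=rng.normal(0, 0.02, size=n),
                                 q2=rng.normal(0, 0.02, size=n))
        return cat_k, cat_q

    kq_a = treecorr.KQCorrelation(min_sep=1., max_sep=50., nbins=8)
    kq_b = treecorr.KQCorrelation(min_sep=1., max_sep=50., nbins=8)
    catk_a, catq_a = make_cats()
    catk_b, catq_b = make_cats()
    kq_a.process_cross(catk_a, catq_a)   # raw accumulation, not finalized
    kq_b.process_cross(catk_b, catq_b)
    kq_a += kq_b                         # combine the raw sums first
    kq_a.finalize(treecorr.calculateVarK([catk_a, catk_b]),
                  treecorr.calculateVarQ([catq_a, catq_b]))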
- """ - if not isinstance(other, KQCorrelation): - raise TypeError("Can only add another KQCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("KQCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xi.ravel()[:] += other.xi.ravel()[:] - self.xi_im.ravel()[:] += other.xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xi for c in others], axis=0, out=self.xi) - np.sum([c.xi_im for c in others], axis=0, out=self.xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the K field. - cat2 (Catalog): A catalog or list of catalogs for the Q field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global')
-        """
-        import math
-        if initialize:
-            self.clear()
-            self._processed_cats1.clear()
-            self._processed_cats2.clear()
-
-        if patch_method not in ['local', 'global']:
-            raise ValueError("Invalid patch_method %s"%patch_method)
-        local = patch_method == 'local'
-
-        if not isinstance(cat1,list):
-            cat1 = cat1.get_patches(low_mem=low_mem)
-        if not isinstance(cat2,list):
-            cat2 = cat2.get_patches(low_mem=low_mem)
-
-        self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local)
-
-        self._processed_cats1.extend(cat1)
-        self._processed_cats2.extend(cat2)
-        if finalize:
-            vark = calculateVarK(self._processed_cats1, low_mem=low_mem)
-            varq = calculateVarQ(self._processed_cats2, low_mem=low_mem)
-            self.logger.info("vark = %f: sig_k = %f",vark,math.sqrt(vark))
-            self.logger.info("varq = %f: sig_sn (per component) = %f",varq,math.sqrt(varq))
-            self.finalize(vark,varq)
-            self._processed_cats1.clear()
-            self._processed_cats2.clear()
+        super().finalize(vark, varq)

    def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False,
              write_cov=False):
@@ -364,7 +132,7 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result
        meanlogr        The mean value :math:`\langle \log(r)\rangle` of pairs that
                        fell into each bin
        xi              The real part of the correlation function,
-                        :math:`xi(r) = \langle \kappa\, \q_R\rangle`.
+                        :math:`\xi(r) = \langle \kappa\, q_R\rangle`.
        xi_im           The imaginary part of the correlation function.
        sigma           The sqrt of the variance estimate of both of these
        weight          The total weight contributing to each bin
@@ -385,90 +153,4 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result
            (default: False)
        write_cov (bool):   Whether to write the covariance matrix as well. (default: False)
        """
-        self.logger.info('Writing KQ correlations to %s',file_name)
-        precision = self.config.get('precision', 4) if precision is None else precision
-        with make_writer(file_name, precision, file_type, self.logger) as writer:
-            self._write(writer, None, write_patch_results, write_cov=write_cov)
-
-    @property
-    def _write_col_names(self):
-        return ['r_nom','meanr','meanlogr','xi','xi_im','sigma','weight','npairs']
-
-    @property
-    def _write_data(self):
-        data = [ self.rnom, self.meanr, self.meanlogr,
-                 self.xi, self.xi_im, np.sqrt(self.varxi),
-                 self.weight, self.npairs ]
-        data = [ col.flatten() for col in data ]
-        return data
-
-    @property
-    def _write_params(self):
-        params = make_minimal_config(self.config, Corr2._valid_params)
-        # Add in a couple other things we want to preserve that aren't construction kwargs.
-        params['coords'] = self.coords
-        params['metric'] = self.metric
-        return params
-
-    @classmethod
-    def from_file(cls, file_name, *, file_type=None, logger=None, rng=None):
-        """Create a KQCorrelation instance from an output file.
-
-        This should be a file that was written by TreeCorr.
-
-        Parameters:
-            file_name (str):   The name of the file to read in.
-            file_type (str):   The type of file ('ASCII', 'FITS', or 'HDF').  (default: determine
-                               the type automatically from the extension of file_name.)
-            logger (Logger):   If desired, a logger object to use for logging. (default: None)
-            rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap
-                               random number generation. (default: None)
-
-        Returns:
-            corr: A KQCorrelation object, constructed from the information in the file.
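`from_file` rebuilds the object, binning configuration included, from the header of a file produced by `write`, so the caller does not need to know min_sep, max_sep, etc. A round-trip sketch (file name made up; the extension picks the file type)::

    # Continuing from a finished KQCorrelation `kq_a`:
    kq_a.write('kq_results.out')   # plain ASCII; .fits/.hdf5 also work
    kq_back = treecorr.KQCorrelation.from_file('kq_results.out')
    assert kq_back.nbins == kq_a.nbins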
- """ - if logger: - logger.info('Building KQCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading KQ correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `KQCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading KQ correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['xi'].reshape(s) - self.xi_im = data['xi_im'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, file_type, precision, write_patch_results, write_cov) diff --git a/treecorr/ktcorrelation.py b/treecorr/ktcorrelation.py index fd7a2d8e..780a6642 100644 --- a/treecorr/ktcorrelation.py +++ b/treecorr/ktcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarT, calculateVarK -from .corr2base import Corr2 +from .kzcorrelation import BaseKZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class KTCorrelation(Corr2): +class KTCorrelation(BaseKZCorrelation): r"""This class handles the calculation and storage of a 2-point scalar-spin-3 correlation function. @@ -46,7 +46,7 @@ class KTCorrelation(Corr2): If there are no pairs in a bin, then exp(logr) will be used instead. meanlogr: The (weighted) mean value of log(r) for the pairs in each bin. If there are no pairs in a bin, then logr will be used instead. - xi: The correlation function, :math:`\xi(r) = \langle \kappa\, \t_R\rangle`. + xi: The correlation function, :math:`\xi(r) = \langle \kappa\, t_R\rangle`. xi_im: The imaginary part of :math:`\xi(r)`. varxi: An estimate of the variance of :math:`\xi` weight: The total weight in each bin. @@ -67,7 +67,7 @@ class KTCorrelation(Corr2): .. 
note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -89,144 +89,25 @@ class KTCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'KTCorrelation' + _letter1 = 'K' + _letter2 = 'T' + _letters = 'KT' + _builder = _treecorr.KTCorr + _calculateVar1 = staticmethod(calculateVarK) + _calculateVar2 = staticmethod(calculateVarT) + _xireal = 'xi' + _xiimag = 'xi_im' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `KTCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building KTCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.KTCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xi, self.xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `KTCorrelation` instances are equal""" - return (isinstance(other, KTCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = KTCorrelation.__new__(KTCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. 
- return ret - - def __repr__(self): - return f'KTCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process KT cross-correlations') - else: - self.logger.info('Starting process KT cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getTField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xi[mask1] /= self.weight[mask1] - self.xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, vark, vart): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. @@ -234,120 +115,7 @@ def finalize(self, vark, vart): vark (float): The variance of the scaler field. vart (float): The variance per component of the spin-3 field. """ - self._finalize() - self._var_num = vark * vart - - @property - def varxi(self): - if self._varxi is None: - self._varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxi.ravel()[:] = self.cov_diag - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.xi.ravel()[:] = 0 - self.xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `KTCorrelation`'s data to this one. - - .. 
note:: - - For this to make sense, both `KTCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, KTCorrelation): - raise TypeError("Can only add another KTCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("KTCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xi.ravel()[:] += other.xi.ravel()[:] - self.xi_im.ravel()[:] += other.xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xi for c in others], axis=0, out=self.xi) - np.sum([c.xi_im for c in others], axis=0, out=self.xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the K field. - cat2 (Catalog): A catalog or list of catalogs for the T field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - self._processed_cats2.extend(cat2) - if finalize: - vark = calculateVarK(self._processed_cats1, low_mem=low_mem) - vart = calculateVarT(self._processed_cats2, low_mem=low_mem) - self.logger.info("vark = %f: sig_k = %f",vark,math.sqrt(vark)) - self.logger.info("vart = %f: sig_sn (per component) = %f",vart,math.sqrt(vart)) - self.finalize(vark,vart) - self._processed_cats1.clear() - self._processed_cats2.clear() + super().finalize(vark, vart) def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -364,7 +132,7 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result meanlogr The mean value :math:`\langle \log(r)\rangle` of pairs that fell into each bin xi The real part of correlation function, - :math:`\xi(r) = \langle \kappa\, \t_R\rangle` + :math:`\xi(r) = \langle \kappa\, t_R\rangle` xi_im The imaginary part of correlation function. sigma The sqrt of the variance estimate of both of these weight The total weight contributing to each bin @@ -385,90 +153,4 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result (default: False) write_cov (bool): Whether to write the covariance matrix as well. (default: False) """ - self.logger.info('Writing KT correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','xi','xi_im','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a KTCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A KTCorrelation object, constructed from the information in the file. 
- """ - if logger: - logger.info('Building KTCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading KT correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `KTCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading KT correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['xi'].reshape(s) - self.xi_im = data['xi_im'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, file_type, precision, write_patch_results, write_cov) diff --git a/treecorr/kvcorrelation.py b/treecorr/kvcorrelation.py index 22fd4ac2..12571883 100644 --- a/treecorr/kvcorrelation.py +++ b/treecorr/kvcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarV, calculateVarK -from .corr2base import Corr2 +from .kzcorrelation import BaseKZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class KVCorrelation(Corr2): +class KVCorrelation(BaseKZCorrelation): r"""This class handles the calculation and storage of a 2-point scalar-vector correlation function. @@ -46,8 +46,9 @@ class KVCorrelation(Corr2): If there are no pairs in a bin, then exp(logr) will be used instead. meanlogr: The (weighted) mean value of log(r) for the pairs in each bin. If there are no pairs in a bin, then logr will be used instead. - xi: The correlation function, :math:`\xi(r) = \langle \kappa\, \v_R\rangle`. - xi_im: The imaginary part of :math:`\xi(r)`. + xi: The real component of the correlation function, + :math:`\xi(r) = \langle \kappa\, v_R\rangle`. + xi_im: The imaginary comonent of :math:`\xi(r)`. varxi: An estimate of the variance of :math:`\xi` weight: The total weight in each bin. npairs: The number of pairs going into each bin (including pairs where one or @@ -67,7 +68,7 @@ class KVCorrelation(Corr2): .. 
note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -89,144 +90,25 @@ class KVCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'KVCorrelation' + _letter1 = 'K' + _letter2 = 'V' + _letters = 'KV' + _builder = _treecorr.KVCorr + _calculateVar1 = staticmethod(calculateVarK) + _calculateVar2 = staticmethod(calculateVarV) + _xireal = 'xi' + _xiimag = 'xi_im' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `KVCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building KVCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.KVCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xi, self.xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `KVCorrelation` instances are equal""" - return (isinstance(other, KVCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = KVCorrelation.__new__(KVCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. 
- return ret - - def __repr__(self): - return f'KVCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process KV cross-correlations') - else: - self.logger.info('Starting process KV cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getVField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xi[mask1] /= self.weight[mask1] - self.xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, vark, varv): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. @@ -234,120 +116,7 @@ def finalize(self, vark, varv): vark (float): The variance of the scaler field. varv (float): The variance per component of the vector field. """ - self._finalize() - self._var_num = vark * varv - - @property - def varxi(self): - if self._varxi is None: - self._varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxi.ravel()[:] = self.cov_diag - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.xi.ravel()[:] = 0 - self.xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `KVCorrelation`'s data to this one. - - .. 
note:: - - For this to make sense, both `KVCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, KVCorrelation): - raise TypeError("Can only add another KVCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("KVCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xi.ravel()[:] += other.xi.ravel()[:] - self.xi_im.ravel()[:] += other.xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xi for c in others], axis=0, out=self.xi) - np.sum([c.xi_im for c in others], axis=0, out=self.xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the K field. - cat2 (Catalog): A catalog or list of catalogs for the V field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global')
-        """
-        import math
-        if initialize:
-            self.clear()
-            self._processed_cats1.clear()
-            self._processed_cats2.clear()
-
-        if patch_method not in ['local', 'global']:
-            raise ValueError("Invalid patch_method %s"%patch_method)
-        local = patch_method == 'local'
-
-        if not isinstance(cat1,list):
-            cat1 = cat1.get_patches(low_mem=low_mem)
-        if not isinstance(cat2,list):
-            cat2 = cat2.get_patches(low_mem=low_mem)
-
-        self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local)
-
-        self._processed_cats1.extend(cat1)
-        self._processed_cats2.extend(cat2)
-        if finalize:
-            vark = calculateVarK(self._processed_cats1, low_mem=low_mem)
-            varv = calculateVarV(self._processed_cats2, low_mem=low_mem)
-            self.logger.info("vark = %f: sig_k = %f",vark,math.sqrt(vark))
-            self.logger.info("varv = %f: sig_sn (per component) = %f",varv,math.sqrt(varv))
-            self.finalize(vark,varv)
-            self._processed_cats1.clear()
-            self._processed_cats2.clear()
+        super().finalize(vark, varv)
 
     def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False,
               write_cov=False):
@@ -363,9 +132,9 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result
                         fell into each bin
         meanlogr        The mean value :math:`\langle \log(r)\rangle` of pairs
                         that fell into each bin
-        xi              The real part of correlation function,
-                        :math:`xi(r) = \langle \kappa\, \v_R\rangle`
-        xi_im           The imaginary part of correlation function.
+        xi              The real component of the correlation function,
+                        :math:`\xi(r) = \langle \kappa\, v_R\rangle`
+        xi_im           The imaginary component of the correlation function.
         sigma           The sqrt of the variance estimate of both of these
         weight          The total weight contributing to each bin
         npairs          The total number of pairs in each bin
@@ -385,90 +154,4 @@ def write(self, file_name, *, file_type=None, precision=None, write_patch_result
                                 (default: False)
             write_cov (bool):   Whether to write the covariance matrix as well. (default: False)
         """
-        self.logger.info('Writing KV correlations to %s',file_name)
-        precision = self.config.get('precision', 4) if precision is None else precision
-        with make_writer(file_name, precision, file_type, self.logger) as writer:
-            self._write(writer, None, write_patch_results, write_cov=write_cov)
-
-    @property
-    def _write_col_names(self):
-        return ['r_nom','meanr','meanlogr','xi','xi_im','sigma','weight','npairs']
-
-    @property
-    def _write_data(self):
-        data = [ self.rnom, self.meanr, self.meanlogr,
-                 self.xi, self.xi_im, np.sqrt(self.varxi),
-                 self.weight, self.npairs ]
-        data = [ col.flatten() for col in data ]
-        return data
-
-    @property
-    def _write_params(self):
-        params = make_minimal_config(self.config, Corr2._valid_params)
-        # Add in a couple other things we want to preserve that aren't construction kwargs.
-        params['coords'] = self.coords
-        params['metric'] = self.metric
-        return params
-
-    @classmethod
-    def from_file(cls, file_name, *, file_type=None, logger=None, rng=None):
-        """Create a KVCorrelation instance from an output file.
-
-        This should be a file that was written by TreeCorr.
-
-        Parameters:
-            file_name (str):   The name of the file to read in.
-            file_type (str):   The type of file ('ASCII', 'FITS', or 'HDF').  (default: determine
-                                the type automatically from the extension of file_name.)
-            logger (Logger):   If desired, a logger object to use for logging. (default: None)
-            rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap
-                                random number generation.
(default: None)
-
-        Returns:
-            corr:      A KVCorrelation object, constructed from the information in the file.
-        """
-        if logger:
-            logger.info('Building KVCorrelation from %s',file_name)
-        with make_reader(file_name, file_type, logger) as reader:
-            name = 'main' if 'main' in reader else None
-            params = reader.read_params(ext=name)
-            kwargs = make_minimal_config(params, Corr2._valid_params)
-            corr = cls(**kwargs, logger=logger, rng=rng)
-            corr.logger.info('Reading KV correlations from %s',file_name)
-            corr._read(reader, name=name, params=params)
-            return corr
-
-    def read(self, file_name, *, file_type=None):
-        """Read in values from a file.
-
-        This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so
-        there is no loss of information.
-
-        .. warning::
-
-            The `KVCorrelation` object should be constructed with the same configuration
-            parameters as the one being read.  e.g. the same min_sep, max_sep, etc.  This is not
-            checked by the read function.
-
-        Parameters:
-            file_name (str):   The name of the file to read in.
-            file_type (str):   The type of file ('ASCII' or 'FITS').  (default: determine the type
-                                automatically from the extension of file_name.)
-        """
-        self.logger.info('Reading KV correlations from %s',file_name)
-        with make_reader(file_name, file_type, self.logger) as reader:
-            self._read(reader)
-
-    def _read_from_data(self, data, params):
-        s = self.logr.shape
-        self.meanr = data['meanr'].reshape(s)
-        self.meanlogr = data['meanlogr'].reshape(s)
-        self.xi = data['xi'].reshape(s)
-        self.xi_im = data['xi_im'].reshape(s)
-        self._varxi = data['sigma'].reshape(s)**2
-        self.weight = data['weight'].reshape(s)
-        self.npairs = data['npairs'].reshape(s)
-        self.coords = params['coords'].strip()
-        self.metric = params['metric'].strip()
-        self.npatch1 = params.get('npatch1', 1)
-        self.npatch2 = params.get('npatch2', 1)
+        super().write(file_name, file_type, precision, write_patch_results, write_cov)
diff --git a/treecorr/kzcorrelation.py b/treecorr/kzcorrelation.py
new file mode 100644
index 00000000..9013b592
--- /dev/null
+++ b/treecorr/kzcorrelation.py
@@ -0,0 +1,246 @@
+# Copyright (c) 2003-2024 by Mike Jarvis
+#
+# TreeCorr is free software: redistribution and use in source and binary forms,
+# with or without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions, and the disclaimer given in the accompanying LICENSE
+#    file.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions, and the disclaimer given in the documentation
+#    and/or other materials provided with the distribution.
+
+"""
+.. module:: kzcorrelation
+"""
+
+import numpy as np
+
+from . import _treecorr
+from .catalog import calculateVarZ, calculateVarK
+from .corr2base import Corr2
+from .util import make_writer
+from .config import make_minimal_config
+
+
+class BaseKZCorrelation(Corr2):
+    """This class is a base class for all the K?Correlation classes, where ? is one of the
+    complex fields of varying spin.
+
+    A lot of the implementation is shared among those types, so whenever possible the shared
+    implementation is done in this class.
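+
+    Concrete subclasses then largely reduce to a set of class-level attributes plus thin
+    wrappers that supply the documentation.  As a sketch of the pattern, the KT case in this
+    change boils down to::
+
+        class KTCorrelation(BaseKZCorrelation):
+            _cls = 'KTCorrelation'
+            _letter1 = 'K'
+            _letter2 = 'T'
+            _letters = 'KT'
+            _builder = _treecorr.KTCorr
+            _calculateVar1 = staticmethod(calculateVarK)
+            _calculateVar2 = staticmethod(calculateVarT)
+            _xireal = 'xi'
+            _xiimag = 'xi_im'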
+ """ + _sig1 = 'sig_k' + _sig2 = 'sig_sn (per component)' + + def __init__(self, config=None, *, logger=None, **kwargs): + super().__init__(config, logger=logger, **kwargs) + + self._xi1 = np.zeros_like(self.rnom, dtype=float) + self._xi2 = np.zeros_like(self.rnom, dtype=float) + self._xi3 = self._xi4 = np.array([]) + self._varxi = None + self.logger.debug('Finished building %s', self._cls) + + @property + def xi(self): + return self._xi1 + + @property + def xi_im(self): + return self._xi2 + + def finalize(self, vark, varz): + self._finalize() + self._var_num = vark * varz + + @property + def varxi(self): + if self._varxi is None: + self._varxi = np.zeros_like(self.rnom, dtype=float) + if self._var_num != 0: + self._varxi.ravel()[:] = self.cov_diag + return self._varxi + + def _clear(self): + """Clear the data vectors + """ + super()._clear() + self._varxi = None + + def _sum(self, others): + # Equivalent to the operation of: + # self._clear() + # for other in others: + # self += other + # but no sanity checks and use numpy.sum for faster calculation. + np.sum([c._xi1 for c in others], axis=0, out=self._xi1) + np.sum([c._xi2 for c in others], axis=0, out=self._xi2) + np.sum([c.meanr for c in others], axis=0, out=self.meanr) + np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) + np.sum([c.weight for c in others], axis=0, out=self.weight) + np.sum([c.npairs for c in others], axis=0, out=self.npairs) + self._varxi = None + self._cov = None + + def write(self, file_name, file_type=None, precision=None, write_patch_results=False, + write_cov=False): + self.logger.info(f'Writing {self._letters} correlations to %s', file_name) + precision = self.config.get('precision', 4) if precision is None else precision + with make_writer(file_name, precision, file_type, self.logger) as writer: + self._write(writer, None, write_patch_results, write_cov=write_cov) + + @property + def _write_col_names(self): + return ['r_nom','meanr','meanlogr',self._xireal,self._xiimag,'sigma','weight','npairs'] + + @property + def _write_data(self): + data = [ self.rnom, self.meanr, self.meanlogr, + self.xi, self.xi_im, np.sqrt(self.varxi), + self.weight, self.npairs ] + data = [ col.flatten() for col in data ] + return data + + def _read_from_data(self, data, params): + super()._read_from_data(data, params) + s = self.logr.shape + self.weight = data['weight'].reshape(s) + self._xi1 = data[self._xireal].reshape(s) + self._xi2 = data[self._xiimag].reshape(s) + self._varxi = data['sigma'].reshape(s)**2 + + +class KZCorrelation(BaseKZCorrelation): + r"""This class handles the calculation and storage of a 2-point scalar-spin-0 correlation + function. If the spin-0 field is real, you should instead use `KKCorrelation` as it will + be faster. This class is intended for correlations of a real scalar field with a complex + spin-0 field. + + Ojects of this class holds the following attributes: + + Attributes: + nbins: The number of bins in logr + bin_size: The size of the bins in logr + min_sep: The minimum separation being considered + max_sep: The maximum separation being considered + + In addition, the following attributes are numpy arrays of length (nbins): + + Attributes: + logr: The nominal center of the bin in log(r) (the natural logarithm of r). + rnom: The nominal center of the bin converted to regular distance. + i.e. r = exp(logr). + meanr: The (weighted) mean value of r for the pairs in each bin. + If there are no pairs in a bin, then exp(logr) will be used instead. 
+        meanlogr:   The (weighted) mean value of log(r) for the pairs in each bin.
+                    If there are no pairs in a bin, then logr will be used instead.
+        xi:         The correlation function, :math:`\xi(r) = \langle \kappa\, z\rangle`.
+        xi_im:      The imaginary part of :math:`\xi(r)`.
+        varxi:      An estimate of the variance of each component of :math:`\xi`
+        weight:     The total weight in each bin.
+        npairs:     The number of pairs going into each bin (including pairs where one or
+                    both objects have w=0).
+        cov:        An estimate of the full covariance matrix.
+
+    .. note::
+
+        The default method for estimating the variance and covariance attributes (``varxi``,
+        and ``cov``) is 'shot', which only includes the shape noise propagated into the final
+        correlation.  This does not include sample variance, so it is always an underestimate of
+        the actual variance.  To get better estimates, you need to set ``var_method`` to something
+        else and use patches in the input catalog(s).  cf. `Covariance Estimates`.
+
+    If ``sep_units`` are given (either in the config dict or as a named kwarg) then the distances
+    will all be in these units.
+
+    .. note::
+
+        If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`,
+        then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize`
+        function is called.
+
+    The typical usage pattern is as follows:
+
+        >>> kz = treecorr.KZCorrelation(config)
+        >>> kz.process(cat1,cat2)   # Calculate the cross-correlation
+        >>> kz.write(file_name)     # Write out to a file.
+        >>> xi = kz.xi              # Or access the correlation function directly.
+
+    Parameters:
+        config (dict):  A configuration dict that can be used to pass in kwargs if desired.
+                        This dict is allowed to have additional entries besides those listed
+                        in `Corr2`, which are ignored here. (default: None)
+        logger:         If desired, a logger object for logging. (default: None, in which case
+                        one will be built according to the config dict's verbose level.)
+
+    Keyword Arguments:
+        **kwargs:       See the documentation for `Corr2` for the list of allowed keyword
+                        arguments, which may be passed either directly or in the config dict.
+    """
+    _cls = 'KZCorrelation'
+    _letter1 = 'K'
+    _letter2 = 'Z'
+    _letters = 'KZ'
+    _builder = _treecorr.KZCorr
+    _calculateVar1 = staticmethod(calculateVarK)
+    _calculateVar2 = staticmethod(calculateVarZ)
+    _xireal = 'xi'
+    _xiimag = 'xi_im'
+
+    def __init__(self, config=None, *, logger=None, **kwargs):
+        """Initialize `KZCorrelation`.  See class doc for details.
+        """
+        super().__init__(config, logger=logger, **kwargs)
+
+    def finalize(self, vark, varz):
+        """Finalize the calculation of the correlation function.
+
+        The `Corr2.process_cross` command accumulates values in each bin, so it can be called
+        multiple times if appropriate.  Afterwards, this command finishes the calculation
+        by dividing each column by the total weight.
+
+        Parameters:
+            vark (float):   The variance of the scalar field.
+            varz (float):   The variance per component of the spin-0 field.
+        """
+        super().finalize(vark, varz)
+
+    def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False,
+              write_cov=False):
+        r"""Write the correlation function to the file, file_name.
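+
+        A minimal sketch of the full round trip, assuming ``kz`` has been processed as in the
+        class docstring (`Corr2.from_file` should then rebuild an equivalent object from the
+        information saved in the header):
+
+            >>> kz.write('kz.fits')
+            >>> kz2 = treecorr.KZCorrelation.from_file('kz.fits')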
+
+        The output file will include the following columns:
+
+        ==========      ========================================================
+        Column          Description
+        ==========      ========================================================
+        r_nom           The nominal center of the bin in r
+        meanr           The mean value :math:`\langle r\rangle` of pairs that
+                        fell into each bin
+        meanlogr        The mean value :math:`\langle \log(r)\rangle` of pairs
+                        that fell into each bin
+        xi              The real part of the correlation function,
+                        :math:`\xi(r) = \langle \kappa\, z\rangle`
+        xi_im           The imaginary part of the correlation function.
+        sigma           The sqrt of the variance estimate of both of these
+        weight          The total weight contributing to each bin
+        npairs          The total number of pairs in each bin
+        ==========      ========================================================
+
+        If ``sep_units`` was given at construction, then the distances will all be in these units.
+        Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or
+        radians (for spherical coordinates).
+
+        Parameters:
+            file_name (str):    The name of the file to write to.
+            file_type (str):    The type of file to write ('ASCII' or 'FITS').  (default: determine
+                                the type automatically from the extension of file_name.)
+            precision (int):    For ASCII output catalogs, the desired precision. (default: 4;
+                                this value can also be given in the constructor in the config dict.)
+            write_patch_results (bool): Whether to write the patch-based results as well.
+                                        (default: False)
+            write_cov (bool):   Whether to write the covariance matrix as well. (default: False)
+        """
+        super().write(file_name, file_type, precision, write_patch_results, write_cov)
diff --git a/treecorr/ngcorrelation.py b/treecorr/ngcorrelation.py
index 9b4c4145..38270b57 100644
--- a/treecorr/ngcorrelation.py
+++ b/treecorr/ngcorrelation.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -19,12 +19,12 @@
 
 from . import _treecorr
 from .catalog import calculateVarG
-from .corr2base import Corr2
 from .util import make_writer, make_reader
 from .config import make_minimal_config
+from .nzcorrelation import BaseNZCorrelation
 
 
-class NGCorrelation(Corr2):
+class NGCorrelation(BaseNZCorrelation):
     r"""This class handles the calculation and storage of a 2-point count-shear correlation
     function.  This is the tangential shear profile around lenses, commonly referred to as
     galaxy-galaxy lensing.
@@ -72,7 +72,7 @@ class NGCorrelation(Corr2):
 
     ..  note::
 
-        If you separate out the steps of the `process` command and use `process_cross`,
+        If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`,
         then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize`
         function is called.
 
@@ -94,288 +94,32 @@ class NGCorrelation(Corr2):
         **kwargs:       See the documentation for `Corr2` for the list of allowed keyword
                         arguments, which may be passed either directly or in the config dict.
     """
+    _cls = 'NGCorrelation'
+    _letter1 = 'N'
+    _letter2 = 'G'
+    _letters = 'NG'
+    _builder = _treecorr.NGCorr
+    _calculateVar1 = lambda *args, **kwargs: None
+    _calculateVar2 = staticmethod(calculateVarG)
+    _zreal = 'gamT'
+    _zimag = 'gamX'
+
     def __init__(self, config=None, *, logger=None, **kwargs):
         """Initialize `NGCorrelation`.  See class doc for details.
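+
+        A compensated measurement subtracts the signal around random points.  A sketch, where
+        ``lens_cat``, ``rand_cat``, and ``source_cat`` are stand-in catalog names and the
+        binning values are only illustrative:
+
+            >>> ng = treecorr.NGCorrelation(min_sep=1., max_sep=100., nbins=20)
+            >>> rg = treecorr.NGCorrelation(min_sep=1., max_sep=100., nbins=20)
+            >>> ng.process(lens_cat, source_cat)   # counts x shear
+            >>> rg.process(rand_cat, source_cat)   # randoms x shear
+            >>> gamt, gamx, varxi = ng.calculateXi(rg=rg)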
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._rg = None - self._raw_varxi = None - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats = [] - self.logger.debug('Finished building NGCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.NGCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.raw_xi, self.raw_xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `NGCorrelation` instances are equal""" - return (isinstance(other, NGCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = NGCorrelation.__new__(NGCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - if self.xi is self.raw_xi: - ret.raw_xi = ret.xi - ret.raw_xi_im = ret.xi_im - else: - ret.raw_xi = self.raw_xi.copy() - ret.raw_xi_im = self.raw_xi_im.copy() - if self._rg is not None: - ret._rg = self._rg.copy() - return ret - - def __repr__(self): - return f'NGCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. 
See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NG cross-correlations') - else: - self.logger.info('Starting process NG cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getGField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.raw_xi[mask1] /= self.weight[mask1] - self.raw_xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varg): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varg (float): The variance per component of the shear field. """ - self._finalize() - self._var_num = varg - - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - - @property - def raw_varxi(self): - if self._raw_varxi is None: - self._raw_varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._raw_varxi.ravel()[:] = self.cov_diag - return self._raw_varxi - - @property - def varxi(self): - if self._varxi is None: - self._varxi = self.raw_varxi - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.raw_xi.ravel()[:] = 0 - self.raw_xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `NGCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `NGCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. 
- """ - if not isinstance(other, NGCorrelation): - raise TypeError("Can only add another NGCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NGCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.raw_xi.ravel()[:] += other.raw_xi.ravel()[:] - self.raw_xi_im.ravel()[:] += other.raw_xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.raw_xi for c in others], axis=0, out=self.raw_xi) - np.sum([c.raw_xi_im for c in others], axis=0, out=self.raw_xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the N field. - cat2 (Catalog): A catalog or list of catalogs for the G field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._rg = None - self._processed_cats.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats.extend(cat2) - if finalize: - varg = calculateVarG(self._processed_cats, low_mem=low_mem) - self.logger.info("varg = %f: sig_sn (per component) = %f",varg,math.sqrt(varg)) - self.finalize(varg) - self._processed_cats.clear() + super().finalize(varg) def calculateXi(self, *, rg=None): r"""Calculate the correlation function possibly given another correlation function @@ -402,50 +146,7 @@ def calculateXi(self, *, rg=None): - xi_im = array of the imaginary part of :math:`\xi(R)` - varxi = array of the variance estimates of the above values """ - if rg is not None: - self.xi = self.raw_xi - rg.xi - self.xi_im = self.raw_xi_im - rg.xi_im - self._rg = rg - - if rg.npatch1 not in (1,self.npatch1) or rg.npatch2 != self.npatch2: - raise RuntimeError("RG must be run with the same patches as DG") - - if len(self.results) > 0: - # If there are any rg patch pairs that aren't in results (e.g. due to different - # edge effects among the various pairs in consideration), then we need to add - # some dummy results to make sure all the right pairs are computed when we make - # the vectors for the covariance matrix. - template = next(iter(self.results.values())) # Just need something to copy. - for ij in rg.results: - if ij in self.results: continue - new_cij = template.copy() - new_cij.xi.ravel()[:] = 0 - new_cij.weight.ravel()[:] = 0 - self.results[ij] = new_cij - - self._cov = self.estimate_cov(self.var_method) - self._varxi = np.zeros_like(self.rnom, dtype=float) - self._varxi.ravel()[:] = self.cov_diag - else: - self._varxi = self.raw_varxi + rg.varxi - else: - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._varxi = self.raw_varxi - - return self.xi, self.xi_im, self.varxi - - def _calculate_xi_from_pairs(self, pairs): - self._sum([self.results[ij] for ij in pairs]) - self._finalize() - if self._rg is not None: - # If rg has npatch1 = 1, adjust pairs appropriately - if self._rg.npatch1 == 1 and not all([p[0] == 0 for p in pairs]): - pairs = [(0,ij[1]) for ij in pairs if ij[0] == ij[1]] - # Make sure all ij are in the rg results (some might be missing, which is ok) - pairs = [ij for ij in pairs if self._rg._ok[ij[0],ij[1]]] - self._rg._calculate_xi_from_pairs(pairs) - self.xi -= self._rg.xi + return super().calculateXi(rz=rg) def write(self, file_name, *, rg=None, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -491,96 +192,7 @@ def write(self, file_name, *, rg=None, file_type=None, precision=None, (default: False) write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) """ - self.logger.info('Writing NG correlations to %s',file_name) - self.calculateXi(rg=rg) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','gamT','gamX','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create an NGCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: An NGCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building NGCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading NG correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `NGCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) 
- """ - self.logger.info('Reading NG correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['gamT'].reshape(s) - self.xi_im = data['gamX'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._raw_varxi = self._varxi - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, rg, file_type, precision, write_patch_results, write_cov) def calculateNMap(self, *, R=None, rg=None, m2_uform=None): r"""Calculate the aperture mass statistics from the correlation function. diff --git a/treecorr/nkcorrelation.py b/treecorr/nkcorrelation.py index 174521dc..81a1b449 100644 --- a/treecorr/nkcorrelation.py +++ b/treecorr/nkcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -20,7 +20,7 @@ from . import _treecorr from .catalog import calculateVarK from .corr2base import Corr2 -from .util import make_writer, make_reader +from .util import make_writer from .config import make_minimal_config @@ -77,7 +77,7 @@ class NKCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -99,151 +99,48 @@ class NKCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'NKCorrelation' + _letter1 = 'N' + _letter2 = 'K' + _letters = 'NK' + _builder = _treecorr.NKCorr + _calculateVar1 = lambda *args, **kwargs: None + _calculateVar2 = staticmethod(calculateVarK) + _sig1 = None + _sig2 = 'sig_k' # The angles are not important for accuracy of NK correlations. _default_angle_slop = 1 def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `NKCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self.raw_xi = self.xi + super().__init__(config, logger=logger, **kwargs) + + self._xi1 = np.zeros_like(self.rnom, dtype=float) + self._xi2 = self._xi3 = self._xi4 = np.array([]) + self.xi = self.raw_xi self._rk = None + self._varxi = None self._raw_varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats = [] self.logger.debug('Finished building NKCorr') @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.NKCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.raw_xi, x, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `NKCorrelation` instances are equal""" - return (isinstance(other, NKCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) + def raw_xi(self): + return self._xi1 def copy(self): """Make a copy""" - ret = NKCorrelation.__new__(NKCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. + ret = super().copy() if self.xi is self.raw_xi: - ret.raw_xi = ret.xi - else: - ret.raw_xi = self.raw_xi.copy() + ret.xi = ret.raw_xi if self._rk is not None: ret._rk = self._rk.copy() return ret - def __repr__(self): - return f'NKCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. 
- - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NK cross-correlations') - else: - self.logger.info('Starting process NK cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getKField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.raw_xi[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] - def finalize(self, vark): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. @@ -252,6 +149,7 @@ def finalize(self, vark): """ self._finalize() self._var_num = vark + self.xi = self.raw_xi @property def raw_varxi(self): @@ -270,38 +168,11 @@ def varxi(self): def _clear(self): """Clear the data vectors """ - self.raw_xi.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 + super()._clear() self.xi = self.raw_xi + self._rk = None self._raw_varxi = None self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `NKCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `NKCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. 
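A hedged sketch of the workflow that note describes, using random data and placeholder bin parameters: accumulate two partial NKCorrelation objects separately, add them, then finalize once.

    import numpy as np
    import treecorr

    rng = np.random.default_rng(1)
    lens = treecorr.Catalog(x=rng.uniform(0, 50, 500), y=rng.uniform(0, 50, 500))
    srcs = [treecorr.Catalog(x=rng.uniform(0, 50, 2000), y=rng.uniform(0, 50, 2000),
                             k=rng.normal(0, 0.1, 2000)) for _ in range(2)]

    nk_a = treecorr.NKCorrelation(min_sep=1., max_sep=10., nbins=5)
    nk_b = nk_a.copy()
    nk_a.process_cross(lens, srcs[0])   # two un-finalized accumulations
    nk_b.process_cross(lens, srcs[1])
    nk_a += nk_b                        # only valid before finalize
    nk_a.finalize(treecorr.calculateVarK(srcs))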
- """ - if not isinstance(other, NKCorrelation): - raise TypeError("Can only add another NKCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NKCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.raw_xi.ravel()[:] += other.raw_xi.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self def _sum(self, others): # Equivalent to the operation of: @@ -309,7 +180,7 @@ def _sum(self, others): # for other in others: # self += other # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.raw_xi for c in others], axis=0, out=self.raw_xi) + np.sum([c._xi1 for c in others], axis=0, out=self._xi1) np.sum([c.meanr for c in others], axis=0, out=self.meanr) np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) np.sum([c.weight for c in others], axis=0, out=self.weight) @@ -319,57 +190,6 @@ def _sum(self, others): self._varxi = None self._cov = None - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the N field. - cat2 (Catalog): A catalog or list of catalogs for the K field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. (default: 'global') - """ - import math - if initialize: - self.clear() - self._rk = None - self._processed_cats.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats.extend(cat2) - if finalize: - vark = calculateVarK(self._processed_cats, low_mem=low_mem) - self.logger.info("vark = %f: sig_k = %f",vark,math.sqrt(vark)) - self.finalize(vark) - self._processed_cats.clear() - def calculateXi(self, *, rk=None): r"""Calculate the correlation function possibly given another correlation function that uses random points for the foreground objects. 
@@ -497,74 +317,11 @@ def _write_data(self): data = [ col.flatten() for col in data ] return data - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create an NKCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: An NKCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building NKCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading NK correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `NKCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading NK correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - def _read_from_data(self, data, params): + super()._read_from_data(data, params) s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['kappa'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.raw_xi = self.xi + self._xi1 = data['kappa'].reshape(s) + self._varxi = data['sigma'].reshape(s)**2 + self.xi = self.raw_xi self._raw_varxi = self._varxi - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) diff --git a/treecorr/nncorrelation.py b/treecorr/nncorrelation.py index 7ef07dad..7372122b 100644 --- a/treecorr/nncorrelation.py +++ b/treecorr/nncorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -63,9 +63,9 @@ class NNCorrelation(Corr2): .. 
note::

-        If you separate out the steps of the `process` command and use `process_auto` and/or
-        `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until
-        the `finalize` function is called.
+        If you separate out the steps of the `Corr2.process` command and use `process_auto`
+        and/or `Corr2.process_cross`, then the units will not be applied to ``meanr`` or
+        ``meanlogr`` until the `finalize` function is called.

     The typical usage pattern is as follows:

@@ -89,19 +89,24 @@ class NNCorrelation(Corr2):
         **kwargs:       See the documentation for `Corr2` for the list of allowed keyword
                         arguments, which may be passed either directly or in the config dict.
     """
+    _cls = 'NNCorrelation'
+    _letter1 = 'N'
+    _letter2 = 'N'
+    _letters = 'NN'
+    _builder = _treecorr.NNCorr
+    _calculateVar1 = lambda *args, **kwargs: None
+    _calculateVar2 = lambda *args, **kwargs: None
+    _sig1 = None
+    _sig2 = None

     # The angles are not important for accuracy of NN correlations.
     _default_angle_slop = 1

     def __init__(self, config=None, *, logger=None, **kwargs):
         """Initialize `NNCorrelation`.  See class doc for details.
         """
-        Corr2.__init__(self, config, logger=logger, **kwargs)
-
-        self.meanr = np.zeros_like(self.rnom, dtype=float)
-        self.meanlogr = np.zeros_like(self.rnom, dtype=float)
-        self.weight = np.zeros_like(self.rnom, dtype=float)
-        self.npairs = np.zeros_like(self.rnom, dtype=float)
+        super().__init__(config, logger=logger, **kwargs)
         self.tot = 0.
+        self._xi1 = self._xi2 = self._xi3 = self._xi4 = np.array([])
         self._rr_weight = None  # Marker that calculateXi hasn't been called yet.
         self._rr = None
         self._dr = None
@@ -110,61 +115,11 @@ def __init__(self, config=None, *, logger=None, **kwargs):
         self._write_dr = None
         self._write_rd = None
         self._write_patch_results = False
-        self._cov = None
-        self._var_num = 0
         self.logger.debug('Finished building NNCorr')

-    @property
-    def corr(self):
-        if self._corr is None:
-            x = np.array([])
-            self._corr = _treecorr.NNCorr(self._bintype, self._min_sep, self._max_sep, self._nbins,
-                                          self._bin_size, self.b, self.angle_slop,
-                                          self.min_rpar, self.max_rpar,
-                                          self.xperiod, self.yperiod, self.zperiod,
-                                          x, x, x, x,
-                                          self.meanr, self.meanlogr, self.weight, self.npairs)
-        return self._corr
-
-    def __eq__(self, other):
-        """Return whether two `NNCorrelation` instances are equal"""
-        return (isinstance(other, NNCorrelation) and
-                self.nbins == other.nbins and
-                self.bin_size == other.bin_size and
-                self.min_sep == other.min_sep and
-                self.max_sep == other.max_sep and
-                self.sep_units == other.sep_units and
-                self.coords == other.coords and
-                self.bin_type == other.bin_type and
-                self.bin_slop == other.bin_slop and
-                self.angle_slop == other.angle_slop and
-                self.min_rpar == other.min_rpar and
-                self.max_rpar == other.max_rpar and
-                self.xperiod == other.xperiod and
-                self.yperiod == other.yperiod and
-                self.zperiod == other.zperiod and
-                self.tot == other.tot and
-                np.array_equal(self.meanr, other.meanr) and
-                np.array_equal(self.meanlogr, other.meanlogr) and
-                np.array_equal(self.weight, other.weight) and
-                np.array_equal(self.npairs, other.npairs))
-
     def copy(self):
         """Make a copy"""
-        ret = NNCorrelation.__new__(NNCorrelation)
-        for key, item in self.__dict__.items():
-            if isinstance(item, np.ndarray):
-                # Only items that might change need to by deep copied.
-                ret.__dict__[key] = item.copy()
-            else:
-                # For everything else, shallow copy is fine.
-                # In particular don't deep copy config or logger
-                # Most of the rest are scalars, which copy fine this way.
- # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. + ret = super().copy() # True is possible during read before we finish reading in these attributes. if self._rr is not None and self._rr is not True: ret._rr = self._rr.copy() @@ -172,8 +127,6 @@ def copy(self): ret._dr = self._dr.copy() if self._rd is not None and self._rd is not True: ret._rd = self._rd.copy() - if self._cov is not None: - ret._cov = self._cov.copy() return ret @lazy_property @@ -205,9 +158,6 @@ def _zero_copy(self, tot): setattr(ret, '_nonzero', False) return ret - def __repr__(self): - return f'NNCorrelation({self._repr_kwargs})' - def process_auto(self, cat, *, metric=None, num_threads=None): """Process a single catalog, accumulating the auto-correlation. @@ -224,22 +174,7 @@ def process_auto(self, cat, *, metric=None, num_threads=None): (default: use the number of cpu cores; this value can also be given in the constructor in the config dict.) """ - if cat.name == '': - self.logger.info('Starting process NN auto-correlations') - else: - self.logger.info('Starting process NN auto-correlations for cat %s.', cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) + super()._process_auto(cat, metric, num_threads) self.tot += 0.5 * cat.sumw**2 def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): @@ -259,49 +194,13 @@ def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): (default: use the number of cpu cores; this value can also be given in the constructor in the config dict.) """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NN cross-correlations') - else: - self.logger.info('Starting process NN cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - self.tot += cat1.sumw*cat2.sumw - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. 
- self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().process_cross(cat1, cat2, metric=metric, num_threads=num_threads) + self.tot += cat1.sumw * cat2.sumw def finalize(self): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, + The `process_auto` and `Corr2.process_cross` commands accumulate values in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation of meanr, meanlogr by dividing by the total weight. """ @@ -315,32 +214,18 @@ def _nonzero(self): def _clear(self): """Clear the data vectors """ - self.meanr.ravel()[:] = 0. - self.meanlogr.ravel()[:] = 0. - self.weight.ravel()[:] = 0. - self.npairs.ravel()[:] = 0. + super()._clear() self.tot = 0. def __iadd__(self, other): - """Add a second `NNCorrelation`'s data to this one. + """Add a second Correlation object's data to this one. .. note:: - For this to make sense, both `NNCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. + For this to make sense, both objects should not have had `finalize` called yet. + Then, after adding them together, you should call `finalize` on the sum. """ - if not isinstance(other, NNCorrelation): - raise TypeError("Can only add another NNCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NNCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] + super().__iadd__(other) self.tot += other.tot return self @@ -374,57 +259,6 @@ def _add_tot(self, ij, c1, c2): # to save some time. self.results[ij] = self._zero_copy(tot) - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first N field. - cat2 (Catalog): A catalog or list of catalogs for the second N field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. 
(default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. (default: 'global') - """ - if initialize: - self.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None or len(cat2) == 0: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - if finalize: - self.finalize() - def _mean_weight(self): mean_np = np.mean(self.npairs) return 1 if mean_np == 0 else np.mean(self.weight)/mean_np @@ -793,11 +627,8 @@ def _write_data(self): @property def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. + params = super()._write_params params['tot'] = self.tot - params['coords'] = self.coords - params['metric'] = self.metric if self._write_patch_results: params['_rr'] = bool(self._rr) params['_dr'] = bool(self._dr) @@ -869,19 +700,13 @@ def _do_read(self, reader, name=None, params=None): self._rd = rd def _read_from_data(self, data, params): + super()._read_from_data(data, params) s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) self.weight = data['DD'].reshape(s) - self.npairs = data['npairs'].reshape(s) self.tot = params['tot'] - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() if 'xi' in data.dtype.names: self.xi = data['xi'].reshape(s) self.varxi = data['sigma_xi'].reshape(s)**2 - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) # Note: "or None" turns False -> None self._rr = params.get('_rr', None) or None self._dr = params.get('_dr', None) or None diff --git a/treecorr/nnncorrelation.py b/treecorr/nnncorrelation.py index 12086fa6..c34b5689 100644 --- a/treecorr/nnncorrelation.py +++ b/treecorr/nnncorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/nqcorrelation.py b/treecorr/nqcorrelation.py index ffb43c5e..100b3801 100644 --- a/treecorr/nqcorrelation.py +++ b/treecorr/nqcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarQ -from .corr2base import Corr2 +from .nzcorrelation import BaseNZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class NQCorrelation(Corr2): +class NQCorrelation(BaseNZCorrelation): r"""This class handles the calculation and storage of a 2-point count-spin-4 correlation function. @@ -71,7 +71,7 @@ class NQCorrelation(Corr2): .. 
note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -93,288 +93,32 @@ class NQCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'NQCorrelation' + _letter1 = 'N' + _letter2 = 'Q' + _letters = 'NQ' + _builder = _treecorr.NQCorr + _calculateVar1 = lambda *args, **kwargs: None + _calculateVar2 = staticmethod(calculateVarQ) + _zreal = 'qR' + _zimag = 'qR_im' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `NQCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._rq = None - self._raw_varxi = None - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats = [] - self.logger.debug('Finished building NQCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.NQCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.raw_xi, self.raw_xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `NQCorrelation` instances are equal""" - return (isinstance(other, NQCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = NQCorrelation.__new__(NQCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. 
- if self.xi is self.raw_xi: - ret.raw_xi = ret.xi - ret.raw_xi_im = ret.xi_im - else: - ret.raw_xi = self.raw_xi.copy() - ret.raw_xi_im = self.raw_xi_im.copy() - if self._rq is not None: - ret._rq = self._rq.copy() - return ret - - def __repr__(self): - return f'NQCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NQ cross-correlations') - else: - self.logger.info('Starting process NQ cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getQField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.raw_xi[mask1] /= self.weight[mask1] - self.raw_xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varq): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varq (float): The variance per component of the spin-4 field. 
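A hedged end-to-end sketch for the NQ flow, with random data; q1/q2 are assumed to be the standard Catalog arguments for the spin-4 components:

    import numpy as np
    import treecorr

    rng = np.random.default_rng(3)
    n = 2000
    lens = treecorr.Catalog(x=rng.uniform(0, 50, 500), y=rng.uniform(0, 50, 500))
    src = treecorr.Catalog(x=rng.uniform(0, 50, n), y=rng.uniform(0, 50, n),
                           q1=rng.normal(0, 0.1, n), q2=rng.normal(0, 0.1, n))

    nq = treecorr.NQCorrelation(min_sep=1., max_sep=10., nbins=5)
    nq.process(lens, src)   # computes varq and calls finalize internally
    print(nq.xi[:3], nq.xi_im[:3])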
""" - self._finalize() - self._var_num = varq - - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - - @property - def raw_varxi(self): - if self._raw_varxi is None: - self._raw_varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._raw_varxi.ravel()[:] = self.cov_diag - return self._raw_varxi - - @property - def varxi(self): - if self._varxi is None: - self._varxi = self.raw_varxi - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.raw_xi.ravel()[:] = 0 - self.raw_xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `NQCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `NQCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, NQCorrelation): - raise TypeError("Can only add another NQCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NQCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.raw_xi.ravel()[:] += other.raw_xi.ravel()[:] - self.raw_xi_im.ravel()[:] += other.raw_xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.raw_xi for c in others], axis=0, out=self.raw_xi) - np.sum([c.raw_xi_im for c in others], axis=0, out=self.raw_xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the N field. - cat2 (Catalog): A catalog or list of catalogs for the Q field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. 
- This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. (default: 'global') - """ - import math - if initialize: - self.clear() - self._rq = None - self._processed_cats.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats.extend(cat2) - if finalize: - varq = calculateVarQ(self._processed_cats, low_mem=low_mem) - self.logger.info("varq = %f: sig_sn (per component) = %f",varq,math.sqrt(varq)) - self.finalize(varq) - self._processed_cats.clear() + super().finalize(varq) def calculateXi(self, *, rq=None): r"""Calculate the correlation function possibly given another correlation function @@ -402,50 +146,7 @@ def calculateXi(self, *, rq=None): - xi_im = array of the imaginary part of :math:`\xi(R)` - varxi = array of the variance estimates of the above values """ - if rq is not None: - self.xi = self.raw_xi - rq.xi - self.xi_im = self.raw_xi_im - rq.xi_im - self._rq = rq - - if rq.npatch1 not in (1,self.npatch1) or rq.npatch2 != self.npatch2: - raise RuntimeError("RQ must be run with the same patches as DQ") - - if len(self.results) > 0: - # If there are any rq patch pairs that aren't in results (e.g. due to different - # edge effects among the various pairs in consideration), then we need to add - # some dummy results to make sure all the right pairs are computed when we make - # the vectors for the covariance matrix. - template = next(iter(self.results.values())) # Just need something to copy. - for ij in rq.results: - if ij in self.results: continue - new_cij = template.copy() - new_cij.xi.ravel()[:] = 0 - new_cij.weight.ravel()[:] = 0 - self.results[ij] = new_cij - - self._cov = self.estimate_cov(self.var_method) - self._varxi = np.zeros_like(self.rnom, dtype=float) - self._varxi.ravel()[:] = self.cov_diag - else: - self._varxi = self.raw_varxi + rq.varxi - else: - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._varxi = self.raw_varxi - - return self.xi, self.xi_im, self.varxi - - def _calculate_xi_from_pairs(self, pairs): - self._sum([self.results[ij] for ij in pairs]) - self._finalize() - if self._rq is not None: - # If rq has npatch1 = 1, adjust pairs appropriately - if self._rq.npatch1 == 1 and not all([p[0] == 0 for p in pairs]): - pairs = [(0,ij[1]) for ij in pairs if ij[0] == ij[1]] - # Make sure all ij are in the rq results (some might be missing, which is ok) - pairs = [ij for ij in pairs if self._rq._ok[ij[0],ij[1]]] - self._rq._calculate_xi_from_pairs(pairs) - self.xi -= self._rq.xi + return super().calculateXi(rz=rq) def write(self, file_name, *, rq=None, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -491,93 +192,4 @@ def write(self, file_name, *, rq=None, file_type=None, precision=None, (default: False) write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) """ - self.logger.info('Writing NQ correlations to %s',file_name) - self.calculateXi(rq=rq) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','qR','qR_im','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create an NQCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: An NQCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building NQCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading NQ correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `NQCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) 
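A hedged round-trip sketch, reusing the nq object from the sketch above; the file name is a placeholder:

    # The header written here carries the configuration, which is what lets
    # from_file rebuild an equivalent object without any constructor kwargs.
    nq.write('nq_output.fits')
    nq2 = treecorr.NQCorrelation.from_file('nq_output.fits')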
- """ - self.logger.info('Reading NQ correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['qR'].reshape(s) - self.xi_im = data['qR_im'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._raw_varxi = self._varxi - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, rq, file_type, precision, write_patch_results, write_cov) diff --git a/treecorr/ntcorrelation.py b/treecorr/ntcorrelation.py index 0e32a791..46132771 100644 --- a/treecorr/ntcorrelation.py +++ b/treecorr/ntcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarT -from .corr2base import Corr2 +from .nzcorrelation import BaseNZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class NTCorrelation(Corr2): +class NTCorrelation(BaseNZCorrelation): r"""This class handles the calculation and storage of a 2-point count-spin-3 correlation function. @@ -71,7 +71,7 @@ class NTCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -93,288 +93,32 @@ class NTCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'NTCorrelation' + _letter1 = 'N' + _letter2 = 'T' + _letters = 'NT' + _builder = _treecorr.NTCorr + _calculateVar1 = lambda *args, **kwargs: None + _calculateVar2 = staticmethod(calculateVarT) + _zreal = 'tR' + _zimag = 'tR_im' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `NTCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._rt = None - self._raw_varxi = None - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats = [] - self.logger.debug('Finished building NTCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.NTCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.raw_xi, self.raw_xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `NTCorrelation` instances are equal""" - return (isinstance(other, NTCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = NTCorrelation.__new__(NTCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - if self.xi is self.raw_xi: - ret.raw_xi = ret.xi - ret.raw_xi_im = ret.xi_im - else: - ret.raw_xi = self.raw_xi.copy() - ret.raw_xi_im = self.raw_xi_im.copy() - if self._rt is not None: - ret._rt = self._rt.copy() - return ret - - def __repr__(self): - return f'NTCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. 
See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NT cross-correlations') - else: - self.logger.info('Starting process NT cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getTField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.raw_xi[mask1] /= self.weight[mask1] - self.raw_xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, vart): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: vart (float): The variance per component of the spin-3 field. """ - self._finalize() - self._var_num = vart - - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - - @property - def raw_varxi(self): - if self._raw_varxi is None: - self._raw_varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._raw_varxi.ravel()[:] = self.cov_diag - return self._raw_varxi - - @property - def varxi(self): - if self._varxi is None: - self._varxi = self.raw_varxi - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.raw_xi.ravel()[:] = 0 - self.raw_xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `NTCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `NTCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. 
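For the patch-based covariance machinery these hunks delegate to the base class (estimate_cov, cov_diag), a hedged sketch with random data; jackknife is just one choice of method, and t1/t2 are assumed to be the standard Catalog arguments for the spin-3 components:

    import numpy as np
    import treecorr

    rng = np.random.default_rng(4)
    n = 4000
    lens = treecorr.Catalog(x=rng.uniform(0, 50, 1000), y=rng.uniform(0, 50, 1000),
                            npatch=8)
    src = treecorr.Catalog(x=rng.uniform(0, 50, n), y=rng.uniform(0, 50, n),
                           t1=rng.normal(0, 0.1, n), t2=rng.normal(0, 0.1, n),
                           patch_centers=lens.patch_centers)

    nt = treecorr.NTCorrelation(min_sep=1., max_sep=10., nbins=5,
                                var_method='jackknife')
    nt.process(lens, src)
    cov = nt.estimate_cov('jackknife')   # patch-based covariance estimate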
- """ - if not isinstance(other, NTCorrelation): - raise TypeError("Can only add another NTCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NTCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.raw_xi.ravel()[:] += other.raw_xi.ravel()[:] - self.raw_xi_im.ravel()[:] += other.raw_xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.raw_xi for c in others], axis=0, out=self.raw_xi) - np.sum([c.raw_xi_im for c in others], axis=0, out=self.raw_xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the N field. - cat2 (Catalog): A catalog or list of catalogs for the T field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._rt = None - self._processed_cats.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats.extend(cat2) - if finalize: - vart = calculateVarT(self._processed_cats, low_mem=low_mem) - self.logger.info("vart = %f: sig_sn (per component) = %f",vart,math.sqrt(vart)) - self.finalize(vart) - self._processed_cats.clear() + super().finalize(vart) def calculateXi(self, *, rt=None): r"""Calculate the correlation function possibly given another correlation function @@ -402,50 +146,7 @@ def calculateXi(self, *, rt=None): - xi_im = array of the imaginary part of :math:`\xi(R)` - varxi = array of the variance estimates of the above values """ - if rt is not None: - self.xi = self.raw_xi - rt.xi - self.xi_im = self.raw_xi_im - rt.xi_im - self._rt = rt - - if rt.npatch1 not in (1,self.npatch1) or rt.npatch2 != self.npatch2: - raise RuntimeError("RT must be run with the same patches as DT") - - if len(self.results) > 0: - # If there are any rt patch pairs that aren't in results (e.g. due to different - # edge effects among the various pairs in consideration), then we need to add - # some dummy results to make sure all the right pairs are computed when we make - # the vectors for the covariance matrix. - template = next(iter(self.results.values())) # Just need something to copy. - for ij in rt.results: - if ij in self.results: continue - new_cij = template.copy() - new_cij.xi.ravel()[:] = 0 - new_cij.weight.ravel()[:] = 0 - self.results[ij] = new_cij - - self._cov = self.estimate_cov(self.var_method) - self._varxi = np.zeros_like(self.rnom, dtype=float) - self._varxi.ravel()[:] = self.cov_diag - else: - self._varxi = self.raw_varxi + rt.varxi - else: - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._varxi = self.raw_varxi - - return self.xi, self.xi_im, self.varxi - - def _calculate_xi_from_pairs(self, pairs): - self._sum([self.results[ij] for ij in pairs]) - self._finalize() - if self._rt is not None: - # If rt has npatch1 = 1, adjust pairs appropriately - if self._rt.npatch1 == 1 and not all([p[0] == 0 for p in pairs]): - pairs = [(0,ij[1]) for ij in pairs if ij[0] == ij[1]] - # Make sure all ij are in the rt results (some might be missing, which is ok) - pairs = [ij for ij in pairs if self._rt._ok[ij[0],ij[1]]] - self._rt._calculate_xi_from_pairs(pairs) - self.xi -= self._rt.xi + return super().calculateXi(rz=rt) def write(self, file_name, *, rt=None, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -491,93 +192,4 @@ def write(self, file_name, *, rt=None, file_type=None, precision=None, (default: False) write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) """ - self.logger.info('Writing NT correlations to %s',file_name) - self.calculateXi(rt=rt) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','tR','tR_im','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create an NTCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: An NTCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building NTCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading NT correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `NTCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) 
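The round trip that makes the new base-class `from_file` worthwhile is easy to demonstrate. A minimal usage sketch (illustrative file name and binning; assumes fitsio is installed and that the 5.0-style t1/t2 catalog columns are available):

    import numpy as np
    import treecorr

    rng = np.random.default_rng(42)
    n = 1000
    cat1 = treecorr.Catalog(x=rng.uniform(0, 100, n), y=rng.uniform(0, 100, n))
    cat2 = treecorr.Catalog(x=rng.uniform(0, 100, n), y=rng.uniform(0, 100, n),
                            t1=rng.normal(0, 0.1, n), t2=rng.normal(0, 0.1, n))
    nt = treecorr.NTCorrelation(min_sep=1., max_sep=50., nbins=8)
    nt.process(cat1, cat2)
    nt.write('nt.fits')
    # from_file rebuilds the object, reading min_sep etc. from the file header,
    # so no configuration needs to be repeated here.
    nt2 = treecorr.NTCorrelation.from_file('nt.fits')
    assert np.allclose(nt2.xi, nt.xi)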
- """ - self.logger.info('Reading NT correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['tR'].reshape(s) - self.xi_im = data['tR_im'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._raw_varxi = self._varxi - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, rt, file_type, precision, write_patch_results, write_cov) diff --git a/treecorr/nvcorrelation.py b/treecorr/nvcorrelation.py index a62b20e2..c8d65775 100644 --- a/treecorr/nvcorrelation.py +++ b/treecorr/nvcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarV -from .corr2base import Corr2 +from .nzcorrelation import BaseNZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class NVCorrelation(Corr2): +class NVCorrelation(BaseNZCorrelation): r"""This class handles the calculation and storage of a 2-point count-vector correlation function. @@ -71,7 +71,7 @@ class NVCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_cross`, + If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. @@ -93,288 +93,32 @@ class NVCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'NVCorrelation' + _letter1 = 'N' + _letter2 = 'V' + _letters = 'NV' + _builder = _treecorr.NVCorr + _calculateVar1 = lambda *args, **kwargs: None + _calculateVar2 = staticmethod(calculateVarV) + _zreal = 'vR' + _zimag = 'vT' + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `NVCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xi = np.zeros_like(self.rnom, dtype=float) - self.xi_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._rv = None - self._raw_varxi = None - self._varxi = None - self._cov = None - self._var_num = 0 - self._processed_cats = [] - self.logger.debug('Finished building NVCorr') - - @property - def corr(self): - if self._corr is None: - x = np.array([]) - self._corr = _treecorr.NVCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.raw_xi, self.raw_xi_im, x, x, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `NVCorrelation` instances are equal""" - return (isinstance(other, NVCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xi, other.xi) and - np.array_equal(self.xi_im, other.xi_im) and - np.array_equal(self.varxi, other.varxi) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = NVCorrelation.__new__(NVCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - if self.xi is self.raw_xi: - ret.raw_xi = ret.xi - ret.raw_xi_im = ret.xi_im - else: - ret.raw_xi = self.raw_xi.copy() - ret.raw_xi_im = self.raw_xi_im.copy() - if self._rv is not None: - ret._rv = self._rv.copy() - return ret - - def __repr__(self): - return f'NVCorrelation({self._repr_kwargs})' - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. 
See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process NV cross-correlations') - else: - self.logger.info('Starting process NV cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getNField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getVField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.raw_xi[mask1] /= self.weight[mask1] - self.raw_xi_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varv): """Finalize the calculation of the correlation function. - The `process_cross` command accumulates values in each bin, so it can be called + The `Corr2.process_cross` command accumulates values in each bin, so it can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varv (float): The variance per component of the vector field. """ - self._finalize() - self._var_num = varv - - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - - @property - def raw_varxi(self): - if self._raw_varxi is None: - self._raw_varxi = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._raw_varxi.ravel()[:] = self.cov_diag - return self._raw_varxi - - @property - def varxi(self): - if self._varxi is None: - self._varxi = self.raw_varxi - return self._varxi - - def _clear(self): - """Clear the data vectors - """ - self.raw_xi.ravel()[:] = 0 - self.raw_xi_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def __iadd__(self, other): - """Add a second `NVCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `NVCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. 
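The deleted ``raw_varxi``/``varxi`` properties move essentially verbatim into the new base class; the idiom is a lazily computed, cached array that ``_clear`` invalidates. A standalone sketch of that caching pattern (hypothetical names):

    import numpy as np

    class LazyVar:
        def __init__(self, nbins):
            self.nbins = nbins
            self._varxi = None            # None means "not computed yet"

        @property
        def varxi(self):
            if self._varxi is None:       # computed at most once per clear
                self._varxi = np.zeros(self.nbins, dtype=float)
                # (the real code fills this from the covariance diagonal)
            return self._varxi

        def clear(self):
            self._varxi = None            # invalidate; next access recomputes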
- """ - if not isinstance(other, NVCorrelation): - raise TypeError("Can only add another NVCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("NVCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.raw_xi.ravel()[:] += other.raw_xi.ravel()[:] - self.raw_xi_im.ravel()[:] += other.raw_xi_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.raw_xi for c in others], axis=0, out=self.raw_xi) - np.sum([c.raw_xi_im for c in others], axis=0, out=self.raw_xi_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._raw_varxi = None - self._varxi = None - self._cov = None - - def process(self, cat1, cat2, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the N field. - cat2 (Catalog): A catalog or list of catalogs for the V field. - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._rv = None - self._processed_cats.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats.extend(cat2) - if finalize: - varv = calculateVarV(self._processed_cats, low_mem=low_mem) - self.logger.info("varv = %f: sig_sn (per component) = %f",varv,math.sqrt(varv)) - self.finalize(varv) - self._processed_cats.clear() + super().finalize(varv) def calculateXi(self, *, rv=None): r"""Calculate the correlation function possibly given another correlation function @@ -401,50 +145,7 @@ def calculateXi(self, *, rv=None): - xi_im = array of the imaginary part of :math:`\xi(R)` - varxi = array of the variance estimates of the above values """ - if rv is not None: - self.xi = self.raw_xi - rv.xi - self.xi_im = self.raw_xi_im - rv.xi_im - self._rv = rv - - if rv.npatch1 not in (1,self.npatch1) or rv.npatch2 != self.npatch2: - raise RuntimeError("RV must be run with the same patches as DV") - - if len(self.results) > 0: - # If there are any rv patch pairs that aren't in results (e.g. due to different - # edge effects among the various pairs in consideration), then we need to add - # some dummy results to make sure all the right pairs are computed when we make - # the vectors for the covariance matrix. - template = next(iter(self.results.values())) # Just need something to copy. - for ij in rv.results: - if ij in self.results: continue - new_cij = template.copy() - new_cij.xi.ravel()[:] = 0 - new_cij.weight.ravel()[:] = 0 - self.results[ij] = new_cij - - self._cov = self.estimate_cov(self.var_method) - self._varxi = np.zeros_like(self.rnom, dtype=float) - self._varxi.ravel()[:] = self.cov_diag - else: - self._varxi = self.raw_varxi + rv.varxi - else: - self.xi = self.raw_xi - self.xi_im = self.raw_xi_im - self._varxi = self.raw_varxi - - return self.xi, self.xi_im, self.varxi - - def _calculate_xi_from_pairs(self, pairs): - self._sum([self.results[ij] for ij in pairs]) - self._finalize() - if self._rv is not None: - # If rv has npatch1 = 1, adjust pairs appropriately - if self._rv.npatch1 == 1 and not all([p[0] == 0 for p in pairs]): - pairs = [(0,ij[1]) for ij in pairs if ij[0] == ij[1]] - # Make sure all ij are in the rv results (some might be missing, which is ok) - pairs = [ij for ij in pairs if self._rv._ok[ij[0],ij[1]]] - self._rv._calculate_xi_from_pairs(pairs) - self.xi -= self._rv.xi + return super().calculateXi(rz=rv) def write(self, file_name, *, rv=None, file_type=None, precision=None, write_patch_results=False, write_cov=False): @@ -489,93 +190,4 @@ def write(self, file_name, *, rv=None, file_type=None, precision=None, (default: False) write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) """ - self.logger.info('Writing NV correlations to %s',file_name) - self.calculateXi(rv=rv) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom','meanr','meanlogr','vR','vT','sigma','weight','npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xi, self.xi_im, np.sqrt(self.varxi), self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create an NVCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: An NVCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building NVCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading NV correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `NVCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) 
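As the warning in the deleted docstring says, plain ``read`` trusts that the receiving object already matches the file, unlike ``from_file``. An illustrative sketch (hypothetical file, assumed written earlier by an NVCorrelation with this same binning):

    import treecorr

    # The original configuration must be repeated by hand; read() does not check it.
    nv = treecorr.NVCorrelation(min_sep=1., max_sep=25., nbins=6)
    nv.read('nv.fits')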
- """ - self.logger.info('Reading NV correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xi = data['vR'].reshape(s) - self.xi_im = data['vT'].reshape(s) - self._varxi = data['sigma'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.raw_xi = self.xi - self.raw_xi_im = self.xi_im - self._raw_varxi = self._varxi - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().write(file_name, rv, file_type, precision, write_patch_results, write_cov) diff --git a/treecorr/nzcorrelation.py b/treecorr/nzcorrelation.py new file mode 100644 index 00000000..df011135 --- /dev/null +++ b/treecorr/nzcorrelation.py @@ -0,0 +1,359 @@ +# Copyright (c) 2003-2024 by Mike Jarvis +# +# TreeCorr is free software: redistribution and use in source and binary forms, +# with or without modification, are permitted provided that the following +# conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions, and the disclaimer given in the accompanying LICENSE +# file. +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions, and the disclaimer given in the documentation +# and/or other materials provided with the distribution. + +""" +.. module:: ngcorrelation +""" + +import numpy as np + +from . import _treecorr +from .catalog import calculateVarZ +from .corr2base import Corr2 +from .util import make_writer +from .config import make_minimal_config + + +class BaseNZCorrelation(Corr2): + """This class is a base class for all the N?Correlation classes, where ? is one of the + complex fields of varying spin. + + A lot of the implementation is shared among those types, so whenever possible the shared + implementation is done in this class. 
+ """ + _sig1 = None + _sig2 = 'sig_sn (per component)' + + def __init__(self, config=None, *, logger=None, **kwargs): + super().__init__(config, logger=logger, **kwargs) + + self._xi1 = np.zeros_like(self.rnom, dtype=float) + self._xi2 = np.zeros_like(self.rnom, dtype=float) + self._xi3 = self._xi4 = np.array([]) + self.xi = self.raw_xi + self.xi_im = self.raw_xi_im + self._rz = None + self._raw_varxi = None + self._varxi = None + self.logger.debug('Finished building %s', self._cls) + + @property + def raw_xi(self): + return self._xi1 + + @property + def raw_xi_im(self): + return self._xi2 + + def copy(self): + """Make a copy""" + ret = super().copy() + if self.xi is self.raw_xi: + ret.xi = ret.raw_xi + ret.xi_im = ret.raw_xi_im + if self._rz is not None: + ret._rz = self._rz.copy() + return ret + + def finalize(self, varz): + self._finalize() + self._var_num = varz + + self.xi = self.raw_xi + self.xi_im = self.raw_xi_im + + @property + def raw_varxi(self): + if self._raw_varxi is None: + self._raw_varxi = np.zeros_like(self.rnom, dtype=float) + if self._var_num != 0: + self._raw_varxi.ravel()[:] = self.cov_diag + return self._raw_varxi + + @property + def varxi(self): + if self._varxi is None: + self._varxi = self.raw_varxi + return self._varxi + + def _clear(self): + """Clear the data vectors + """ + super()._clear() + self.xi = self.raw_xi + self.xi_im = self.raw_xi_im + self._rz = None + self._raw_varxi = None + self._varxi = None + + def _sum(self, others): + # Equivalent to the operation of: + # self._clear() + # for other in others: + # self += other + # but no sanity checks and use numpy.sum for faster calculation. + np.sum([c._xi1 for c in others], axis=0, out=self._xi1) + np.sum([c._xi2 for c in others], axis=0, out=self._xi2) + np.sum([c.meanr for c in others], axis=0, out=self.meanr) + np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) + np.sum([c.weight for c in others], axis=0, out=self.weight) + np.sum([c.npairs for c in others], axis=0, out=self.npairs) + self.xi = self.raw_xi + self.xi_im = self.raw_xi_im + self._raw_varxi = None + self._varxi = None + self._cov = None + + def calculateXi(self, rz=None): + if rz is not None: + self.xi = self.raw_xi - rz.xi + self.xi_im = self.raw_xi_im - rz.xi_im + self._rz = rz + + if rz.npatch1 not in (1,self.npatch1) or rz.npatch2 != self.npatch2: + raise RuntimeError(f"R{self._letter2} must be run with the same patches as D{self._letter2}") + + if len(self.results) > 0: + # If there are any rz patch pairs that aren't in results (e.g. due to different + # edge effects among the various pairs in consideration), then we need to add + # some dummy results to make sure all the right pairs are computed when we make + # the vectors for the covariance matrix. + template = next(iter(self.results.values())) # Just need something to copy. 
+                for ij in rz.results:
+                    if ij in self.results: continue
+                    new_cij = template.copy()
+                    new_cij.xi.ravel()[:] = 0
+                    new_cij.weight.ravel()[:] = 0
+                    self.results[ij] = new_cij
+
+                self._cov = self.estimate_cov(self.var_method)
+                self._varxi = np.zeros_like(self.rnom, dtype=float)
+                self._varxi.ravel()[:] = self.cov_diag
+            else:
+                self._varxi = self.raw_varxi + rz.varxi
+        else:
+            self.xi = self.raw_xi
+            self.xi_im = self.raw_xi_im
+            self._varxi = self.raw_varxi
+
+        return self.xi, self.xi_im, self.varxi
+
+    def _calculate_xi_from_pairs(self, pairs):
+        self._sum([self.results[ij] for ij in pairs])
+        self._finalize()
+        if self._rz is not None:
+            # If rz has npatch1 = 1, adjust pairs appropriately
+            if self._rz.npatch1 == 1 and not all([p[0] == 0 for p in pairs]):
+                pairs = [(0,ij[1]) for ij in pairs if ij[0] == ij[1]]
+            # Make sure all ij are in the rz results (some might be missing, which is ok)
+            pairs = [ij for ij in pairs if self._rz._ok[ij[0],ij[1]]]
+            self._rz._calculate_xi_from_pairs(pairs)
+            self.xi -= self._rz.xi
+
+    def write(self, file_name, rz=None, file_type=None, precision=None,
+              write_patch_results=False, write_cov=False):
+        self.logger.info(f'Writing {self._letters} correlations to %s',file_name)
+        BaseNZCorrelation.calculateXi(self, rz)
+        precision = self.config.get('precision', 4) if precision is None else precision
+        with make_writer(file_name, precision, file_type, self.logger) as writer:
+            self._write(writer, None, write_patch_results, write_cov=write_cov)
+
+    @property
+    def _write_col_names(self):
+        return ['r_nom','meanr','meanlogr',self._zreal,self._zimag,'sigma','weight','npairs']
+
+    @property
+    def _write_data(self):
+        data = [ self.rnom, self.meanr, self.meanlogr,
+                 self.xi, self.xi_im, np.sqrt(self.varxi), self.weight, self.npairs ]
+        data = [ col.flatten() for col in data ]
+        return data
+
+    def _read_from_data(self, data, params):
+        super()._read_from_data(data, params)
+        s = self.logr.shape
+        self.weight = data['weight'].reshape(s)
+        self._xi1 = data[self._zreal].reshape(s)
+        self._xi2 = data[self._zimag].reshape(s)
+        self._varxi = data['sigma'].reshape(s)**2
+        self.xi = self.raw_xi
+        self.xi_im = self.raw_xi_im
+        self._raw_varxi = self._varxi
+
+class NZCorrelation(BaseNZCorrelation):
+    r"""This class handles the calculation and storage of a 2-point count-spin-0 correlation
+    function.  If the spin-0 field is real, you should instead use `NKCorrelation` as it will
+    be faster.  This class is intended for correlations of a complex spin-0 field.
+
+    Objects of this class hold the following attributes:
+
+    Attributes:
+        nbins:     The number of bins in logr
+        bin_size:  The size of the bins in logr
+        min_sep:   The minimum separation being considered
+        max_sep:   The maximum separation being considered
+
+    In addition, the following attributes are numpy arrays of length (nbins):
+
+    Attributes:
+        logr:      The nominal center of the bin in log(r) (the natural logarithm of r).
+        rnom:      The nominal center of the bin converted to regular distance.
+                   i.e. r = exp(logr).
+        meanr:     The (weighted) mean value of r for the pairs in each bin.
+                   If there are no pairs in a bin, then exp(logr) will be used instead.
+        meanlogr:  The (weighted) mean value of log(r) for the pairs in each bin.
+                   If there are no pairs in a bin, then logr will be used instead.
+        xi:        The correlation function, :math:`\xi(r) = \langle z\rangle`.
+        xi_im:     The imaginary part of :math:`\xi(r)`.
+        varxi:     An estimate of the variance of :math:`\xi`
+        weight:    The total weight in each bin.
+        npairs:    The number of pairs going into each bin (including pairs where one or
+                   both objects have w=0).
+        cov:       An estimate of the full covariance matrix.
+        raw_xi:    The raw value of xi, uncorrected by an RZ calculation. cf. `calculateXi`
+        raw_xi_im: The raw value of xi_im, uncorrected by an RZ calculation. cf. `calculateXi`
+        raw_varxi: The raw value of varxi, uncorrected by an RZ calculation. cf. `calculateXi`
+
+    .. note::
+
+        The default method for estimating the variance and covariance attributes (``varxi``,
+        and ``cov``) is 'shot', which only includes the shape noise propagated into
+        the final correlation.  This does not include sample variance, so it is always an
+        underestimate of the actual variance.  To get better estimates, you need to set
+        ``var_method`` to something else and use patches in the input catalog(s).
+        cf. `Covariance Estimates`.
+
+    If ``sep_units`` are given (either in the config dict or as a named kwarg) then the distances
+    will all be in these units.
+
+    .. note::
+
+        If you separate out the steps of the `Corr2.process` command and use `Corr2.process_cross`,
+        then the units will not be applied to ``meanr`` or ``meanlogr`` until the `finalize`
+        function is called.
+
+    The typical usage pattern is as follows:
+
+        >>> nz = treecorr.NZCorrelation(config)
+        >>> nz.process(cat1,cat2)   # Compute the cross-correlation.
+        >>> nz.write(file_name)     # Write out to a file.
+        >>> xi = nz.xi              # Or access the correlation function directly.
+
+    Parameters:
+        config (dict):  A configuration dict that can be used to pass in kwargs if desired.
+                        This dict is allowed to have additional entries besides those listed
+                        in `Corr2`, which are ignored here. (default: None)
+        logger:         If desired, a logger object for logging. (default: None, in which case
+                        one will be built according to the config dict's verbose level.)
+
+    Keyword Arguments:
+        **kwargs:       See the documentation for `Corr2` for the list of allowed keyword
+                        arguments, which may be passed either directly or in the config dict.
+    """
+    _cls = 'NZCorrelation'
+    _letter1 = 'N'
+    _letter2 = 'Z'
+    _letters = 'NZ'
+    _builder = _treecorr.NZCorr
+    _calculateVar1 = lambda *args, **kwargs: None
+    _calculateVar2 = staticmethod(calculateVarZ)
+    _zreal = 'z_real'
+    _zimag = 'z_imag'
+
+    def __init__(self, config=None, *, logger=None, **kwargs):
+        """Initialize `NZCorrelation`.  See class doc for details.
+        """
+        super().__init__(config, logger=logger, **kwargs)
+
+    def finalize(self, varz):
+        """Finalize the calculation of the correlation function.
+
+        The `Corr2.process_cross` command accumulates values in each bin, so it can be called
+        multiple times if appropriate.  Afterwards, this command finishes the calculation
+        by dividing each column by the total weight.
+
+        Parameters:
+            varz (float):   The variance per component of the spin-0 field.
+        """
+        super().finalize(varz)
+
+    def calculateXi(self, *, rz=None):
+        r"""Calculate the correlation function possibly given another correlation function
+        that uses random points for the foreground objects.
+
+        - If rz is None, the simple correlation function :math:`\langle z\rangle` is
+          returned.
+        - If rz is not None, then a compensated calculation is done:
+          :math:`\langle z\rangle = (DZ - RZ)`, where DZ represents the mean field value
+          around the data points and RZ represents the mean value around random points.
+
+        After calling this function, the attributes ``xi``, ``xi_im``, ``varxi``, and ``cov`` will
+        correspond to the compensated values (if rz is provided).  The raw, uncompensated values
+        are available as ``raw_xi``, ``raw_xi_im``, and ``raw_varxi``.
+
+        Parameters:
+            rz (NZCorrelation): The cross-correlation using random locations as the lenses
+                                (RZ), if desired.  (default: None)
+
+        Returns:
+            Tuple containing
+
+                - xi = array of the real part of :math:`\xi(R)`
+                - xi_im = array of the imaginary part of :math:`\xi(R)`
+                - varxi = array of the variance estimates of the above values
+        """
+        return super().calculateXi(rz=rz)
+
+    def write(self, file_name, *, rz=None, file_type=None, precision=None,
+              write_patch_results=False, write_cov=False):
+        r"""Write the correlation function to the file, file_name.
+
+        - If rz is None, the simple correlation function :math:`\langle z\rangle` is used.
+        - If rz is not None, then a compensated calculation is done:
+          :math:`\langle z\rangle = (DZ - RZ)`, where DZ represents the mean field value
+          around the data points and RZ represents the mean value around random points.
+
+        The output file will include the following columns:
+
+        ==========      =============================================================
+        Column          Description
+        ==========      =============================================================
+        r_nom           The nominal center of the bin in r
+        meanr           The mean value :math:`\langle r \rangle` of pairs that fell
+                        into each bin
+        meanlogr        The mean value :math:`\langle \log(r) \rangle` of pairs that
+                        fell into each bin
+        z_real          The mean real component, :math:`\langle real(z) \rangle(r)`
+        z_imag          The mean imaginary component, :math:`\langle imag(z) \rangle(r)`.
+        sigma           The sqrt of the variance estimate of either of these
+        weight          The total weight contributing to each bin
+        npairs          The total number of pairs in each bin
+        ==========      =============================================================
+
+        If ``sep_units`` was given at construction, then the distances will all be in these units.
+        Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or
+        radians (for spherical coordinates).
+
+        Parameters:
+            file_name (str):    The name of the file to write to.
+            rz (NZCorrelation): The cross-correlation using random locations as the lenses
+                                (RZ), if desired.  (default: None)
+            file_type (str):    The type of file to write ('ASCII' or 'FITS').  (default: determine
+                                the type automatically from the extension of file_name.)
+            precision (int):    For ASCII output catalogs, the desired precision. (default: 4;
+                                this value can also be given in the constructor in the config dict.)
+            write_patch_results (bool): Whether to write the patch-based results as well.
+                                        (default: False)
+            write_cov (bool):   Whether to write the covariance matrix as well. (default: False)
+        """
+        super().write(file_name, rz, file_type, precision, write_patch_results, write_cov)
diff --git a/treecorr/qqcorrelation.py b/treecorr/qqcorrelation.py
index 2a4833b9..566ce2c3 100644
--- a/treecorr/qqcorrelation.py
+++ b/treecorr/qqcorrelation.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2019 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
@@ -19,12 +19,12 @@ from .
import _treecorr from .catalog import calculateVarQ -from .corr2base import Corr2 +from .zzcorrelation import BaseZZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class QQCorrelation(Corr2): +class QQCorrelation(BaseZZCorrelation): r"""This class handles the calculation and storage of a 2-point spin-4-spin-4 correlation function. @@ -73,9 +73,9 @@ class QQCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_auto` and/or - `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until - the `finalize` function is called. + If you separate out the steps of the `Corr2.process` command and use + `BaseZZCorrelation.process_auto` and/or `Corr2.process_cross`, then the units will not be + applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. The typical usage pattern is as follows: @@ -96,482 +96,28 @@ class QQCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'QQCorrelation' + _letter1 = 'Q' + _letter2 = 'Q' + _letters = 'QQ' + _builder = _treecorr.QQCorr + _calculateVar1 = staticmethod(calculateVarQ) + _calculateVar2 = staticmethod(calculateVarQ) + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `QQCorrelation`. See class doc for details. """ - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xip = np.zeros_like(self.rnom, dtype=float) - self.xim = np.zeros_like(self.rnom, dtype=float) - self.xip_im = np.zeros_like(self.rnom, dtype=float) - self.xim_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxip = None - self._varxim = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building QQCorr') - - @property - def corr(self): - if self._corr is None: - self._corr = _treecorr.QQCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xip, self.xip_im, self.xim, self.xim_im, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `QQCorrelation` instances are equal""" - return (isinstance(other, QQCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xip, other.xip) and - np.array_equal(self.xim, other.xim) and - np.array_equal(self.xip_im, other.xip_im) and - np.array_equal(self.xim_im, other.xim_im) and - np.array_equal(self.varxip, other.varxip) and - np.array_equal(self.varxim, other.varxim) and - 
np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = QQCorrelation.__new__(QQCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'QQCorrelation({self._repr_kwargs})' - - def process_auto(self, cat, *, metric=None, num_threads=None): - """Process a single catalog, accumulating the auto-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat (Catalog): The catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat.name == '': - self.logger.info('Starting process QQ auto-correlations') - else: - self.logger.info('Starting process QQ auto-correlations for cat %s.',cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getQField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) - - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) 
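Although the per-class copies of these methods are deleted here, the step-by-step workflow they document still runs through the base classes. A hedged sketch (illustrative data; assumes the 5.0-style q1/q2 catalog columns and that calculateVarQ is exported at the top level):

    import numpy as np
    import treecorr

    rng = np.random.default_rng(3)
    n = 800
    cats = [treecorr.Catalog(x=rng.uniform(0, 100, n), y=rng.uniform(0, 100, n),
                             q1=rng.normal(0, 0.1, n), q2=rng.normal(0, 0.1, n))
            for _ in range(2)]

    qq = treecorr.QQCorrelation(min_sep=1., max_sep=50., nbins=8)
    for c in cats:
        qq.process_auto(c)                 # accumulate auto terms
    qq.process_cross(cats[0], cats[1])     # accumulate the cross term
    varq = treecorr.calculateVarQ(cats)    # assumed top-level export
    qq.finalize(varq, varq)                # units, if any, are applied here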
- """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process QQ cross-correlations') - else: - self.logger.info('Starting process QQ cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getQField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getQField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def getStat(self): - """The standard statistic for the current correlation object as a 1-d array. - - In this case, this is the concatenation of self.xip and self.xim (raveled if necessary). - """ - return np.concatenate([self.xip.ravel(), self.xim.ravel()]) - - def getWeight(self): - """The weight array for the current correlation object as a 1-d array. - - This is the weight array corresponding to `getStat`. In this case, the weight is - duplicated to account for both xip and xim returned as part of getStat(). - """ - return np.concatenate([self.weight.ravel(), self.weight.ravel()]) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xip[mask1] /= self.weight[mask1] - self.xim[mask1] /= self.weight[mask1] - self.xip_im[mask1] /= self.weight[mask1] - self.xim_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varq1, varq2): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, - so they can be called multiple times if appropriate. Afterwards, this command + The `BaseZZCorrelation.process_auto` and `Corr2.process_cross` commands accumulate values + in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varq1 (float): The variance per component of the first spin-4 field. varq2 (float): The variance per component of the second spin-4 field. """ - self._finalize() - self._var_num = 2. 
* varq1 * varq2 - - @property - def varxip(self): - if self._varxip is None: - self._varxip = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxip.ravel()[:] = self.cov_diag[:self._nbins] - return self._varxip - - @property - def varxim(self): - if self._varxim is None: - self._varxim = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxim.ravel()[:] = self.cov_diag[self._nbins:] - return self._varxim - - def _clear(self): - """Clear the data vectors - """ - self.xip.ravel()[:] = 0 - self.xim.ravel()[:] = 0 - self.xip_im.ravel()[:] = 0 - self.xim_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxip = None - self._varxim = None - self._cov = None - - def __iadd__(self, other): - """Add a second `QQCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `QQCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, QQCorrelation): - raise TypeError("Can only add another QQCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("QQCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xip.ravel()[:] += other.xip.ravel()[:] - self.xim.ravel()[:] += other.xim.ravel()[:] - self.xip_im.ravel()[:] += other.xip_im.ravel()[:] - self.xim_im.ravel()[:] += other.xim_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. - np.sum([c.xip for c in others], axis=0, out=self.xip) - np.sum([c.xim for c in others], axis=0, out=self.xim) - np.sum([c.xip_im for c in others], axis=0, out=self.xip_im) - np.sum([c.xim_im for c in others], axis=0, out=self.xim_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first Q field. - cat2 (Catalog): A catalog or list of catalogs for the second Q field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) 
- comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. (default: 'global') - """ - num_threads = 1 - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - if cat2 is not None: - self._processed_cats2.extend(cat2) - if finalize: - if cat2 is None: - varq1 = calculateVarQ(self._processed_cats1, low_mem=low_mem) - varq2 = varq1 - self.logger.info("varq = %f: sig_sn (per component) = %f",varq1,math.sqrt(varq1)) - else: - varq1 = calculateVarQ(self._processed_cats1, low_mem=low_mem) - varq2 = calculateVarQ(self._processed_cats2, low_mem=low_mem) - self.logger.info("varq1 = %f: sig_sn (per component) = %f",varq1,math.sqrt(varq1)) - self.logger.info("varq2 = %f: sig_sn (per component) = %f",varq2,math.sqrt(varq2)) - self.finalize(varq1,varq2) - self._processed_cats1.clear() - self._processed_cats2.clear() - - def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, - write_cov=False): - r"""Write the correlation function to the file, file_name. - - The output file will include the following columns: - - ========= ======================================================== - Column Description - ========= ======================================================== - r_nom The nominal center of the bin in r - meanr The mean value :math:`\langle r \rangle` of pairs that - fell into each bin - meanlogr The mean value :math:`\langle \log(r) \rangle` of pairs - that fell into each bin - xip The real part of the :math:`\xi_+` correlation function - xim The real part of the :math:`\xi_-` correlation function - xip_im The imag part of the :math:`\xi_+` correlation function - xim_im The imag part of the :math:`\xi_-` correlation function - sigma_xip The sqrt of the variance estimate of :math:`\xi_+` - sigma_xim The sqrt of the variance estimate of :math:`\xi_-` - weight The total weight contributing to each bin - npairs The total number of pairs in each bin - ========= ======================================================== - - If ``sep_units`` was given at construction, then the distances will all be in these units. - Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or - radians (for spherical coordinates). - - Parameters: - file_name (str): The name of the file to write to. - file_type (str): The type of file to write ('ASCII' or 'FITS'). (default: determine - the type automatically from the extension of file_name.) 
- precision (int): For ASCII output catalogs, the desired precision. (default: 4; - this value can also be given in the constructor in the config dict.) - write_patch_results (bool): Whether to write the patch-based results as well. - (default: False) - write_cov (bool): Whether to write the covariance matrix as well. (default: False) - """ - self.logger.info('Writing QQ correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom', 'meanr', 'meanlogr', 'xip', 'xim', 'xip_im', 'xim_im', - 'sigma_xip', 'sigma_xim', 'weight', 'npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xip, self.xim, self.xip_im, self.xim_im, - np.sqrt(self.varxip), np.sqrt(self.varxim), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a QQCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A QQCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building QQCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading QQ correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `QQCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) 
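The ``write_cov`` option in the docstring above writes out the same matrix that is exposed as ``cov``; with patches, that covariance can come from resampling instead of shot noise. A hedged sketch (illustrative data and patch count):

    import numpy as np
    import treecorr

    rng = np.random.default_rng(5)
    n = 2000
    cat = treecorr.Catalog(x=rng.uniform(0, 100, n), y=rng.uniform(0, 100, n),
                           q1=rng.normal(0, 0.1, n), q2=rng.normal(0, 0.1, n),
                           npatch=8)        # patches enable resampled covariances
    qq = treecorr.QQCorrelation(min_sep=1., max_sep=50., nbins=8,
                                var_method='jackknife')
    qq.process(cat)
    print(qq.cov.shape)   # (16, 16): getStat() stacks xip and xim, so 2*nbins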
- """ - self.logger.info('Reading QQ correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - # Helper function used by _read - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xip = data['xip'].reshape(s) - self.xim = data['xim'].reshape(s) - self.xip_im = data['xip_im'].reshape(s) - self.xim_im = data['xim_im'].reshape(s) - # Read old output files without error. - self._varxip = data['sigma_xip'].reshape(s)**2 - self._varxim = data['sigma_xim'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().finalize(varq1, varq2) diff --git a/treecorr/reader.py b/treecorr/reader.py index 87288a83..f09c0b08 100644 --- a/treecorr/reader.py +++ b/treecorr/reader.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2020 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/ttcorrelation.py b/treecorr/ttcorrelation.py index 356bc251..e78aec90 100644 --- a/treecorr/ttcorrelation.py +++ b/treecorr/ttcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarT -from .corr2base import Corr2 +from .zzcorrelation import BaseZZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class TTCorrelation(Corr2): +class TTCorrelation(BaseZZCorrelation): r"""This class handles the calculation and storage of a 2-point spin-3-spin-3 correlation function. @@ -73,9 +73,9 @@ class TTCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_auto` and/or - `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until - the `finalize` function is called. + If you separate out the steps of the `Corr2.process` command and use + `BaseZZCorrelation.process_auto` and/or `Corr2.process_cross`, then the units will not be + applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. The typical usage pattern is as follows: @@ -96,481 +96,28 @@ class TTCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'TTCorrelation' + _letter1 = 'T' + _letter2 = 'T' + _letters = 'TT' + _builder = _treecorr.TTCorr + _calculateVar1 = staticmethod(calculateVarT) + _calculateVar2 = staticmethod(calculateVarT) + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `TTCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xip = np.zeros_like(self.rnom, dtype=float) - self.xim = np.zeros_like(self.rnom, dtype=float) - self.xip_im = np.zeros_like(self.rnom, dtype=float) - self.xim_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxip = None - self._varxim = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building TTCorr') - - @property - def corr(self): - if self._corr is None: - self._corr = _treecorr.TTCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xip, self.xip_im, self.xim, self.xim_im, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `TTCorrelation` instances are equal""" - return (isinstance(other, TTCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xip, other.xip) and - np.array_equal(self.xim, other.xim) and - np.array_equal(self.xip_im, other.xip_im) and - np.array_equal(self.xim_im, other.xim_im) and - np.array_equal(self.varxip, other.varxip) and - np.array_equal(self.varxim, other.varxim) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = TTCorrelation.__new__(TTCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'TTCorrelation({self._repr_kwargs})' - - def process_auto(self, cat, *, metric=None, num_threads=None): - """Process a single catalog, accumulating the auto-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat (Catalog): The catalog to process - metric (str): Which metric to use. See `Metrics` for details. 
- (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat.name == '': - self.logger.info('Starting process TT auto-correlations') - else: - self.logger.info('Starting process TT auto-correlations for cat %s.',cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getTField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) - - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process TT cross-correlations') - else: - self.logger.info('Starting process TT cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getTField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getTField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def getStat(self): - """The standard statistic for the current correlation object as a 1-d array. - - In this case, this is the concatenation of self.xip and self.xim (raveled if necessary). - """ - return np.concatenate([self.xip.ravel(), self.xim.ravel()]) - - def getWeight(self): - """The weight array for the current correlation object as a 1-d array. - - This is the weight array corresponding to `getStat`. In this case, the weight is - duplicated to account for both xip and xim returned as part of getStat(). 
- """ - return np.concatenate([self.weight.ravel(), self.weight.ravel()]) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xip[mask1] /= self.weight[mask1] - self.xim[mask1] /= self.weight[mask1] - self.xip_im[mask1] /= self.weight[mask1] - self.xim_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, vart1, vart2): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, - so they can be called multiple times if appropriate. Afterwards, this command + The `BaseZZCorrelation.process_auto` and `Corr2.process_cross` commands accumulate values + in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: vart1 (float): The variance per component of the first spin-3 field. vart2 (float): The variance per component of the second spin-3 field. """ - self._finalize() - self._var_num = 2. * vart1 * vart2 - - @property - def varxip(self): - if self._varxip is None: - self._varxip = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxip.ravel()[:] = self.cov_diag[:self._nbins] - return self._varxip - - @property - def varxim(self): - if self._varxim is None: - self._varxim = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxim.ravel()[:] = self.cov_diag[self._nbins:] - return self._varxim - - def _clear(self): - """Clear the data vectors - """ - self.xip.ravel()[:] = 0 - self.xim.ravel()[:] = 0 - self.xip_im.ravel()[:] = 0 - self.xim_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxip = None - self._varxim = None - self._cov = None - - def __iadd__(self, other): - """Add a second `TTCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `TTCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, TTCorrelation): - raise TypeError("Can only add another TTCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("TTCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xip.ravel()[:] += other.xip.ravel()[:] - self.xim.ravel()[:] += other.xim.ravel()[:] - self.xip_im.ravel()[:] += other.xip_im.ravel()[:] - self.xim_im.ravel()[:] += other.xim_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. 
- np.sum([c.xip for c in others], axis=0, out=self.xip) - np.sum([c.xim for c in others], axis=0, out=self.xim) - np.sum([c.xip_im for c in others], axis=0, out=self.xip_im) - np.sum([c.xim_im for c in others], axis=0, out=self.xim_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first T field. - cat2 (Catalog): A catalog or list of catalogs for the second T field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - if cat2 is not None: - self._processed_cats2.extend(cat2) - if finalize: - if cat2 is None: - vart1 = calculateVarT(self._processed_cats1, low_mem=low_mem) - vart2 = vart1 - self.logger.info("vart = %f: sig_sn (per component) = %f",vart1,math.sqrt(vart1)) - else: - vart1 = calculateVarT(self._processed_cats1, low_mem=low_mem) - vart2 = calculateVarT(self._processed_cats2, low_mem=low_mem) - self.logger.info("vart1 = %f: sig_sn (per component) = %f",vart1,math.sqrt(vart1)) - self.logger.info("vart2 = %f: sig_sn (per component) = %f",vart2,math.sqrt(vart2)) - self.finalize(vart1,vart2) - self._processed_cats1.clear() - self._processed_cats2.clear() - - def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, - write_cov=False): - r"""Write the correlation function to the file, file_name. - - The output file will include the following columns: - - ========= ======================================================== - Column Description - ========= ======================================================== - r_nom The nominal center of the bin in r - meanr The mean value :math:`\langle r \rangle` of pairs that - fell into each bin - meanlogr The mean value :math:`\langle \log(r) \rangle` of pairs - that fell into each bin - xip The real part of the :math:`\xi_+` correlation function - xim The real part of the :math:`\xi_-` correlation function - xip_im The imag part of the :math:`\xi_+` correlation function - xim_im The imag part of the :math:`\xi_-` correlation function - sigma_xip The sqrt of the variance estimate of :math:`\xi_+` - sigma_xim The sqrt of the variance estimate of :math:`\xi_-` - weight The total weight contributing to each bin - npairs The total number of pairs in each bin - ========= ======================================================== - - If ``sep_units`` was given at construction, then the distances will all be in these units. - Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or - radians (for spherical coordinates). - - Parameters: - file_name (str): The name of the file to write to. - file_type (str): The type of file to write ('ASCII' or 'FITS'). (default: determine - the type automatically from the extension of file_name.) - precision (int): For ASCII output catalogs, the desired precision. (default: 4; - this value can also be given in the constructor in the config dict.) - write_patch_results (bool): Whether to write the patch-based results as well. - (default: False) - write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) - """ - self.logger.info('Writing TT correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom', 'meanr', 'meanlogr', 'xip', 'xim', 'xip_im', 'xim_im', - 'sigma_xip', 'sigma_xim', 'weight', 'npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xip, self.xim, self.xip_im, self.xim_im, - np.sqrt(self.varxip), np.sqrt(self.varxim), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a TTCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A TTCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building TTCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading TT correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `TTCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading TT correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - # Helper function used by _read - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xip = data['xip'].reshape(s) - self.xim = data['xim'].reshape(s) - self.xip_im = data['xip_im'].reshape(s) - self.xim_im = data['xim_im'].reshape(s) - # Read old output files without error. 
- self._varxip = data['sigma_xip'].reshape(s)**2 - self._varxim = data['sigma_xim'].reshape(s)**2 - self.weight = data['weight'].reshape(s) - self.npairs = data['npairs'].reshape(s) - self.coords = params['coords'].strip() - self.metric = params['metric'].strip() - self.npatch1 = params.get('npatch1', 1) - self.npatch2 = params.get('npatch2', 1) + super().finalize(vart1, vart2) diff --git a/treecorr/util.py b/treecorr/util.py index b1ba2807..0fcc205e 100644 --- a/treecorr/util.py +++ b/treecorr/util.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following diff --git a/treecorr/vvcorrelation.py b/treecorr/vvcorrelation.py index 24e3406f..c6d999de 100644 --- a/treecorr/vvcorrelation.py +++ b/treecorr/vvcorrelation.py @@ -1,4 +1,4 @@ -# Copyright (c) 2003-2019 by Mike Jarvis +# Copyright (c) 2003-2024 by Mike Jarvis # # TreeCorr is free software: redistribution and use in source and binary forms, # with or without modification, are permitted provided that the following @@ -19,12 +19,12 @@ from . import _treecorr from .catalog import calculateVarV -from .corr2base import Corr2 +from .zzcorrelation import BaseZZCorrelation from .util import make_writer, make_reader from .config import make_minimal_config -class VVCorrelation(Corr2): +class VVCorrelation(BaseZZCorrelation): r"""This class handles the calculation and storage of a 2-point vector-vector correlation function. @@ -73,9 +73,9 @@ class VVCorrelation(Corr2): .. note:: - If you separate out the steps of the `process` command and use `process_auto` and/or - `process_cross`, then the units will not be applied to ``meanr`` or ``meanlogr`` until - the `finalize` function is called. + If you separate out the steps of the `Corr2.process` command and use + `BaseZZCorrelation.process_auto` and/or `Corr2.process_cross`, then the units will not be + applied to ``meanr`` or ``meanlogr`` until the `finalize` function is called. The typical usage pattern is as follows: @@ -96,481 +96,28 @@ class VVCorrelation(Corr2): **kwargs: See the documentation for `Corr2` for the list of allowed keyword arguments, which may be passed either directly or in the config dict. """ + _cls = 'VVCorrelation' + _letter1 = 'V' + _letter2 = 'V' + _letters = 'VV' + _builder = _treecorr.VVCorr + _calculateVar1 = staticmethod(calculateVarV) + _calculateVar2 = staticmethod(calculateVarV) + def __init__(self, config=None, *, logger=None, **kwargs): """Initialize `VVCorrelation`. See class doc for details. 
""" - Corr2.__init__(self, config, logger=logger, **kwargs) - - self.xip = np.zeros_like(self.rnom, dtype=float) - self.xim = np.zeros_like(self.rnom, dtype=float) - self.xip_im = np.zeros_like(self.rnom, dtype=float) - self.xim_im = np.zeros_like(self.rnom, dtype=float) - self.meanr = np.zeros_like(self.rnom, dtype=float) - self.meanlogr = np.zeros_like(self.rnom, dtype=float) - self.weight = np.zeros_like(self.rnom, dtype=float) - self.npairs = np.zeros_like(self.rnom, dtype=float) - self._varxip = None - self._varxim = None - self._cov = None - self._var_num = 0 - self._processed_cats1 = [] - self._processed_cats2 = [] - self.logger.debug('Finished building VVCorr') - - @property - def corr(self): - if self._corr is None: - self._corr = _treecorr.VVCorr(self._bintype, self._min_sep, self._max_sep, self._nbins, - self._bin_size, self.b, self.angle_slop, - self.min_rpar, self.max_rpar, - self.xperiod, self.yperiod, self.zperiod, - self.xip, self.xip_im, self.xim, self.xim_im, - self.meanr, self.meanlogr, self.weight, self.npairs) - return self._corr - - def __eq__(self, other): - """Return whether two `VVCorrelation` instances are equal""" - return (isinstance(other, VVCorrelation) and - self.nbins == other.nbins and - self.bin_size == other.bin_size and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep and - self.sep_units == other.sep_units and - self.coords == other.coords and - self.bin_type == other.bin_type and - self.bin_slop == other.bin_slop and - self.angle_slop == other.angle_slop and - self.min_rpar == other.min_rpar and - self.max_rpar == other.max_rpar and - self.xperiod == other.xperiod and - self.yperiod == other.yperiod and - self.zperiod == other.zperiod and - np.array_equal(self.meanr, other.meanr) and - np.array_equal(self.meanlogr, other.meanlogr) and - np.array_equal(self.xip, other.xip) and - np.array_equal(self.xim, other.xim) and - np.array_equal(self.xip_im, other.xip_im) and - np.array_equal(self.xim_im, other.xim_im) and - np.array_equal(self.varxip, other.varxip) and - np.array_equal(self.varxim, other.varxim) and - np.array_equal(self.weight, other.weight) and - np.array_equal(self.npairs, other.npairs)) - - def copy(self): - """Make a copy""" - ret = VVCorrelation.__new__(VVCorrelation) - for key, item in self.__dict__.items(): - if isinstance(item, np.ndarray): - # Only items that might change need to by deep copied. - ret.__dict__[key] = item.copy() - else: - # For everything else, shallow copy is fine. - # In particular don't deep copy config or logger - # Most of the rest are scalars, which copy fine this way. - # And the read-only things are all in _ro. - # The results dict is trickier. We rely on it being copied in places, but we - # never add more to it after the copy, so shallow copy is fine. - ret.__dict__[key] = item - ret._corr = None # We'll want to make a new one of these if we need it. - return ret - - def __repr__(self): - return f'VVCorrelation({self._repr_kwargs})' - - def process_auto(self, cat, *, metric=None, num_threads=None): - """Process a single catalog, accumulating the auto-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat (Catalog): The catalog to process - metric (str): Which metric to use. See `Metrics` for details. 
- (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat.name == '': - self.logger.info('Starting process VV auto-correlations') - else: - self.logger.info('Starting process VV auto-correlations for cat %s.',cat.name) - - self._set_metric(metric, cat.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - field = cat.getVField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=bool(self.brute), - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',field.nTopLevelNodes) - self.corr.processAuto(field.data, self.output_dots, self._metric) - - - def process_cross(self, cat1, cat2, *, metric=None, num_threads=None): - """Process a single pair of catalogs, accumulating the cross-correlation. - - This accumulates the weighted sums into the bins, but does not finalize - the calculation by dividing by the total weight at the end. After - calling this function as often as desired, the `finalize` command will - finish the calculation. - - Parameters: - cat1 (Catalog): The first catalog to process - cat2 (Catalog): The second catalog to process - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - """ - if cat1.name == '' and cat2.name == '': - self.logger.info('Starting process VV cross-correlations') - else: - self.logger.info('Starting process VV cross-correlations for cats %s, %s.', - cat1.name, cat2.name) - - self._set_metric(metric, cat1.coords, cat2.coords) - self._set_num_threads(num_threads) - min_size, max_size = self._get_minmax_size() - - f1 = cat1.getVField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 1, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - f2 = cat2.getVField(min_size=min_size, max_size=max_size, - split_method=self.split_method, - brute=self.brute is True or self.brute == 2, - min_top=self.min_top, max_top=self.max_top, - coords=self.coords) - - self.logger.info('Starting %d jobs.',f1.nTopLevelNodes) - self.corr.processCross(f1.data, f2.data, self.output_dots, self._metric) - - def getStat(self): - """The standard statistic for the current correlation object as a 1-d array. - - In this case, this is the concatenation of self.xip and self.xim (raveled if necessary). - """ - return np.concatenate([self.xip.ravel(), self.xim.ravel()]) - - def getWeight(self): - """The weight array for the current correlation object as a 1-d array. - - This is the weight array corresponding to `getStat`. In this case, the weight is - duplicated to account for both xip and xim returned as part of getStat(). 
- """ - return np.concatenate([self.weight.ravel(), self.weight.ravel()]) - - def _finalize(self): - mask1 = self.weight != 0 - mask2 = self.weight == 0 - - self.xip[mask1] /= self.weight[mask1] - self.xim[mask1] /= self.weight[mask1] - self.xip_im[mask1] /= self.weight[mask1] - self.xim_im[mask1] /= self.weight[mask1] - self.meanr[mask1] /= self.weight[mask1] - self.meanlogr[mask1] /= self.weight[mask1] - - # Update the units of meanr, meanlogr - self._apply_units(mask1) - - # Use meanr, meanlogr when available, but set to nominal when no pairs in bin. - self.meanr[mask2] = self.rnom[mask2] - self.meanlogr[mask2] = self.logr[mask2] + super().__init__(config, logger=logger, **kwargs) def finalize(self, varv1, varv2): """Finalize the calculation of the correlation function. - The `process_auto` and `process_cross` commands accumulate values in each bin, - so they can be called multiple times if appropriate. Afterwards, this command + The `BaseZZCorrelation.process_auto` and `Corr2.process_cross` commands accumulate values + in each bin, so they can be called multiple times if appropriate. Afterwards, this command finishes the calculation by dividing each column by the total weight. Parameters: varv1 (float): The variance per component of the first vector field. varv2 (float): The variance per component of the second vector field. """ - self._finalize() - self._var_num = 2. * varv1 * varv2 - - @property - def varxip(self): - if self._varxip is None: - self._varxip = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxip.ravel()[:] = self.cov_diag[:self._nbins] - return self._varxip - - @property - def varxim(self): - if self._varxim is None: - self._varxim = np.zeros_like(self.rnom, dtype=float) - if self._var_num != 0: - self._varxim.ravel()[:] = self.cov_diag[self._nbins:] - return self._varxim - - def _clear(self): - """Clear the data vectors - """ - self.xip.ravel()[:] = 0 - self.xim.ravel()[:] = 0 - self.xip_im.ravel()[:] = 0 - self.xim_im.ravel()[:] = 0 - self.meanr.ravel()[:] = 0 - self.meanlogr.ravel()[:] = 0 - self.weight.ravel()[:] = 0 - self.npairs.ravel()[:] = 0 - self._varxip = None - self._varxim = None - self._cov = None - - def __iadd__(self, other): - """Add a second `VVCorrelation`'s data to this one. - - .. note:: - - For this to make sense, both `VVCorrelation` objects should not have had `finalize` - called yet. Then, after adding them together, you should call `finalize` on the sum. - """ - if not isinstance(other, VVCorrelation): - raise TypeError("Can only add another VVCorrelation object") - if not (self._nbins == other._nbins and - self.min_sep == other.min_sep and - self.max_sep == other.max_sep): - raise ValueError("VVCorrelation to be added is not compatible with this one.") - - self._set_metric(other.metric, other.coords, other.coords) - self.xip.ravel()[:] += other.xip.ravel()[:] - self.xim.ravel()[:] += other.xim.ravel()[:] - self.xip_im.ravel()[:] += other.xip_im.ravel()[:] - self.xim_im.ravel()[:] += other.xim_im.ravel()[:] - self.meanr.ravel()[:] += other.meanr.ravel()[:] - self.meanlogr.ravel()[:] += other.meanlogr.ravel()[:] - self.weight.ravel()[:] += other.weight.ravel()[:] - self.npairs.ravel()[:] += other.npairs.ravel()[:] - return self - - def _sum(self, others): - # Equivalent to the operation of: - # self._clear() - # for other in others: - # self += other - # but no sanity checks and use numpy.sum for faster calculation. 
- np.sum([c.xip for c in others], axis=0, out=self.xip) - np.sum([c.xim for c in others], axis=0, out=self.xim) - np.sum([c.xip_im for c in others], axis=0, out=self.xip_im) - np.sum([c.xim_im for c in others], axis=0, out=self.xim_im) - np.sum([c.meanr for c in others], axis=0, out=self.meanr) - np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) - np.sum([c.weight for c in others], axis=0, out=self.weight) - np.sum([c.npairs for c in others], axis=0, out=self.npairs) - - def process(self, cat1, cat2=None, *, metric=None, num_threads=None, comm=None, low_mem=False, - initialize=True, finalize=True, patch_method='global'): - """Compute the correlation function. - - - If only 1 argument is given, then compute an auto-correlation function. - - If 2 arguments are given, then compute a cross-correlation function. - - Both arguments may be lists, in which case all items in the list are used - for that element of the correlation. - - Parameters: - cat1 (Catalog): A catalog or list of catalogs for the first V field. - cat2 (Catalog): A catalog or list of catalogs for the second V field, if any. - (default: None) - metric (str): Which metric to use. See `Metrics` for details. - (default: 'Euclidean'; this value can also be given in the - constructor in the config dict.) - num_threads (int): How many OpenMP threads to use during the calculation. - (default: use the number of cpu cores; this value can also be given - in the constructor in the config dict.) - comm (mpi4py.Comm): If running MPI, an mpi4py Comm object to communicate between - processes. If used, the rank=0 process will have the final - computation. This only works if using patches. (default: None) - low_mem (bool): Whether to sacrifice a little speed to try to reduce memory usage. - This only works if using patches. (default: False) - initialize (bool): Whether to begin the calculation with a call to - `Corr2.clear`. (default: True) - finalize (bool): Whether to complete the calculation with a call to `finalize`. - (default: True) - patch_method (str): Which patch method to use. 
(default: 'global') - """ - import math - if initialize: - self.clear() - self._processed_cats1.clear() - self._processed_cats2.clear() - - if patch_method not in ['local', 'global']: - raise ValueError("Invalid patch_method %s"%patch_method) - local = patch_method == 'local' - - if not isinstance(cat1,list): - cat1 = cat1.get_patches(low_mem=low_mem) - if cat2 is not None and not isinstance(cat2,list): - cat2 = cat2.get_patches(low_mem=low_mem) - - if cat2 is None: - self._process_all_auto(cat1, metric, num_threads, comm, low_mem, local) - else: - self._process_all_cross(cat1, cat2, metric, num_threads, comm, low_mem, local) - - self._processed_cats1.extend(cat1) - if cat2 is not None: - self._processed_cats2.extend(cat2) - if finalize: - if cat2 is None: - varv1 = calculateVarV(self._processed_cats1, low_mem=low_mem) - varv2 = varv1 - self.logger.info("varv = %f: sig_sn (per component) = %f",varv1,math.sqrt(varv1)) - else: - varv1 = calculateVarV(self._processed_cats1, low_mem=low_mem) - varv2 = calculateVarV(self._processed_cats2, low_mem=low_mem) - self.logger.info("varv1 = %f: sig_sn (per component) = %f",varv1,math.sqrt(varv1)) - self.logger.info("varv2 = %f: sig_sn (per component) = %f",varv2,math.sqrt(varv2)) - self.finalize(varv1,varv2) - self._processed_cats1.clear() - self._processed_cats2.clear() - - def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, - write_cov=False): - r"""Write the correlation function to the file, file_name. - - The output file will include the following columns: - - ========= ======================================================== - Column Description - ========= ======================================================== - r_nom The nominal center of the bin in r - meanr The mean value :math:`\langle r \rangle` of pairs that - fell into each bin - meanlogr The mean value :math:`\langle \log(r) \rangle` of pairs - that fell into each bin - xip The real part of the :math:`\xi_+` correlation function - xim The real part of the :math:`\xi_-` correlation function - xip_im The imag part of the :math:`\xi_+` correlation function - xim_im The imag part of the :math:`\xi_-` correlation function - sigma_xip The sqrt of the variance estimate of :math:`\xi_+` - sigma_xim The sqrt of the variance estimate of :math:`\xi_-` - weight The total weight contributing to each bin - npairs The total number of pairs in each bin - ========= ======================================================== - - If ``sep_units`` was given at construction, then the distances will all be in these units. - Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or - radians (for spherical coordinates). - - Parameters: - file_name (str): The name of the file to write to. - file_type (str): The type of file to write ('ASCII' or 'FITS'). (default: determine - the type automatically from the extension of file_name.) - precision (int): For ASCII output catalogs, the desired precision. (default: 4; - this value can also be given in the constructor in the config dict.) - write_patch_results (bool): Whether to write the patch-based results as well. - (default: False) - write_cov (bool): Whether to write the covariance matrix as well. 
(default: False) - """ - self.logger.info('Writing VV correlations to %s',file_name) - precision = self.config.get('precision', 4) if precision is None else precision - with make_writer(file_name, precision, file_type, self.logger) as writer: - self._write(writer, None, write_patch_results, write_cov=write_cov) - - @property - def _write_col_names(self): - return ['r_nom', 'meanr', 'meanlogr', 'xip', 'xim', 'xip_im', 'xim_im', - 'sigma_xip', 'sigma_xim', 'weight', 'npairs'] - - @property - def _write_data(self): - data = [ self.rnom, self.meanr, self.meanlogr, - self.xip, self.xim, self.xip_im, self.xim_im, - np.sqrt(self.varxip), np.sqrt(self.varxim), - self.weight, self.npairs ] - data = [ col.flatten() for col in data ] - return data - - @property - def _write_params(self): - params = make_minimal_config(self.config, Corr2._valid_params) - # Add in a couple other things we want to preserve that aren't construction kwargs. - params['coords'] = self.coords - params['metric'] = self.metric - return params - - @classmethod - def from_file(cls, file_name, *, file_type=None, logger=None, rng=None): - """Create a VVCorrelation instance from an output file. - - This should be a file that was written by TreeCorr. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII', 'FITS', or 'HDF'). (default: determine - the type automatically from the extension of file_name.) - logger (Logger): If desired, a logger object to use for logging. (default: None) - rng (RandomState): If desired, a numpy.random.RandomState instance to use for bootstrap - random number generation. (default: None) - - Returns: - corr: A VVCorrelation object, constructed from the information in the file. - """ - if logger: - logger.info('Building VVCorrelation from %s',file_name) - with make_reader(file_name, file_type, logger) as reader: - name = 'main' if 'main' in reader else None - params = reader.read_params(ext=name) - kwargs = make_minimal_config(params, Corr2._valid_params) - corr = cls(**kwargs, logger=logger, rng=rng) - corr.logger.info('Reading VV correlations from %s',file_name) - corr._read(reader, name=name, params=params) - return corr - - def read(self, file_name, *, file_type=None): - """Read in values from a file. - - This should be a file that was written by TreeCorr, preferably a FITS or HDF5 file, so - there is no loss of information. - - .. warning:: - - The `VVCorrelation` object should be constructed with the same configuration - parameters as the one being read. e.g. the same min_sep, max_sep, etc. This is not - checked by the read function. - - Parameters: - file_name (str): The name of the file to read in. - file_type (str): The type of file ('ASCII' or 'FITS'). (default: determine the type - automatically from the extension of file_name.) - """ - self.logger.info('Reading VV correlations from %s',file_name) - with make_reader(file_name, file_type, self.logger) as reader: - self._read(reader) - - # Helper function used by _read - def _read_from_data(self, data, params): - s = self.logr.shape - self.meanr = data['meanr'].reshape(s) - self.meanlogr = data['meanlogr'].reshape(s) - self.xip = data['xip'].reshape(s) - self.xim = data['xim'].reshape(s) - self.xip_im = data['xip_im'].reshape(s) - self.xim_im = data['xim_im'].reshape(s) - # Read old output files without error. 
-        self._varxip = data['sigma_xip'].reshape(s)**2
-        self._varxim = data['sigma_xim'].reshape(s)**2
-        self.weight = data['weight'].reshape(s)
-        self.npairs = data['npairs'].reshape(s)
-        self.coords = params['coords'].strip()
-        self.metric = params['metric'].strip()
-        self.npatch1 = params.get('npatch1', 1)
-        self.npatch2 = params.get('npatch2', 1)
+        super().finalize(varv1, varv2)
diff --git a/treecorr/writer.py b/treecorr/writer.py
index a1bb259d..7855a3cc 100644
--- a/treecorr/writer.py
+++ b/treecorr/writer.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2003-2020 by Mike Jarvis
+# Copyright (c) 2003-2024 by Mike Jarvis
 #
 # TreeCorr is free software: redistribution and use in source and binary forms,
 # with or without modification, are permitted provided that the following
diff --git a/treecorr/zzcorrelation.py b/treecorr/zzcorrelation.py
new file mode 100644
index 00000000..0c9223bb
--- /dev/null
+++ b/treecorr/zzcorrelation.py
@@ -0,0 +1,317 @@
+# Copyright (c) 2003-2024 by Mike Jarvis
+#
+# TreeCorr is free software: redistribution and use in source and binary forms,
+# with or without modification, are permitted provided that the following
+# conditions are met:
+#
+# 1. Redistributions of source code must retain the above copyright notice, this
+#    list of conditions, and the disclaimer given in the accompanying LICENSE
+#    file.
+# 2. Redistributions in binary form must reproduce the above copyright notice,
+#    this list of conditions, and the disclaimer given in the documentation
+#    and/or other materials provided with the distribution.
+
+"""
+.. module:: zzcorrelation
+"""
+
+import numpy as np
+
+from . import _treecorr
+from .catalog import calculateVarZ
+from .corr2base import Corr2
+from .util import make_writer
+from .config import make_minimal_config
+
+
+class BaseZZCorrelation(Corr2):
+    """This class is a base class for all the ??Correlation classes, where both ?'s are one of
+    the complex fields of varying spin.
+
+    A lot of the implementation is shared among those types, so whenever possible the shared
+    implementation is done in this class.
+    """
+    _sig1 = 'sig_sn (per component)'
+    _sig2 = 'sig_sn (per component)'
+
+    def __init__(self, config=None, *, logger=None, **kwargs):
+        super().__init__(config, logger=logger, **kwargs)
+
+        self._xi1 = np.zeros_like(self.rnom, dtype=float)
+        self._xi2 = np.zeros_like(self.rnom, dtype=float)
+        self._xi3 = np.zeros_like(self.rnom, dtype=float)
+        self._xi4 = np.zeros_like(self.rnom, dtype=float)
+        self._varxip = None
+        self._varxim = None
+        self.logger.debug('Finished building %s', self._cls)
+
+    @property
+    def xip(self):
+        return self._xi1
+
+    @property
+    def xip_im(self):
+        return self._xi2
+
+    @property
+    def xim(self):
+        return self._xi3
+
+    @property
+    def xim_im(self):
+        return self._xi4
+
+    def getStat(self):
+        """The standard statistic for the current correlation object as a 1-d array.
+
+        In this case, this is the concatenation of self.xip and self.xim (raveled if necessary).
+        """
+        return np.concatenate([self.xip.ravel(), self.xim.ravel()])
+
+    def getWeight(self):
+        """The weight array for the current correlation object as a 1-d array.
+
+        This is the weight array corresponding to `getStat`.  In this case, the weight is
+        duplicated to account for both xip and xim returned as part of getStat().
+        """
+        return np.concatenate([self.weight.ravel(), self.weight.ravel()])
+
+    def process_auto(self, cat, *, metric=None, num_threads=None):
+        """Process a single catalog, accumulating the auto-correlation.
+ + This accumulates the weighted sums into the bins, but does not finalize + the calculation by dividing by the total weight at the end. After + calling this function as often as desired, the `finalize` command will + finish the calculation. + + Parameters: + cat (Catalog): The catalog to process + metric (str): Which metric to use. See `Metrics` for details. + (default: 'Euclidean'; this value can also be given in the + constructor in the config dict.) + num_threads (int): How many OpenMP threads to use during the calculation. + (default: use the number of cpu cores; this value can also be given + in the constructor in the config dict.) + """ + super()._process_auto(cat, metric, num_threads) + + def finalize(self, varz1, varz2): + """Finalize the calculation of the correlation function. + + The `process_auto` and `Corr2.process_cross` commands accumulate values in each bin, + so they can be called multiple times if appropriate. Afterwards, this command + finishes the calculation by dividing each column by the total weight. + + Parameters: + varz1 (float): The variance per component of the first field. + varz2 (float): The variance per component of the second field. + """ + self._finalize() + self._var_num = 2. * varz1 * varz2 + + @property + def varxip(self): + if self._varxip is None: + self._varxip = np.zeros_like(self.rnom, dtype=float) + if self._var_num != 0: + self._varxip.ravel()[:] = self.cov_diag[:self._nbins] + return self._varxip + + @property + def varxim(self): + if self._varxim is None: + self._varxim = np.zeros_like(self.rnom, dtype=float) + if self._var_num != 0: + self._varxim.ravel()[:] = self.cov_diag[self._nbins:] + return self._varxim + + def _clear(self): + """Clear the data vectors + """ + super()._clear() + self._varxip = None + self._varxim = None + + def _sum(self, others): + # Equivalent to the operation of: + # self._clear() + # for other in others: + # self += other + # but no sanity checks and use numpy.sum for faster calculation. + np.sum([c._xi1 for c in others], axis=0, out=self._xi1) + np.sum([c._xi2 for c in others], axis=0, out=self._xi2) + np.sum([c._xi3 for c in others], axis=0, out=self._xi3) + np.sum([c._xi4 for c in others], axis=0, out=self._xi4) + np.sum([c.meanr for c in others], axis=0, out=self.meanr) + np.sum([c.meanlogr for c in others], axis=0, out=self.meanlogr) + np.sum([c.weight for c in others], axis=0, out=self.weight) + np.sum([c.npairs for c in others], axis=0, out=self.npairs) + self._varxip = None + self._varxim = None + self._cov = None + + def write(self, file_name, *, file_type=None, precision=None, write_patch_results=False, + write_cov=False): + r"""Write the correlation function to the file, file_name. 
+
+        The output file will include the following columns:
+
+        =========       ========================================================
+        Column          Description
+        =========       ========================================================
+        r_nom           The nominal center of the bin in r
+        meanr           The mean value :math:`\langle r \rangle` of pairs that
+                        fell into each bin
+        meanlogr        The mean value :math:`\langle \log(r) \rangle` of pairs
+                        that fell into each bin
+        xip             The real part of the :math:`\xi_+` correlation function
+        xim             The real part of the :math:`\xi_-` correlation function
+        xip_im          The imag part of the :math:`\xi_+` correlation function
+        xim_im          The imag part of the :math:`\xi_-` correlation function
+        sigma_xip       The sqrt of the variance estimate of :math:`\xi_+`
+        sigma_xim       The sqrt of the variance estimate of :math:`\xi_-`
+        weight          The total weight contributing to each bin
+        npairs          The total number of pairs in each bin
+        =========       ========================================================
+
+        If ``sep_units`` was given at construction, then the distances will all be in these units.
+        Otherwise, they will be in either the same units as x,y,z (for flat or 3d coordinates) or
+        radians (for spherical coordinates).
+
+        Parameters:
+            file_name (str):    The name of the file to write to.
+            file_type (str):    The type of file to write ('ASCII' or 'FITS').  (default: determine
+                                the type automatically from the extension of file_name.)
+            precision (int):    For ASCII output catalogs, the desired precision. (default: 4;
+                                this value can also be given in the constructor in the config dict.)
+            write_patch_results (bool): Whether to write the patch-based results as well.
+                                        (default: False)
+            write_cov (bool):   Whether to write the covariance matrix as well. (default: False)
+        """
+        self.logger.info(f'Writing {self._letters} correlations to %s',file_name)
+        precision = self.config.get('precision', 4) if precision is None else precision
+        with make_writer(file_name, precision, file_type, self.logger) as writer:
+            self._write(writer, None, write_patch_results, write_cov=write_cov)
+
+    @property
+    def _write_col_names(self):
+        return ['r_nom', 'meanr', 'meanlogr', 'xip', 'xim', 'xip_im', 'xim_im',
+                'sigma_xip', 'sigma_xim', 'weight', 'npairs']
+
+    @property
+    def _write_data(self):
+        data = [ self.rnom, self.meanr, self.meanlogr,
+                 self.xip, self.xim, self.xip_im, self.xim_im,
+                 np.sqrt(self.varxip), np.sqrt(self.varxim),
+                 self.weight, self.npairs ]
+        data = [ col.flatten() for col in data ]
+        return data
+
+    def _read_from_data(self, data, params):
+        super()._read_from_data(data, params)
+        s = self.logr.shape
+        self.weight = data['weight'].reshape(s)
+        self._xi1 = data['xip'].reshape(s)
+        self._xi2 = data['xip_im'].reshape(s)
+        self._xi3 = data['xim'].reshape(s)
+        self._xi4 = data['xim_im'].reshape(s)
+        self._varxip = data['sigma_xip'].reshape(s)**2
+        self._varxim = data['sigma_xim'].reshape(s)**2
+
+
+class ZZCorrelation(BaseZZCorrelation):
+    r"""This class handles the calculation and storage of a 2-point correlation function
+    of two complex spin-0 fields.  If either spin-0 field is real, you should instead use
+    `KZCorrelation` as it will be faster, and if both are real, you should use `KKCorrelation`.
+
+    To be consistent with the other spin correlation functions, we compute two quantities:
+
+    .. math::
+
+        \xi_+ = \langle z_1 z_2^* \rangle
+
+        \xi_- = \langle z_1 z_2 \rangle
+
+    There is no projection along the line connecting the two points as there is for the other
+    complex fields, since spin-0 fields don't change with orientation.
+
+    Objects of this class hold the following attributes:
+
+    Attributes:
+        nbins:      The number of bins in logr
+        bin_size:   The size of the bins in logr
+        min_sep:    The minimum separation being considered
+        max_sep:    The maximum separation being considered
+
+    In addition, the following attributes are numpy arrays of length (nbins):
+
+    Attributes:
+
+        logr:       The nominal center of the bin in log(r) (the natural logarithm of r).
+        rnom:       The nominal center of the bin converted to regular distance.
+                    i.e. r = exp(logr).
+        meanr:      The (weighted) mean value of r for the pairs in each bin.
+                    If there are no pairs in a bin, then exp(logr) will be used instead.
+        meanlogr:   The (weighted) mean value of log(r) for the pairs in each bin.
+                    If there are no pairs in a bin, then logr will be used instead.
+        xip:        The correlation function, :math:`\xi_+(r)`.
+        xim:        The correlation function, :math:`\xi_-(r)`.
+        xip_im:     The imaginary part of :math:`\xi_+(r)`.
+        xim_im:     The imaginary part of :math:`\xi_-(r)`.
+        varxip:     An estimate of the variance of :math:`\xi_+(r)`
+        varxim:     An estimate of the variance of :math:`\xi_-(r)`
+        weight:     The total weight in each bin.
+        npairs:     The number of pairs going into each bin (including pairs where one or
+                    both objects have w=0).
+        cov:        An estimate of the full covariance matrix for the data vector with
+                    :math:`\xi_+` first and then :math:`\xi_-`.
+
+    .. note::
+
+        The default method for estimating the variance and covariance attributes (``varxip``,
+        ``varxim``, and ``cov``) is 'shot', which only includes the shape noise propagated into
+        the final correlation.  This does not include sample variance, so it is always an
+        underestimate of the actual variance.  To get better estimates, you need to set
+        ``var_method`` to something else and use patches in the input catalog(s).
+        cf. `Covariance Estimates`.
+
+    If ``sep_units`` are given (either in the config dict or as a named kwarg) then the distances
+    will all be in these units.
+
+    .. note::
+
+        If you separate out the steps of the `Corr2.process` command and use
+        `BaseZZCorrelation.process_auto` and/or `Corr2.process_cross`, then the units will not be
+        applied to ``meanr`` or ``meanlogr`` until the `BaseZZCorrelation.finalize` function is
+        called.
+
+    The typical usage pattern is as follows:
+
+        >>> zz = treecorr.ZZCorrelation(config)
+        >>> zz.process(cat)         # For auto-correlation.
+        >>> zz.process(cat1,cat2)   # For cross-correlation.
+        >>> zz.write(file_name)     # Write out to a file.
+        >>> xip = zz.xip            # Or access the correlation function directly.
+
+    Parameters:
+        config (dict):  A configuration dict that can be used to pass in kwargs if desired.
+                        This dict is allowed to have additional entries besides those listed
+                        in `Corr2`, which are ignored here. (default: None)
+        logger:         If desired, a logger object for logging. (default: None, in which case
+                        one will be built according to the config dict's verbose level.)
+
+    Keyword Arguments:
+        **kwargs:       See the documentation for `Corr2` for the list of allowed keyword
+                        arguments, which may be passed either directly or in the config dict.
+ """ + _cls = 'ZZCorrelation' + _letter1 = 'Z' + _letter2 = 'Z' + _letters = 'ZZ' + _builder = _treecorr.ZZCorr + _calculateVar1 = staticmethod(calculateVarZ) + _calculateVar2 = staticmethod(calculateVarZ) + + def __init__(self, config=None, *, logger=None, **kwargs): + """Initialize `ZZCorrelation`. See class doc for details. + """ + super().__init__(config, logger=logger, **kwargs)
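The net effect of this patch is that each concrete ``??Correlation`` class shrinks to a few
class attributes plus thin ``__init__``/``finalize`` wrappers, with all shared machinery living
in `BaseZZCorrelation` and `Corr2`.  As an illustrative sketch (not part of the patch), this is
the subclass pattern it establishes; ``MyCorrelation`` is a hypothetical placeholder, while the
other names are taken from the TT/VV/ZZ definitions above:

    # Sketch only: mirrors the pattern this patch applies to TT, VV, QQ, and ZZ.
    # Only the per-type metadata differs between the subclasses; process_auto,
    # process, finalize, write, read, from_file, and the covariance machinery
    # are all inherited from BaseZZCorrelation / Corr2.
    from treecorr import _treecorr
    from treecorr.catalog import calculateVarV
    from treecorr.zzcorrelation import BaseZZCorrelation

    class MyCorrelation(BaseZZCorrelation):           # hypothetical example
        _cls = 'MyCorrelation'                        # name used in log/debug output
        _letter1 = 'V'                                # field type of the first catalog
        _letter2 = 'V'                                # field type of the second catalog
        _letters = 'VV'                               # pair label used in log messages
        _builder = _treecorr.VVCorr                   # C++ accumulator to instantiate
        _calculateVar1 = staticmethod(calculateVarV)  # shot-noise variance, field 1
        _calculateVar2 = staticmethod(calculateVarV)  # shot-noise variance, field 2

        def __init__(self, config=None, *, logger=None, **kwargs):
            super().__init__(config, logger=logger, **kwargs)

The intended user-facing behavior is unchanged; e.g. a round trip through the serialization
added in #172 should still look like:

    >>> zz = treecorr.ZZCorrelation(min_sep=1., max_sep=100., nbins=10)
    >>> zz.process(cat1, cat2)
    >>> zz.write('zz.fits', write_patch_results=True)
    >>> zz2 = treecorr.ZZCorrelation.from_file('zz.fits')  # no config needed

The design choice here is table-driven subclassing: because the per-type differences are pure
data (letters, builder, variance function), fixes to the shared pair-accumulation logic now land
in one place instead of being duplicated across every ``??Correlation`` module.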