Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use updated freq_bin_expr in gnomad_methods #6

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 1 addition & 69 deletions gnomad_toolbox/analysis/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,79 +3,11 @@
from typing import Dict, List, Optional, Tuple, Union

import hail as hl
from gnomad.assessment.summary_stats import freq_bin_expr

from gnomad_toolbox.load_data import _get_gnomad_release


# TODO: Modify this function in gnomad_methods.
def freq_bin_expr(
freq_expr: Union[hl.expr.StructExpression, hl.expr.ArrayExpression],
index: int = 0,
ac_cutoffs: Optional[List[Union[int, Tuple[int, str]]]] = [
(0, "AC0"),
(1, "singleton"),
(2, "doubleton"),
],
af_cutoffs: Optional[List[Union[float, Tuple[float, str]]]] = [
(1e-4, "0.01%"),
(1e-3, "0.1%"),
(1e-2, "1%"),
(1e-1, "10%"),
],
upper_af: Optional[Union[float, Tuple[float, str]]] = (0.95, "95%"),
) -> hl.expr.StringExpression:
"""
Return frequency string annotations based on input AC or AF.

.. note::

- Default index is 0 because function assumes freq_expr was calculated with
`annotate_freq`.
- Frequency index 0 from `annotate_freq` is frequency for all pops calculated
on adj genotypes only.

:param freq_expr: Array of structs containing frequency information.
:param index: Which index of freq_expr to use for annotation. Default is 0.
:param ac_cutoffs: List of AC cutoffs to use for binning.
:param af_cutoffs: List of AF cutoffs to use for binning.
:param upper_af: Upper AF cutoff to use for binning.
:return: StringExpression containing bin name based on input AC or AF.
"""
if isinstance(freq_expr, hl.expr.ArrayExpression):
freq_expr = freq_expr[index]

if ac_cutoffs and isinstance(ac_cutoffs[0], int):
ac_cutoffs = [(c, f"AC{c}") for c in ac_cutoffs]

if af_cutoffs and isinstance(af_cutoffs[0], float):
af_cutoffs = [(f, f"{f*100}%") for f in af_cutoffs]

if isinstance(upper_af, float):
upper_af = (upper_af, f"{upper_af*100}%")

freq_bin_expr = hl.case().when(hl.is_missing(freq_expr.AC), "Missing")
prev_af = None
for ac, name in sorted(ac_cutoffs):
freq_bin_expr = freq_bin_expr.when(freq_expr.AC == ac, name)
prev_af = name

for af, name in sorted(af_cutoffs):
prev_af = "<" if prev_af is None else f"{prev_af} - "
freq_bin_expr = freq_bin_expr.when(freq_expr.AF < af, f"{prev_af}{name}")
prev_af = name

if upper_af:
freq_bin_expr = freq_bin_expr.when(
freq_expr.AF > upper_af[0], f">{upper_af[1]}"
)
default_af = "<" if prev_af is None else f"{prev_af} - "
default_af = f"{default_af}{upper_af[1]}"
else:
default_af = f">{prev_af}"

return freq_bin_expr.default(default_af)


def get_variant_count_by_freq_bin(
af_cutoffs: List[float] = [0.001, 0.01],
singletons: bool = False,
Expand Down
Loading