Skip to content

Commit

Permalink
add some tests for minus strand variant calling
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzoic committed Sep 24, 2024
1 parent 0bba814 commit 4bbf073
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 1 deletion.
8 changes: 7 additions & 1 deletion countess/utils/variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ def find_variant_dna(ref_seq: str, var_seq: str, offset: int = 0) -> Iterable[st
>>> list(find_variant_dna("AGAAGTAGAGG", "ATAAGAAGAGG", -200))
['198G>T', '194T>A']
"""

ref_seq = ref_seq.strip().upper()
Expand Down Expand Up @@ -524,6 +523,13 @@ def find_variant_string(
>>> find_variant_string("p.", "ATGGTTGGTTCA", "ATGGCTGCTTCA")
'p.Val2_Gly3delinsAlaAla'
MINUS STRAND
this example is actually comparing TGTAATC and TCTGAAC ...
>>> find_variant_string("g.", "GATTACA", "GTTCAGA", minus_strand=True)
'g.[2G>C;3_4insG;6del]'
CHECK FOR INVALID INPUTS
>>> find_variant_string("x.", "CAT", "CAT")
Expand Down
39 changes: 39 additions & 0 deletions tests/plugins/test_variant.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,42 @@ def test_variant_ref_offset():
assert output[0]["out"] == "g.1A>T"
assert output[1]["out"] == "g.[17A>T;19A>T]"
assert output[2]["out"] == "g.[102G>T;106T>A]"

def test_variant_ref_offset_minus():
    """Exercise variant calling with seq_type "g-" and a non-zero offset.

    Genes on the minus strand are reverse-complemented, so the variants
    are effectively called between the reverse complements of the
    reference and of each input sequence:

                 00000000011
        num      12345678901
        ref      CCTCTACTTCT
        seq1     CCTCTACTTCA  => 11T>A
        seq2     CCACAACTTCT  => 3T>A;5T>A
        seq3     CCTCTTCTTAT  => 6A>T;10C>A

    The configured offset of 1000 is then added to every position.
    """
    # Input reads as given (i.e. before reverse-complementing).
    reads = ("TGAAGTAGAGG", "AGAAGTTGTGG", "ATAAGAAGAGG")
    input_df = pd.DataFrame([{"seq": read} for read in reads])

    plugin = VariantPlugin()
    # Applied in this exact order, one set_parameter call per entry.
    settings = (
        ("column", "seq"),
        ("reference", "AGAAGTAGAGG"),
        ("outputs.0.offset", "1000"),
        ("outputs.0.seq_type", "g-"),
        ("outputs.0.output", "out"),
    )
    for key, value in settings:
        plugin.set_parameter(key, value)

    plugin.prepare(["test"], None)

    records = plugin.process_dataframe(input_df).to_records()

    # One expected variant string per input read, in input order.
    expected = (
        "g.1011T>A",
        "g.[1003T>A;1005T>A]",
        "g.[1006A>T;1010C>A]",
    )
    for index, variant in enumerate(expected):
        assert records[index]["out"] == variant

0 comments on commit 4bbf073

Please sign in to comment.