diff --git a/deeprvat/annotations/annotations.py b/deeprvat/annotations/annotations.py
index 72eba673..e31016cc 100644
--- a/deeprvat/annotations/annotations.py
+++ b/deeprvat/annotations/annotations.py
@@ -1738,7 +1738,25 @@ def process_vep(
     )
     if "#Uploaded_variation" in vep_file.columns:
         vep_file = vep_file.merge(vcf_df, on="#Uploaded_variation", how = 'left')
-        vep_file.loc[vep_file.chrom.isna(),['chrom','pos','ref','alt']]=vep_file[vep_file['chrom'].isna()]['#Uploaded_variation'].str.replace("_", ":").str.replace("/", ":").str.split(':', expand=True).values
+        coord_cols = ['chrom', 'pos', 'ref', 'alt']
+        # Rows still lacking `chrom` after the left merge get their coordinates
+        # rebuilt from the ID string itself ("_" and "/" are treated like ":").
+        unmatched = vep_file['chrom'].isna()
+        if unmatched.any():
+            vep_file.loc[unmatched, coord_cols] = (
+                vep_file.loc[unmatched, '#Uploaded_variation']
+                .str.replace("_", ":")
+                .str.replace("/", ":")
+                .str.split(':', expand=True)
+                .values
+            )
+        # Raise explicitly instead of `assert`: asserts vanish under `python -O`.
+        still_missing = [col for col in coord_cols if vep_file[col].isna().any()]
+        if still_missing:
+            raise ValueError(
+                f"Missing values in columns {still_missing} after merging on '#Uploaded_variation'"
+            )
+
     if "pos" in vep_file.columns:
         vep_file["pos"] = vep_file["pos"].astype(int)
 
diff --git a/tests/annotations/test_annotations.py b/tests/annotations/test_annotations.py
index 83ae128e..962ca10d 100644
--- a/tests/annotations/test_annotations.py
+++ b/tests/annotations/test_annotations.py
@@ -372,6 +372,75 @@ def test_merge_absplice_scores(
     assert written_results.shape == expected_results.shape
     assert_frame_equal(written_results, expected_results, check_exact = False)
 
+@pytest.mark.parametrize(
+    "test_data_name_dir, genotype_file, variant_file, expected",
+    [
+        ( "calculate_allele_frequency_small",
+            "genotypes.h5",
+            "variants.parquet",
+            "af_df.parquet",
+        ),
+    ]
+)
+def test_calculate_allele_frequencies(
+    test_data_name_dir, genotype_file, variant_file, expected, tmp_path
+):
+    """Check the `get-af-from-gt` CLI output against the expected parquet fixture."""
+    current_test_data_dir = tests_data_dir / 'calculate_allele_frequency' / test_data_name_dir
+    genotype_filepath = current_test_data_dir / 'input' / genotype_file
+    variant_filepath = current_test_data_dir / 'input' /variant_file
+    expected_path = current_test_data_dir / 'expected' / expected
+    output_path = tmp_path / 'out.parquet'
+    cli_runner = CliRunner()
+    cli_parameters = [
+        'get-af-from-gt',
+        genotype_filepath.as_posix(),
+        variant_filepath.as_posix(),
+        output_path.as_posix(),
+    ]
+    result = cli_runner.invoke(annotations_cli, cli_parameters, catch_exceptions=False)
+    assert result.exit_code == 0
+    written_results = pd.read_parquet(output_path)
+    expected_results = pd.read_parquet(expected_path)
+    assert written_results.shape == expected_results.shape
+    assert_frame_equal(written_results, expected_results[written_results.columns], check_exact = False)
+
+
+
+
+# @pytest.mark.parametrize(
+#     "test_data_name_dir, input_file_1, input_file_2, parameter1, expected",
+#     [
+#         ( "test_name_dir",
+#             "input_file1.parquet",
+#             "input_file2.parquet",
+#             "8",
+#             "expected.parquet",
+#         ),
+#     ]
+# )
+# def template(
+#     test_data_name_dir, input_file_1, input_file_2, parameter1, expected, tmp_path
+# ):
+#     current_test_data_dir = tests_data_dir / 'test_name' / test_data_name_dir
+#     input_path1 = current_test_data_dir / 'input' / input_file_1
+#     input_path2 = current_test_data_dir / 'input' /input_file_2
+#     expected_path = current_test_data_dir / 'expected' / expected
+#     output_path = tmp_path / 'out.parquet'
+#     cli_runner = CliRunner()
+#     cli_parameters = [
+#         'function-name',
+#         input_path1.as_posix(),
+#         input_path2.as_posix(),
+#         output_path.as_posix(),
+#         parameter1,
+#     ]
+#     result = cli_runner.invoke(annotations_cli, cli_parameters, catch_exceptions=False)
+#     assert result.exit_code == 0
+#     written_results = pd.read_parquet(output_path)
+#     expected_results = pd.read_parquet(expected_path)
+#     assert written_results.shape == expected_results.shape
+#     assert_frame_equal(written_results, expected_results[written_results.columns], check_exact = False)
 
 # @pytest.mark.parametrize(
 #     "test_name_dir, input_file_1, input_file_2, parameter1, expected",
diff --git a/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/expected/af_df.parquet b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/expected/af_df.parquet
new file mode 100644
index 00000000..d453427a
Binary files /dev/null and b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/expected/af_df.parquet differ
diff --git a/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/genotypes.h5 b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/genotypes.h5
new file mode 100644
index 00000000..614d7a38
Binary files /dev/null and b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/genotypes.h5 differ
diff --git a/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/variants.parquet b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/variants.parquet
new file mode 100644
index 00000000..37d317c4
Binary files /dev/null and b/tests/annotations/test_data/calculate_allele_frequency/calculate_allele_frequency_small/input/variants.parquet differ