From 1da6622136dfebca2bed50a80d2569b0b8bd3a89 Mon Sep 17 00:00:00 2001 From: Cunliang Geng Date: Tue, 21 Jan 2025 16:26:39 +0100 Subject: [PATCH] update GNPSMolecularFamilyLoader for gnps2 (#298) --- .../gnps/gnps_molecular_family_loader.py | 4 ++++ .../test_gnps_molecular_family_loader.py | 20 ++++++++++++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py index 3a77aaae..394685c8 100644 --- a/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py +++ b/src/nplinker/metabolomics/gnps/gnps_molecular_family_loader.py @@ -21,6 +21,10 @@ class GNPSMolecularFamilyLoader(MolecularFamilyLoaderBase): - networkedges_selfloop/*.selfloop 3. FEATURE-BASED-MOLECULAR-NETWORKING - networkedges_selfloop/*.selfloop + 4. GNPS2 classical_networking_workflow + - nf_output/networking/filtered_pairs.tsv + 5. GNPS2 feature_based_molecular_networking_workflow + - nf_output/networking/filtered_pairs.tsv The `ComponentIndex` column in the GNPS molecular family file is treated as family id. diff --git a/tests/unit/metabolomics/test_gnps_molecular_family_loader.py b/tests/unit/metabolomics/test_gnps_molecular_family_loader.py index 99dd3f1a..8e376ad7 100644 --- a/tests/unit/metabolomics/test_gnps_molecular_family_loader.py +++ b/tests/unit/metabolomics/test_gnps_molecular_family_loader.py @@ -14,7 +14,7 @@ (GNPSFormat.FBMN, 60, 5, False), ], ) -def test_gnps_molecular_family_loader( +def test_gnps_molecular_family_loader_gnps1( workflow, num_families, num_spectra, keep_singleton, gnps_mf_files ): """Test GNPSMolecularFamilyLoader class.""" @@ -24,3 +24,21 @@ def test_gnps_molecular_family_loader( # test molecular family with id "1" has correct number of spectra ids mf = [mf for mf in actual if mf.id == "1"][0] assert len(mf.spectra_ids) == num_spectra + + +@pytest.mark.parametrize( + "workflow, num_families, num_spectra, keep_singleton", + [ + (GNPSFormat.GNPS2CN, 88, 66, True), + (GNPSFormat.GNPS2FBMN, 38, 7, True), + ], +) +def test_gnps_molecular_family_loader_gnps2( + workflow, num_families, num_spectra, keep_singleton, gnps2_mf_files +): + loader = GNPSMolecularFamilyLoader(gnps2_mf_files[workflow]) + actual = loader.get_mfs(keep_singleton=keep_singleton) + assert len(actual) == num_families + # test molecular family with id "1" has correct number of spectra ids + mf = [mf for mf in actual if mf.id == "1"][0] + assert len(mf.spectra_ids) == num_spectra