Skip to content

Commit

Permalink
update GNPSFormat to support gnps2 (#292)
Browse files Browse the repository at this point in the history
Update the GNPSFormat class and the related functions to support GNPS2 data.
  • Loading branch information
CunliangGeng authored Jan 20, 2025
1 parent 07e750c commit d79ee60
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 15 deletions.
4 changes: 2 additions & 2 deletions src/nplinker/metabolomics/gnps/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .gnps_format import GNPSFormat
from .gnps_format import gnps_format_from_archive
from .gnps_format import gnps_format_from_file_mapping
from .gnps_format import gnps_format_from_task_id
from .gnps_format import gnps_format_from_gnps1_task_id
from .gnps_molecular_family_loader import GNPSMolecularFamilyLoader
from .gnps_spectrum_loader import GNPSSpectrumLoader

Expand All @@ -20,5 +20,5 @@
"GNPSSpectrumLoader",
"gnps_format_from_archive",
"gnps_format_from_file_mapping",
"gnps_format_from_task_id",
"gnps_format_from_gnps1_task_id",
]
4 changes: 2 additions & 2 deletions src/nplinker/metabolomics/gnps/gnps_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing_extensions import Self
from nplinker.utils import download_url
from .gnps_format import GNPSFormat
from .gnps_format import gnps_format_from_task_id
from .gnps_format import gnps_format_from_gnps1_task_id


class GNPSDownloader:
Expand Down Expand Up @@ -42,7 +42,7 @@ def __init__(self, task_id: str, download_root: str | PathLike):
Examples:
>>> GNPSDownloader("c22f44b14a3d450eb836d607cb9521bb", "~/downloads")
"""
gnps_format = gnps_format_from_task_id(task_id)
gnps_format = gnps_format_from_gnps1_task_id(task_id)
if gnps_format == GNPSFormat.Unknown:
raise ValueError(
f"Unknown workflow type for GNPS task '{task_id}'."
Expand Down
21 changes: 13 additions & 8 deletions src/nplinker/metabolomics/gnps/gnps_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
from bs4 import BeautifulSoup


GNPS_TASK_URL = "https://gnps.ucsd.edu/ProteoSAFe/status.jsp?task={}"


@unique
class GNPSFormat(Enum):
"""Enum class for GNPS formats or workflows.
Expand All @@ -24,20 +21,27 @@ class GNPSFormat(Enum):
"""

# Format: ShortName = "GNPSWorkflowName"
# For GNPS1
SNETS = "METABOLOMICS-SNETS"
SNETSV2 = "METABOLOMICS-SNETS-V2"
FBMN = "FEATURE-BASED-MOLECULAR-NETWORKING"
# For GNPS2
GNPS2CN = "classical_networking_workflow"
GNPS2FBMN = "feature_based_molecular_networking_workflow"
# Unknown format
Unknown = "Unknown-GNPS-Workflow"


def gnps_format_from_task_id(task_id: str) -> GNPSFormat:
"""Detect GNPS format for the given task id.
def gnps_format_from_gnps1_task_id(task_id: str) -> GNPSFormat:
"""Detect GNPS format or workflow for the given GNPS1 task id.
GNPS1 tasks are those generated on the platform https://gnps.ucsd.edu.
Args:
task_id: GNPS task id.
task_id: GNPS1 task id.
Returns:
The format identified in the GNPS task.
The format identified in the task.
Examples:
>>> gnps_format_from_gnps1_task_id("c22f44b14a3d450eb836d607cb9521bb")
Expand All @@ -49,7 +53,8 @@ def gnps_format_from_task_id(task_id: str) -> GNPSFormat:
>>> gnps_format_from_gnps1_task_id("0ad6535e34d449788f297e712f43068a")
<GNPSFormat.Unknown: 'Unknown-GNPS-Workflow'>
"""
task_html = httpx.get(GNPS_TASK_URL.format(task_id))
gnps1_task_url = "https://gnps.ucsd.edu/ProteoSAFe/status.jsp?task={}"
task_html = httpx.get(gnps1_task_url.format(task_id))
soup = BeautifulSoup(task_html.text, features="html.parser")
try:
# find the td tag that follows the th tag containing 'Workflow'
Expand Down
6 changes: 3 additions & 3 deletions tests/unit/metabolomics/test_gnps_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from nplinker.metabolomics.gnps import GNPSFormat
from nplinker.metabolomics.gnps import gnps_format_from_archive
from nplinker.metabolomics.gnps import gnps_format_from_file_mapping
from nplinker.metabolomics.gnps import gnps_format_from_task_id
from nplinker.metabolomics.gnps import gnps_format_from_gnps1_task_id


@pytest.mark.parametrize(
Expand All @@ -14,10 +14,10 @@
["0ad6535e34d449788f297e712f43068a", GNPSFormat.Unknown],
],
)
def test_gnps_format_from_task_id(task_id: str, expected: GNPSFormat, gnps_website_is_down):
def test_gnps_format_from_gnps1_task_id(task_id: str, expected: GNPSFormat, gnps_website_is_down):
if gnps_website_is_down:
pytest.skip("GNPS website is down")
actual = gnps_format_from_task_id(task_id)
actual = gnps_format_from_gnps1_task_id(task_id)
assert actual is expected


Expand Down

0 comments on commit d79ee60

Please sign in to comment.