Skip to content

Commit

Permalink
Merge pull request #274 from monarch-initiative/uniprot-troubleshooting
Browse files Browse the repository at this point in the history
Minor Uniprot service updates
  • Loading branch information
ielis authored Sep 18, 2024
2 parents be67f1e + 67b701d commit a6caa22
Showing 1 changed file with 16 additions and 4 deletions.
20 changes: 16 additions & 4 deletions src/gpsea/preprocessing/_uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ def __init__(
timeout: float = 30.,
):
self._logger = logging.getLogger(__name__)
self._url = 'https://rest.uniprot.org/uniprotkb/search?query=(%s)AND(reviewed:true)&fields=accession,id,' \
'gene_names,gene_primary,protein_name,ft_domain,ft_motif,ft_region,ft_repeat,xref_refseq,length'
self._headers = {'Content-type': 'application/json'}
self._url = 'https://rest.uniprot.org/uniprotkb/search?query=(%s)AND(reviewed:true)&(organism_id:9606)' \
'&fields=accession,id,gene_names,gene_primary,protein_name,ft_domain,ft_motif,ft_region,ft_repeat,' \
'xref_refseq,length'
self._timeout = timeout

@staticmethod
Expand Down Expand Up @@ -89,6 +91,17 @@ def _extract_metadata(protein_id: str, data: typing.Mapping[str, typing.Any]) ->

return ProteinMetadata(protein_id, protein_name, all_features_list, protein_length)

def _fetch_uniprot_response(
    self,
    protein_id: str,
) -> typing.Mapping[str, typing.Any]:
    """
    Query the UniProt REST API for `protein_id` and return the decoded JSON payload.

    The request is sent to the URL built from the `self._url` template,
    using `self._headers` as HTTP headers and `self._timeout` as the timeout.

    :param protein_id: the protein identifier to substitute into the query URL.
    :returns: the response body decoded from JSON.
    """
    # The URL template uses a printf-style `%s` placeholder, so it must be filled in
    # with the `%` operator. `str.format` only replaces `{}` fields and would leave
    # the `%s` untouched, sending a query for the literal text instead of the ID.
    api_url = self._url % protein_id
    response = requests.get(
        api_url,
        headers=self._headers,
        timeout=self._timeout,
    )
    return response.json()

def annotate(self, protein_id: str) -> ProteinMetadata:
"""
Get metadata for given protein ID.
Expand All @@ -105,7 +118,6 @@ def annotate(self, protein_id: str) -> ProteinMetadata:
raise ValueError(f"Please remove whitespace from protein id: \"{protein_id}\" and try again!")
if not protein_id.startswith("NP_"):
raise ValueError(f"only works with a RefSeq database ID (e.g. NP_037407.4), but we got {protein_id}")
api_url = self._url % protein_id
response = requests.get(api_url, timeout=self._timeout).json()

response = self._fetch_uniprot_response(protein_id)
return UniprotProteinMetadataService.parse_uniprot_json(response, protein_id)

0 comments on commit a6caa22

Please sign in to comment.