From a3fee2a6cb7f0cde3917d610bf5728313db9dee0 Mon Sep 17 00:00:00 2001
From: nuin
Date: Wed, 11 Sep 2024 11:49:00 -0600
Subject: [PATCH] Add HTTPS file size retrieval to create_blast_db.py

Implemented a function to retrieve file sizes over HTTPS and integrated
it with the existing FTP file size logic. This ensures support for both
protocols and improves flexibility in handling different types of URIs.
---
 src/create_blast_db.py |  7 +++++--
 src/utils.py           | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/src/create_blast_db.py b/src/create_blast_db.py
index cc4d400..22f3752 100644
--- a/src/create_blast_db.py
+++ b/src/create_blast_db.py
@@ -32,7 +32,7 @@
 
 from utils import (check_md5sum, check_output, edit_fasta, get_ftp_file_size,
                    get_mod_from_json, needs_parse_id, run_command, s3_sync,
-                   setup_logger, slack_message)
+                   setup_logger, slack_message, get_https_file_size)
 
 # Load environment variables
 load_dotenv()
@@ -129,7 +129,10 @@ def get_files_ftp(fasta_uri: str, md5sum: str) -> bool:
         return False
 
     try:
-        file_size = get_ftp_file_size(fasta_uri)
+        if fasta_uri.startswith('https'):
+            file_size = get_https_file_size(fasta_uri)
+        else:
+            file_size = get_ftp_file_size(fasta_uri)
         if file_size == 0:
             LOGGER.error(f"Failed to get file size for {fasta_uri}")
             return False
diff --git a/src/utils.py b/src/utils.py
index 64190e5..ec820b0 100644
--- a/src/utils.py
+++ b/src/utils.py
@@ -425,3 +425,29 @@ def get_ftp_file_size(fasta_uri: str) -> int:
     except Exception as e:
         console.log(f"Error getting FTP file size: {e}")
         return 0
+
+
+import requests
+
+
+def get_https_file_size(https_uri: str) -> int:
+    """
+    Return the size in bytes of the file at an HTTPS URI, or 0 on failure.
+
+    Issues a HEAD request (following redirects) and reads the
+    Content-Length header of the response. Returns 0 when the status
+    code is not 200, the header is absent, or any exception is raised.
+    """
+    try:
+        # Without a timeout, requests can block forever on a stalled server.
+        response = requests.head(https_uri, allow_redirects=True, timeout=30)
+        if response.status_code == 200:
+            size = int(response.headers.get('content-length', 0))
+            console.log(f"File size for {https_uri} is {size} bytes")
+            return size
+        else:
+            console.log(f"Couldn't determine size for {https_uri}. Status code: {response.status_code}")
+            return 0
+    except Exception as e:
+        console.log(f"Error getting HTTPS file size: {e}")
+        return 0