diff --git a/.Dockerfile b/.Dockerfile index 5c135bb..78fe856 100644 --- a/.Dockerfile +++ b/.Dockerfile @@ -30,6 +30,7 @@ COPY parser ./parser COPY tests ./tests COPY *.py . COPY default.database.ini . +COPY logging.ini . COPY .kubernetes.yml . #CMD ["uvicorn", "app.api:app", "--host", "0.0.0.0", "--port", "6000"] \ No newline at end of file diff --git a/logging.ini b/logging.ini new file mode 100644 index 0000000..1ec43e5 --- /dev/null +++ b/logging.ini @@ -0,0 +1,21 @@ +[loggers] +keys=root + +[handlers] +keys=fileHandler + +[formatters] +keys=simpleFormatter + +[logger_root] +level=DEBUG +handlers=fileHandler + +[handler_fileHandler] +class=FileHandler +level=DEBUG +formatter=simpleFormatter +args=('logs/xi_mzidentml_converter.log',) + +[formatter_simpleFormatter] +format=%(asctime)s %(name)s - %(levelname)s:%(message)s \ No newline at end of file diff --git a/process_dataset.py b/process_dataset.py index 3d94129..54af66d 100644 --- a/process_dataset.py +++ b/process_dataset.py @@ -6,7 +6,7 @@ import time import ftplib from urllib.parse import urlparse -import logging +import logging.config import gc import shutil from urllib.parse import urlparse @@ -15,6 +15,8 @@ from parser.writer import Writer from db_config_parser import get_conn_str +logging.config.fileConfig('logging.ini') +logger = logging.getLogger(__name__) def main(args): @@ -48,11 +50,11 @@ def main(args): def convert_pxd_accession(px_accession, temp_dir, dont_delete=False): # get ftp location from PX px_url = 'https://proteomecentral.proteomexchange.org/cgi/GetDataset?ID=' + px_accession + '&outputMode=JSON' - print('GET request to ProteomeExchange: ' + px_url) + logger.info('GET request to ProteomeExchange: ' + px_url) px_response = requests.get(px_url) r = requests.get(px_url) if r.status_code == 200: - print('ProteomeExchange returned status code 200') + logger.info('ProteomeExchange returned status code 200') px_json = px_response.json() ftp_url = None for dataSetLink in px_json['fullDatasetLinks']: @@ -70,11 +72,11 @@ def convert_pxd_accession(px_accession, temp_dir, dont_delete=False): def convert_pxd_accession_from_pride(px_accession, temp_dir, dont_delete=False): # get ftp location from PRIDE API px_url = 'https://www.ebi.ac.uk/pride/ws/archive/v2/files/byProject?accession=' + px_accession - print('GET request to PRIDE API: ' + px_url) + logger.info('GET request to PRIDE API: ' + px_url) pride_response = requests.get(px_url) r = requests.get(px_url) if r.status_code == 200: - print('PRIDE API returned status code 200') + logger.info('PRIDE API returned status code 200') pride_json = pride_response.json() ftp_url = None @@ -95,7 +97,7 @@ def convert_pxd_accession_from_pride(px_accession, temp_dir, dont_delete=False): parent_folder += segment + '/' ftp_url = parent_folder - print('PRIDE FTP path : ' + parent_folder) + logger.info('PRIDE FTP path : ' + parent_folder) break; convert_from_ftp(ftp_url, temp_dir, px_accession, dont_delete) if not ftp_url: @@ -110,10 +112,10 @@ def convert_from_ftp(ftp_url, temp_dir, project_identifier, dont_delete): try: os.mkdir(temp_dir) except OSError as e: - print('Failed to create temp directory ' + temp_dir) - print('Error: ' + e.strerror) + logger.error('Failed to create temp directory ' + temp_dir) + logger.error('Error: ' + e.strerror) raise e - print('FTP url: ' + ftp_url) + logger.info('FTP url: ' + ftp_url) parsed_url = urlparse(ftp_url) path = os.path.join(temp_dir, project_identifier) try: @@ -129,7 +131,7 @@ def convert_from_ftp(ftp_url, temp_dir, project_identifier, dont_delete): or f.lower().endswith('raw') or f.lower().endswith('raw.gz') or f.lower().endswith('all.zip')): - print('Downloading ' + f + ' to ' + path) + logger.info('Downloading ' + f + ' to ' + path) ftp = get_ftp_login(ftp_ip) try: ftp.cwd(parsed_url.path) @@ -146,8 +148,8 @@ def convert_from_ftp(ftp_url, temp_dir, project_identifier, dont_delete): try: shutil.rmtree(path) except OSError as e: - print('Failed to delete temp directory ' + path) - print('Error: ' + e.strerror) + logger.error('Failed to delete temp directory ' + path) + logger.error('Error: ' + e.strerror) raise e @@ -158,7 +160,7 @@ def get_ftp_login(ftp_ip): ftp.login() # Uses password: anonymous@ return ftp except ftplib.all_errors as e: - print('FTP fail at ' + time.strftime("%c")) + logger.error('FTP fail at ' + time.strftime("%c")) raise e @@ -168,17 +170,17 @@ def get_ftp_file_list(ftp_ip, ftp_dir): ftp.cwd(ftp_dir) except ftplib.error_perm as e: error_msg = "%s: %s" % (ftp_dir, e.args[0]) - print(error_msg) + logger.error(error_msg) ftp.quit() raise e try: filelist = ftp.nlst() except ftplib.error_perm as resp: if str(resp) == "550 No files found": - print("FTP: No files in this directory") + logger.info("FTP: No files in this directory") else: error_msg = "%s: %s" % (ftp_dir, ftplib.error_perm.args[0]) - print(error_msg) + logger.error(error_msg) raise resp ftp.close() return filelist @@ -191,13 +193,13 @@ def convert_dir(local_dir, project_identifier): # iterate over files in local_dir for file in os.listdir(local_dir): if file.endswith(".mzid") or file.endswith(".mzid.gz"): - print("Processing " + file) + logger.info("Processing " + file) conn_str = get_conn_str() writer = Writer(conn_str, pxid=project_identifier) id_parser = MzIdParser(os.path.join(local_dir, file), local_dir, local_dir, writer, logger) try: id_parser.parse() - # print(id_parser.warnings + "\n") + # logger.info(id_parser.warnings + "\n") except Exception as e: raise e gc.collect() @@ -224,5 +226,5 @@ def convert_dir(local_dir, project_identifier): main(parser.parse_args()) sys.exit(0) except Exception as ex: - print(ex) + logger.error(ex) sys.exit(1)