From 093c4a9f299d07e0482a1f40f831e6f6e2474d8b Mon Sep 17 00:00:00 2001 From: Austin Cullar Date: Sun, 20 Oct 2024 12:31:47 -0600 Subject: [PATCH] - Add 2 new arguments - `--log-json`: log json to file, not console - `--log-file`: optional argument to specify location of log file - by default, this will be `astro_log.txt` in the current directory - Add contextmanager method to AstroLogger() to facilitate logging only to file - this is used to capture json responses, since these are very noisy - AstroLogger.print_object() has been replaced with print_video_data() - Trying to move away from rich tables, as these are not printed to log file - logging tests modified to account for log method changes --- src/astro.py | 21 ++++++----- src/data_collection/yt_data_api.py | 13 ++++++- src/log.py | 60 +++++++++++++++++++----------- src/tests/test_log.py | 7 ++-- 4 files changed, 65 insertions(+), 36 deletions(-) diff --git a/src/astro.py b/src/astro.py index 89cbb39..c81e27a 100644 --- a/src/astro.py +++ b/src/astro.py @@ -38,11 +38,14 @@ def parse_args(astro_theme): parser = argparse.ArgumentParser(description=description, formatter_class=ArgumentDefaultsRichHelpFormatter) - parser.add_argument("youtube_url", type=str, help="youtube video URL") - parser.add_argument("-l", "--log", type=str, choices=['debug', 'info', 'warn', 'error'], + parser.add_argument('youtube_url', type=str, help='youtube video URL') + parser.add_argument('-l', '--log', type=str, choices=['debug', 'info', 'warn', 'error'], help='Set the logging level', default='info') - parser.add_argument("--api-key", type=str, help="YouTube Data API key") - parser.add_argument("--db-file", type=str, help="database filename", default='astro.db') + parser.add_argument('--api-key', type=str, help='YouTube Data API key') + parser.add_argument('--db-file', type=str, help='database filename', default='astro.db') + parser.add_argument('--log-file', type=str, help='log output to specified file', default='astro_log.txt') + parser.add_argument('-j', '--log-json', type=bool, help='log json API responses', + default=False, action=argparse.BooleanOptionalAction) args = parser.parse_args() return args @@ -63,19 +66,19 @@ def main(): log_level = args.log if args.log else os.getenv("LOG_LEVEL") api_key = args.api_key if args.api_key else os.getenv("API_KEY") db_file = args.db_file if args.db_file else os.getenv("DB_FILE") + log_file = args.log_file if args.log_file else os.getenv("LOG_FILE") + log_json = args.log_json if args.log_json else os.getenv("LOG_JSON") # set up logging logging.setLoggerClass(AstroLogger) logger = logging.getLogger(__name__) - logger.astro_config(log_level, astro_theme) - - logger.info('Collecting video data...') + logger.astro_config(log_level, astro_theme, log_file=log_file) # collect metadata for provided video - youtube = YouTubeDataAPI(logger, api_key) + youtube = YouTubeDataAPI(logger, api_key, log_json) video_data = youtube.get_video_metadata(video_id) - logger.print_object(video_data, title="Video data") + logger.print_video_data(video_data) # check local database for existing data on provided video db = AstroDB(logger, db_file) diff --git a/src/data_collection/yt_data_api.py b/src/data_collection/yt_data_api.py index eccbe7b..01a37a7 100644 --- a/src/data_collection/yt_data_api.py +++ b/src/data_collection/yt_data_api.py @@ -4,6 +4,7 @@ import pandas as pd import traceback import string +import json from src.data_collection.data_structures import VideoData from googleapiclient.discovery import build @@ -13,10 +14,12 @@ class YouTubeDataAPI: logger = None api_key = None youtube = None + log_json = False - def __init__(self, logger, api_key): + def __init__(self, logger, api_key, log_json=False): self.logger = logger self.api_key = api_key + self.log_json = log_json self.youtube = build('youtube', 'v3', developerKey=self.api_key) @staticmethod @@ -119,6 +122,10 @@ def get_comments(self, video_data) -> pd.DataFrame: try: response = request.execute() + if self.log_json: + with self.logger.log_file_only(): + self.logger.debug(json.dumps(response, indent=4)) + comment_dataframe, comments_added = self.parse_comment_api_response(response, comment_dataframe) if 'nextPageToken' in response: # there are more comments to fetch page_token = response['nextPageToken'] @@ -155,6 +162,10 @@ def get_video_metadata(self, video_id: str) -> VideoData: try: response = request.execute() + if self.log_json: + with self.logger.log_file_only(): + self.logger.debug(json.dumps(response, indent=4)) + video_data = response['items'][0]['snippet'] video_stats = response['items'][0]['statistics'] diff --git a/src/log.py b/src/log.py index 3ccd3f8..af90cac 100644 --- a/src/log.py +++ b/src/log.py @@ -7,7 +7,7 @@ from rich.console import Console from rich.table import Table from rich.theme import Theme -from rich import print as rprint +from contextlib import contextmanager from src.progress import AstroProgress @@ -19,26 +19,42 @@ class AstroLogger(logging.Logger): astro_text_color: str astro_theme: Theme progress: AstroProgress + log_file: str + console_handler: RichHandler + file_handler: logging.FileHandler - def astro_config(self, log_level_str: str, astro_theme): + def astro_config(self, log_level_str: str, astro_theme, log_file='astro_log.txt'): """ Custom logging config. """ + # set log level self.log_level_str = log_level_str self.log_level = self.get_log_level(log_level_str) - self.setLevel(self.log_level) + # set color theme self.astro_theme = astro_theme # create console using the asto theme self.console = self.astro_theme.get_console() + # create log handlers + self.log_file = log_file + self.console_handler = RichHandler(rich_tracebacks=True, console=self.console) + self.file_handler = logging.FileHandler(self.log_file) + log_handlers = [self.console_handler, self.file_handler] + + # configure formatting for file handler + file_formatter = logging.Formatter( + '%(asctime)s:%(levelname)6s: %(filename)14s:%(lineno)-3d %(message)-60s', + '%Y-%m-%d %H:%M:%S') + + self.file_handler.setFormatter(file_formatter) + # configure logging logging.basicConfig(format='%(message)s', level=self.log_level, - handlers=[RichHandler(rich_tracebacks=True, - console=self.console)]) + handlers=log_handlers) # suppress google logs self.__suppress_logs('google', logging.WARNING) @@ -89,25 +105,18 @@ def __rich_table(self, title=''): return table - def print_object(self, obj, title=''): + def print_video_data(self, video_data): """ - Print the attributes of the provided object. Useful for debugging. + Print VideoData object. """ - if obj is None: + if video_data is None: return - # only print objects in debug or info mode - if self.log_level > logging.INFO: - return + exclude_fields = ['filtered_comment_count'] - table = self.__rich_table(title) - table.add_column("Attribute") - table.add_column("Value") - - for attr, value in obj.__dict__.items(): - table.add_row(attr, str(value)) - - self.console.print(table) + for attr, value in video_data.__dict__.items(): + if attr not in exclude_fields: + self.info(f'{attr:>20}: {str(value)}') def print_dataframe(self, df, title=''): """ @@ -145,8 +154,15 @@ def print_dataframe(self, df, title=''): self.console.print(table) - def print_json(self, json_obj): + @contextmanager + def log_file_only(self): """ - Easy way to print properly formatted json. + Provides a context in which logging will only go to the log file. """ - rprint(json_obj) + # temporarily raise log level of console handler + self.console_handler.setLevel(logging.CRITICAL) + + yield + + # restore original log level + self.console_handler.setLevel(self.log_level) diff --git a/src/tests/test_log.py b/src/tests/test_log.py index 5a39b8a..fde0684 100644 --- a/src/tests/test_log.py +++ b/src/tests/test_log.py @@ -28,10 +28,9 @@ def test_get_log_level(self, logger, level): assert str(exception.value) == "Invalid logger level specified: {}".format(level) - @pytest.mark.parametrize('obj', test_video_data) - @pytest.mark.parametrize('title', ['title1', 'title-2', 'title_3']) - def test_print_object(self, logger, obj, title): - logger.print_object(obj, title=title) + @pytest.mark.parametrize('video_data', test_video_data) + def test_print_video_data(self, logger, video_data): + logger.print_video_data(video_data) @pytest.mark.parametrize('title', ['title1', 'title-2', 'title_3']) def test_print_dataframe(self, logger, comment_dataframe, title):