Skip to content

Commit

Permalink
- Add 2 new arguments
Browse files Browse the repository at this point in the history
    - `--log-json`: log JSON API responses to the log file, not the console
    - `--log-file`: optional argument to specify the location of the log file
        - by default, this will be `astro_log.txt` in the current directory
- Add a context manager method, `log_file_only()`, to AstroLogger to facilitate logging only to the log file
    - this is used to capture JSON API responses, since these are very noisy
- AstroLogger.print_object() has been replaced with print_video_data()
    - Moving away from rich tables, as these are not written to the log file
- Logging tests modified to account for the log method changes
  • Loading branch information
AustinCullar committed Oct 20, 2024
1 parent bca033e commit 093c4a9
Show file tree
Hide file tree
Showing 4 changed files with 65 additions and 36 deletions.
21 changes: 12 additions & 9 deletions src/astro.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,14 @@ def parse_args(astro_theme):
parser = argparse.ArgumentParser(description=description,
formatter_class=ArgumentDefaultsRichHelpFormatter)

parser.add_argument("youtube_url", type=str, help="youtube video URL")
parser.add_argument("-l", "--log", type=str, choices=['debug', 'info', 'warn', 'error'],
parser.add_argument('youtube_url', type=str, help='youtube video URL')
parser.add_argument('-l', '--log', type=str, choices=['debug', 'info', 'warn', 'error'],
help='Set the logging level', default='info')
parser.add_argument("--api-key", type=str, help="YouTube Data API key")
parser.add_argument("--db-file", type=str, help="database filename", default='astro.db')
parser.add_argument('--api-key', type=str, help='YouTube Data API key')
parser.add_argument('--db-file', type=str, help='database filename', default='astro.db')
parser.add_argument('--log-file', type=str, help='log output to specified file', default='astro_log.txt')
parser.add_argument('-j', '--log-json', type=bool, help='log json API responses',
default=False, action=argparse.BooleanOptionalAction)
args = parser.parse_args()

return args
Expand All @@ -63,19 +66,19 @@ def main():
log_level = args.log if args.log else os.getenv("LOG_LEVEL")
api_key = args.api_key if args.api_key else os.getenv("API_KEY")
db_file = args.db_file if args.db_file else os.getenv("DB_FILE")
log_file = args.log_file if args.log_file else os.getenv("LOG_FILE")
log_json = args.log_json if args.log_json else os.getenv("LOG_JSON")

# set up logging
logging.setLoggerClass(AstroLogger)
logger = logging.getLogger(__name__)
logger.astro_config(log_level, astro_theme)

logger.info('Collecting video data...')
logger.astro_config(log_level, astro_theme, log_file=log_file)

# collect metadata for provided video
youtube = YouTubeDataAPI(logger, api_key)
youtube = YouTubeDataAPI(logger, api_key, log_json)
video_data = youtube.get_video_metadata(video_id)

logger.print_object(video_data, title="Video data")
logger.print_video_data(video_data)

# check local database for existing data on provided video
db = AstroDB(logger, db_file)
Expand Down
13 changes: 12 additions & 1 deletion src/data_collection/yt_data_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pandas as pd
import traceback
import string
import json

from src.data_collection.data_structures import VideoData
from googleapiclient.discovery import build
Expand All @@ -13,10 +14,12 @@ class YouTubeDataAPI:
logger = None
api_key = None
youtube = None
log_json = False

def __init__(self, logger, api_key):
def __init__(self, logger, api_key, log_json=False):
    """
    Store the collaborators and build the YouTube Data API client.

    logger:   logger instance used by the methods of this class
    api_key:  key passed to the API client as ``developerKey``
    log_json: when true, raw API responses are dumped to the log
              inside ``logger.log_file_only()``
    """
    self.logger, self.api_key, self.log_json = logger, api_key, log_json

    # 'youtube' / 'v3' select the service name and version of the client
    self.youtube = build('youtube', 'v3', developerKey=api_key)

@staticmethod
Expand Down Expand Up @@ -119,6 +122,10 @@ def get_comments(self, video_data) -> pd.DataFrame:

try:
response = request.execute()
if self.log_json:
with self.logger.log_file_only():
self.logger.debug(json.dumps(response, indent=4))

comment_dataframe, comments_added = self.parse_comment_api_response(response, comment_dataframe)
if 'nextPageToken' in response: # there are more comments to fetch
page_token = response['nextPageToken']
Expand Down Expand Up @@ -155,6 +162,10 @@ def get_video_metadata(self, video_id: str) -> VideoData:

try:
response = request.execute()
if self.log_json:
with self.logger.log_file_only():
self.logger.debug(json.dumps(response, indent=4))

video_data = response['items'][0]['snippet']
video_stats = response['items'][0]['statistics']

Expand Down
60 changes: 38 additions & 22 deletions src/log.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rich.console import Console
from rich.table import Table
from rich.theme import Theme
from rich import print as rprint
from contextlib import contextmanager

from src.progress import AstroProgress

Expand All @@ -19,26 +19,42 @@ class AstroLogger(logging.Logger):
astro_text_color: str
astro_theme: Theme
progress: AstroProgress
log_file: str
console_handler: RichHandler
file_handler: logging.FileHandler

def astro_config(self, log_level_str: str, astro_theme):
def astro_config(self, log_level_str: str, astro_theme, log_file='astro_log.txt'):
"""
Custom logging config.
"""
# set log level
self.log_level_str = log_level_str
self.log_level = self.get_log_level(log_level_str)

self.setLevel(self.log_level)

# set color theme
self.astro_theme = astro_theme

# create console using the astro theme
self.console = self.astro_theme.get_console()

# create log handlers
self.log_file = log_file
self.console_handler = RichHandler(rich_tracebacks=True, console=self.console)
self.file_handler = logging.FileHandler(self.log_file)
log_handlers = [self.console_handler, self.file_handler]

# configure formatting for file handler
file_formatter = logging.Formatter(
'%(asctime)s:%(levelname)6s: %(filename)14s:%(lineno)-3d %(message)-60s',
'%Y-%m-%d %H:%M:%S')

self.file_handler.setFormatter(file_formatter)

# configure logging
logging.basicConfig(format='%(message)s',
level=self.log_level,
handlers=[RichHandler(rich_tracebacks=True,
console=self.console)])
handlers=log_handlers)

# suppress google logs
self.__suppress_logs('google', logging.WARNING)
Expand Down Expand Up @@ -89,25 +105,18 @@ def __rich_table(self, title=''):

return table

def print_object(self, obj, title=''):
def print_video_data(self, video_data):
    """
    Log the attributes of a VideoData object, one info record per attribute.

    Each record has the attribute name right-aligned in a 20 character
    column, followed by the string form of its value. Nothing is logged
    when video_data is None.
    """
    if video_data is None:
        return

    # attributes that are deliberately left out of the output
    hidden = {'filtered_comment_count'}

    for name, val in vars(video_data).items():
        if name in hidden:
            continue
        self.info(f'{name:>20}: {val!s}')

def print_dataframe(self, df, title=''):
"""
Expand Down Expand Up @@ -145,8 +154,15 @@ def print_dataframe(self, df, title=''):

self.console.print(table)

def print_json(self, json_obj):
@contextmanager
def log_file_only(self):
    """
    Provide a context in which logging bypasses the console.

    For the duration of the ``with`` block the console handler's level is
    raised to CRITICAL, so records below CRITICAL are no longer emitted by
    that handler; no other handler is modified here. On exit the console
    handler is returned to the level it had on entry, whether the block
    finished normally or raised.
    """
    # remember the level on entry so nested use and custom levels survive
    previous_level = self.console_handler.level

    # temporarily raise log level of console handler
    self.console_handler.setLevel(logging.CRITICAL)
    try:
        yield
    finally:
        # restore in a finally: without it, an exception raised inside the
        # block would leave console output muted for the rest of the run
        self.console_handler.setLevel(previous_level)
7 changes: 3 additions & 4 deletions src/tests/test_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,9 @@ def test_get_log_level(self, logger, level):

assert str(exception.value) == "Invalid logger level specified: {}".format(level)

@pytest.mark.parametrize('obj', test_video_data)
@pytest.mark.parametrize('title', ['title1', 'title-2', 'title_3'])
def test_print_object(self, logger, obj, title):
logger.print_object(obj, title=title)
# Smoke test for AstroLogger.print_video_data(): runs once per entry in
# test_video_data. There are no assertions, so the test only checks that the
# call completes without raising.
@pytest.mark.parametrize('video_data', test_video_data)
def test_print_video_data(self, logger, video_data):
# NOTE(review): `logger` is presumably a pytest fixture defined outside this hunk; confirm
logger.print_video_data(video_data)

@pytest.mark.parametrize('title', ['title1', 'title-2', 'title_3'])
def test_print_dataframe(self, logger, comment_dataframe, title):
Expand Down

0 comments on commit 093c4a9

Please sign in to comment.