From bde077d23a32ed34c36408d0d811f7567eb428c3 Mon Sep 17 00:00:00 2001 From: Jean-Francois Simoneau Date: Sun, 22 Sep 2024 02:10:36 -0400 Subject: [PATCH] Add the ability to specify a robots.txt file, with a default disallowing the download links --- README.md | 1 + app/main.py | 11 +++++++++++ 2 files changed, 12 insertions(+) diff --git a/README.md b/README.md index d730f2e..d88dcc9 100644 --- a/README.md +++ b/README.md @@ -60,6 +60,7 @@ Certain values can be set via environment variables, using the `-e` parameter on * __DEFAULT_OPTION_PLAYLIST_ITEM_LIMIT__: Maximum numer of playlist items that can be downloaded. Defaults to `0` (no limit). * __YTDL_OPTIONS__: Additional options to pass to youtube-dl, in JSON format. [See available options here](https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/YoutubeDL.py#L183). They roughly correspond to command-line options, though some do not have exact equivalents here, for example `--recode-video` has to be specified via `postprocessors`. Also note that dashes are replaced with underscores. * __YTDL_OPTIONS_FILE__: A path to a JSON file that will be loaded and used for populating `YTDL_OPTIONS` above. Please note that if both `YTDL_OPTIONS_FILE` and `YTDL_OPTIONS` are specified, the options in `YTDL_OPTIONS` take precedence. 
+* __ROBOTS_TXT__: A path to a `robots.txt` file mounted in the container
 
 The following example value for `YTDL_OPTIONS` embeds English subtitles and chapter markers (for videos that have them), and also changes the permissions on the downloaded video and sets the file modification timestamp to the date of when it was downloaded:
 
diff --git a/app/main.py b/app/main.py
index 9e10242..2ff3ec2 100644
--- a/app/main.py
+++ b/app/main.py
@@ -35,6 +35,7 @@ class Config:
         'DEFAULT_OPTION_PLAYLIST_ITEM_LIMIT' : '0',
         'YTDL_OPTIONS': '{}',
         'YTDL_OPTIONS_FILE': '',
+        'ROBOTS_TXT': '',
         'HOST': '0.0.0.0',
         'PORT': '8081',
         'HTTPS': 'false',
@@ -218,6 +219,16 @@ def index(request):
         response.set_cookie('metube_theme', config.DEFAULT_THEME)
     return response
 
+@routes.get(config.URL_PREFIX + 'robots.txt')
+def robots(request):
+    """Serve the file named by ROBOTS_TXT if set, else default rules hiding download links."""
+    if config.ROBOTS_TXT:
+        return web.FileResponse(os.path.join(config.BASE_DIR, config.ROBOTS_TXT))
+    # Prefix the default rules with URL_PREFIX, like the route itself, so they match prefixed deployments.
+    disallow = "Disallow: " + config.URL_PREFIX
+    text = "User-agent: *\n" + disallow + "download/\n" + disallow + "audio_download/\n"
+    return web.Response(text=text)
+
 if config.URL_PREFIX != '/':
     @routes.get('/')
     def index_redirect_root(request):