From 782ea61998ebce34e10028ba7b80f1ebd1c19785 Mon Sep 17 00:00:00 2001
From: bookfere
Date: Sat, 11 Nov 2023 15:47:55 +0800
Subject: [PATCH] fix: Identify pages by their type rather than a simple keyword. #169

---
 lib/element.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/element.py b/lib/element.py
index 6005561..67618a3 100644
--- a/lib/element.py
+++ b/lib/element.py
@@ -199,7 +199,7 @@ def load_element_patterns(self):
 
     def get_sorted_pages(self):
         return sorted(
-            [page for page in self.pages if 'html' in page.media_type],
+            [p for p in self.pages if isinstance(p.data, etree._Element)],
             key=lambda page: sorted_mixed_keys(page.href))
 
     def get_elements(self):