diff --git a/app/indexer/pdfparser.py b/app/indexer/pdfparser.py index 260d89d..8e989e1 100644 --- a/app/indexer/pdfparser.py +++ b/app/indexer/pdfparser.py @@ -75,7 +75,10 @@ def extract_txt(url, contributor): except Exception: print("ERROR extracting body text from pdf...") remove(local_pdf_path) - remove(local_pdf_path.replace('.pdf','.txt')) + try: + remove(local_pdf_path.replace('.pdf','.txt')) + except: + pass return title, body_str, language, snippet, cc, error if title == "": @@ -87,16 +90,25 @@ def extract_txt(url, contributor): title = "" error = "ERROR extract_html: Couldn't detect page language." remove(local_pdf_path) - remove(local_pdf_path.replace('.pdf','.txt')) + try: + remove(local_pdf_path.replace('.pdf','.txt')) + except: + pass return title, body_str, language, snippet, cc, error if language not in installed_languages: error = "ERROR extract_html: language is not supported." title = "" remove(local_pdf_path) - remove(local_pdf_path.replace('.pdf','.txt')) + try: + remove(local_pdf_path.replace('.pdf','.txt')) + except: + pass return title, body_str, language, snippet, cc, error snippet = ' '.join(body_str.split()[:snippet_length]) remove(local_pdf_path) - remove(local_pdf_path.replace('.pdf','.txt')) + try: + remove(local_pdf_path.replace('.pdf','.txt')) + except: + pass return title, body_str, language, snippet, cc, error