HTML & Image Merge Bug FIX

HTML 생성, 이미지 병합 기능 버그 수정
pgh268400 · Nov 3, 2022 · 163d5b2
1 parent 5ee9800
commit 163d5b2
Show file tree

Hide file tree

Showing 4 changed files with 92 additions and 26 deletions.
diff --git a/main.py b/main.py
@@ -17,7 +17,7 @@
     print('<모드를 선택해주세요>')
     print('d : 다운로드')
     print('m : 이미지 병합')
-    print('h : html 생성')
+    print('h : HTML 생성')
     dialog = input('>>> ')
     if dialog.lower() == 'd':
         query = input("정보를 입력해주세요(웹툰ID, URL, 웹툰제목) : ")
@@ -60,7 +60,7 @@
         image.run()
         input('작업이 완료되었습니다.')
     elif dialog.lower() == 'h':
-        path = input("html 생성할 웹툰 경로를 입력해주세요 : ")
+        path = input("HTML을 생성할 웹툰 경로를 입력해주세요 : ")
         html = HtmlMaker(path)
         html.print_lists()
         html.run()

diff --git a/module/HtmlMaker.py b/module/HtmlMaker.py
@@ -3,6 +3,7 @@
 import chardet
 from .ImageMerger import ImageMerger
 from jinja2 import Template  # html 템플릿용
+import natsort
 
 # 파이썬 3에서는 모든 import 문은 기본적으로 절대(absolute) import다.
 # 만약 파이썬 3에서 상대 import를 사용하고 싶다면 위처럼 (.) 으로 명시적으로 표현을 해주어야 한다.
@@ -17,6 +18,7 @@ class HtmlMaker(ImageMerger):
     def __init__(self, path):
         super().__init__(path)  # 부모 생성자 호출
         self.__title = os.path.basename(path)  # 웹툰 제목
+        self.__user_input_path = path  # 사용자가 입력한 경로
 
     def __read_file(self, path):
         # 파일 열어서 인코딩 확인
@@ -40,6 +42,10 @@ def __read_file(self, path):
     # Python __ : private, _ : protected
     def _processing(self, file_lst: list):
         try:
+            # 파일리스트가 비었으면 처리하지 않는다
+            if not file_lst:
+                return
+
             rel_base_path = os.path.dirname(file_lst[0])  # 웹툰이 저장되어 있는 폴더 경로
             base_path = os.path.abspath(rel_base_path)  # 절대경로로 변환
             print("기반 경로 : ", base_path)
@@ -52,11 +58,21 @@ def _processing(self, file_lst: list):
 
             # html template 을 위한 데이터를 생성한다.
             episode = os.path.basename(os.path.dirname(file_lst[0]))
-            episode = episode.split()[1]
+            episode = " ".join(episode.split()[1:])
 
             img_lst = []
             for file in file_lst:
-                img_lst.append(os.path.basename(file))
+                # file 소문자로 변환
+                file = file.lower()
+
+
+
+                # 이미지 파일이고, output.png가 아닌 경우만 추가한다.
+                if file.endswith('output.png'):
+                    continue
+
+                if file.endswith('.png') or file.endswith('.jpg') or file.endswith('.jpeg'):
+                    img_lst.append(os.path.basename(file))
 
             # print(img_lst)
 
@@ -75,5 +91,40 @@ def _processing(self, file_lst: list):
         except Exception as e:
             raise e
 
-    def make_html(self):
-        self.run()  # 실제로 Processing 해주는 함수
+    def __make_index(self, user_input_path):
+        print(f"{user_input_path} 위치에 색인을 생성중입니다...")
+
+        # 현재 경로를 기준으로 모든 폴더를 리스트로 가져온다
+        dir_lst = os.listdir(user_input_path)
+        dir_lst = natsort.natsorted(dir_lst)  # natural sort 로 정렬
+
+        pure_name_lst = []
+
+        # 이름 리스트에서 앞의 순번은 제거
+        for element in dir_lst:
+            item = os.path.basename(element)
+            item = " ".join(item.split()[1:])
+            pure_name_lst.append(item)
+
+        html_path = [
+            os.path.join(self.__title, element, "index.html") for element in dir_lst]
+
+        item_lst = list(zip(html_path, pure_name_lst))
+
+        # template을 읽고, 데이터를 채운다.
+        html_data = self.__read_file("./module/template2.html")
+        html_data = Template(html_data).render(
+            title=self.__title, item_lst=item_lst)
+
+        # index.html 파일을 생성한다.
+        # index_path = os.path.join(user_input_path, 'index.html')
+        f = open(f'{self.__title}.html', 'w', encoding="UTF-8")
+        f.write(html_data)
+        f.close()
+        print(f"전체 인덱스 파일 생성 완료")
+
+    # ImageMerger 와 동일한 인터페이스 제공
+
+    def run(self):
+        self.__make_index(self.__user_input_path)  # html 전체 색인 생성 함수
+        super().run()  # 부모 클래스의 run() 호출, 실제로 각 폴더에 html Processing 해주는 함수
diff --git a/module/ImageMerger.py b/module/ImageMerger.py
@@ -60,9 +60,14 @@ def __vconcat_resize_min(self, im_list, interpolation=cv2.INTER_CUBIC):
                           for im in im_list]
         return cv2.vconcat(im_list_resize)
 
-     # 코드 참고 : https://stackoverflow.com/questions/53876007/how-to-vertically-merge-two-images
+    # 코드 참고 : https://stackoverflow.com/questions/53876007/how-to-vertically-merge-two-images
+    #  실제로 구현해야 하는 함수
     def _processing(self, file_lst: list):
         try:
+            # 파일리스트가 비었으면 아무것도 하지 않는다.
+            if not file_lst:
+                return
+
             rel_base_path = os.path.dirname(file_lst[0])  # 웹툰이 저장되어 있는 폴더 경로
             base_path = os.path.abspath(rel_base_path)  # 절대경로로 변환
             print("기반 경로 : ", base_path)
@@ -84,6 +89,9 @@ def _processing(self, file_lst: list):
 
             img_lst = []
             for image_file in file_lst:
+                # 소문자로 변환
+                image_file = image_file.lower()
+
                 # 이미지 읽기 (이미지 파일인경우만 수행)
                 if image_file.endswith('.png') or image_file.endswith('.jpg') or image_file.endswith('.jpeg'):
                     print("이미지 파일 : ", image_file)

diff --git a/module/Nwebtoon.py b/module/Nwebtoon.py
@@ -104,20 +104,22 @@ def filename_remover(self, string):
         cleaner = re.compile('<.*?>|&([a-z0-9]+|#[0-9]{1,6}|#x[0-9a-f]{1,6});')
         string = re.sub(cleaner, '', string)
 
+        # 끝에 . 제거 ex) test... -> test
         while string[-1] == '.':
-            string = string[:-1]  # 끝에 . 제거 ex) test... -> test
+            string = string[:-1]
 
+        # 폴더에 저장할 수 없는 문자 제거
         non_directory_letter = []
         if os.name == 'nt':
             non_directory_letter = ['/', ':', '*',
                                     '?', '<', '>', '|']  # 경로 금지 문자열 제거
         elif os.name == 'posix':
             non_directory_letter = [':', '*',
-                                    '?', '<', '>', '|']  # 경로 금지 문자열 제거
+                                    '?', '<', '>', '|']  # 경로 금지 문자열 제거 (리눅스에선 / 가 경로 구분자라 제거하지 않음)
 
-        for str_ in non_directory_letter:
-            if str_ in string:
-                string = string.replace(str_, "")
+        for char in non_directory_letter:
+            if char in string:
+                string = string.replace(char, "")
         return string
 
     def tag_remover(self, string):
@@ -139,16 +141,17 @@ def image_download(self, url, file_name):
     # 단일 이미지 다운로드
     def single_download(self, args):
         print(args, '화 다운로드 시작되었습니다')
-        url = "https://comic.naver.com/" + self.__wtype + \
-            "/detail.nhn?titleId=" + self.__title_id + "&no=" + args
+        # url = "https://comic.naver.com/" + self.__wtype + \
+        #     "/detail.nhn?titleId=" + self.__title_id + "&no=" + args
+        url = f"https://comic.naver.com/{self.__wtype}/detail.nhn?titleId={self.__title_id}&no={args}"
         req = requests.get(url)
         soup = BeautifulSoup(req.content, 'html.parser')
 
-        manga_title = soup.select('div.tit_area > div.view > h3')  # 웹툰 제목 가져오기
+        manga_title = soup.select('div.tit_area > div.view > h3')[
+            0].get_text()  # 웹툰 제목 가져오기
         # 리스트를 string 으로 바꾸고 불필요한 string 제거한다.
-        manga_title = self.tag_remover(str(manga_title[0]))
-        # path = str(self.__title) + '\\' + manga_title
-        path = os.path.join(str(self.__title), manga_title)
+        manga_title = self.tag_remover(manga_title)
+        path = os.path.join(self.__title, manga_title)
 
         try:
             print("path : ", path)
@@ -175,6 +178,7 @@ def single_download(self, args):
     def multi_download(self, dialog):
         global download_index
 
+        # 멀티 프로세싱을 이용한 병렬 다운로드 처리
         download_index = int(dialog.split('-')[0])
         core_count = multiprocessing.cpu_count() * 2
         download_range = dialog.split('-')
@@ -188,21 +192,25 @@ def get_image_link(self, args):
         global download_index
         result = []
         for i in range(int(args[0]), int(args[1]) + 1):
-            url = "https://comic.naver.com/" + self.__wtype + \
-                "/detail.nhn?titleId=" + self.__title_id + "&no=" + str(i)
+            # url = "https://comic.naver.com/" + self.__wtype + \
+            #     "/detail.nhn?titleId=" + self.__title_id + "&no=" + str(i)
+            # fstring으로 변경
+            url = f"https://comic.naver.com/{self.__wtype}/detail.nhn?titleId={self.__title_id}&no={i}"
+
             cookies = {'NID_AUT': self.NID_AUT, 'NID_SES': self.NID_SES}
             req = requests.get(url, cookies=cookies)
             soup = BeautifulSoup(req.content, 'html.parser')
+
             manga_title = soup.select(
-                'div.tit_area > div.view > h3')  # 웹툰 제목 가져오기
+                'div.tit_area > div.view > h3')[0].get_text()  # 웹툰 제목 가져오기
             # 리스트를 string 으로 바꾸고 불필요한 string 제거한다.
-            manga_title = self.filename_remover(str(manga_title[0]))
+            manga_title = self.filename_remover(manga_title)
 
-            idx = "[" + str(download_index) + "] "  # 순번매기기 형식 [0], [1]...
+            # idx = "[" + str(download_index) + "] "  # 순번매기기 형식 [0], [1]...
+            idx = f"[{download_index}] "
 
             # running_path = os.path.abspath(os.path.dirname(__file__))
-            img_path = os.path.join(str(
-                self.__title), idx + manga_title)
+            img_path = os.path.join(self.__title, idx + manga_title)
 
             path = self.filename_remover(img_path)
 
@@ -225,7 +233,6 @@ def get_image_link(self, args):
 
                 parsed = parse.urlparse(url)
                 name, ext = os.path.splitext(parsed.path)
-                # _path = path + "\\" + str(j) + ext
                 _path = os.path.join(path, str(j) + ext)
 
                 if not 'img-ctguide-white.png' in url:  # 컷툰이미지 제거하기