fix(icourse163): 修复部分课程中富文本意外匹配到附件的情况

Fixes Foair#19, Foair#21
SigureMo · Dec 13, 2018 · c872b09
2 parents 2bd6151 + 4a1d517
commit c872b09
Show file tree

Hide file tree

Showing 5 changed files with 38 additions and 15 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,15 @@
+# EditorConfig
+# https://editorconfig.org/
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 2
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+[*.py]
+indent_size = 4
diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md
@@ -1,9 +1,7 @@
-## （错误简述）
+## （请在这里填写错误简述）
 
 网站：中国大学MOOC（网易云课堂 MOOC、学堂在线）
 
-课程地址：（课程地址）
+课程地址：（请在这里填写课程地址）
 
-问题描述：（问题描述）
-
-（请将相关内容替换，按照格式填写 issue，方便我快速找到错误）
+问题描述：（请在这里填写问题描述）
diff --git a/.gitignore b/.gitignore
@@ -108,3 +108,11 @@ ENV/
 
 # course dir
 * - */
+
+# Visual Studio Code
+.vscode/
+
+# Node
+node_modules/
+yarn.lock
+commitlint.config.js
diff --git a/mooc/icourse163.py b/mooc/icourse163.py
@@ -18,7 +18,7 @@ def get_summary(url):
     term_id = re.search(r'termId : "(\d+)"', res).group(1)
     names = re.findall(r'name:"(.+)"', res)
 
-    dir_name = course_dir(names[0], names[1])
+    dir_name = course_dir(*names[:2])
 
     print(dir_name)
     CONFIG['term_id'] = term_id
@@ -32,7 +32,7 @@ def parse_resource(resource):
                  'httpSessionId': '5531d06316b34b9486a6891710115ebc', 'c0-scriptName': 'CourseBean',
                  'c0-methodName': 'getLessonUnitLearnVo', 'c0-id': '0', 'c0-param0': 'number:' + resource.meta[0],
                  'c0-param1': 'number:' + resource.meta[1], 'c0-param2': 'number:0',
-                 'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time() * 1000))}
+                 'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time()) * 1000)}
     res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr',
                      data=post_data).text
 
@@ -65,7 +65,7 @@ def parse_resource(resource):
             FILES['renamer'].write(re.search(r'(\w+\.mp4)', url).group(1), file_name, ext)
             FILES['video'].write_string(url)
             resource.ext = ext
-        
+
         else:
             resolutions = ['Shd', 'Hd', 'Sd']
             for sp in resolutions[CONFIG['resolution']:]:
@@ -122,7 +122,7 @@ def get_resource(term_id):
 
     post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'c0-scriptName': 'CourseBean',
                  'c0-methodName': 'getMocTermDto', 'c0-id': '0', 'c0-param0': 'number:' + term_id,
-                 'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time() * 1000))}
+                 'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time()) * 1000)}
     res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr',
                      data=post_data).text.encode('utf_8').decode('unicode_escape')
 
@@ -153,8 +153,8 @@ def get_resource(term_id):
                     pdf_list.append(Document(counter, pdf[3], pdf))
             counter.reset()
 
-            rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?";);.+lessonId=' +
-                                   lesson[0] + r'.+name="([\s\S]+?)"', res)
+            rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?);.+lessonId=' +
+                                   lesson[0] + r'.+name="([\s\S]+?)"', res)
             for text in rich_text:
                 counter.add(2)
                 outline.write(text[4], counter, 2, sign='+')
@@ -196,21 +196,23 @@ def start(url, config, cookies):
     global WORK_DIR
     CANDY.set_cookies(cookies)
     CONFIG.update(config)
+
     if cookies.get('NTESSTUDYSI'):
         CONFIG['hasToken'] = True
         CONFIG['token'] = cookies.get('NTESSTUDYSI')
     else:
         CONFIG['hasToken'] = False
-    course_info = get_summary(url)
+    term_id, dir_name = get_summary(url)
 
-    WORK_DIR = WorkingDir(CONFIG['dir'], course_info[1])
+    WORK_DIR = WorkingDir(CONFIG['dir'], dir_name)
     WORK_DIR.change('Videos')
     FILES['renamer'] = Renamer(WORK_DIR.file('Rename.{ext}'))
     FILES['video'] = ClassicFile(WORK_DIR.file('Videos.txt'))
 
-    get_resource(course_info[0])
+    get_resource(term_id)
 
     if CONFIG['aria2']:
         del FILES['video']
         WORK_DIR.change('Videos')
         aria2_download(CONFIG['aria2'], WORK_DIR.path, webui=CONFIG['aria2-webui'], session=CONFIG['aria2-session'])
+
diff --git a/mooc/utils.py b/mooc/utils.py
@@ -208,7 +208,7 @@ class Crawler(requests.Session):
     """
 
     header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
-                            'Chrome/68.0.3440.106 Safari/537.36'}
+                            'Chrome/71.0.3578.80 Safari/537.36'}
 
     def __init__(self):
         """初始化 Session，并更新头部"""