Skip to content
This repository has been archived by the owner on Dec 30, 2020. It is now read-only.

Commit

Permalink
fix(icourse163): 修复部分课程中富文本意外匹配到附件的情况
Browse files Browse the repository at this point in the history
  • Loading branch information
SigureMo committed Dec 13, 2018
2 parents 2bd6151 + 4a1d517 commit c872b09
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 15 deletions.
15 changes: 15 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# EditorConfig
# https://editorconfig.org/

root = true

[*]
indent_style = space
indent_size = 2
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true

[*.py]
indent_size = 4
8 changes: 3 additions & 5 deletions .github/ISSUE_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
## 错误简述
## 请在这里填写错误简述

网站:中国大学MOOC(网易云课堂 MOOC、学堂在线)

课程地址:(课程地址)
课程地址:(请在这里填写课程地址)

问题描述:(问题描述)

(请将相关内容替换,按照格式填写 issue,方便我快速找到错误)
问题描述:(请在这里填写问题描述)
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -108,3 +108,11 @@ ENV/

# course dir
* - */

# Visual Studio Code
.vscode/

# Node
node_modules/
yarn.lock
commitlint.config.js
20 changes: 11 additions & 9 deletions mooc/icourse163.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def get_summary(url):
term_id = re.search(r'termId : "(\d+)"', res).group(1)
names = re.findall(r'name:"(.+)"', res)

dir_name = course_dir(names[0], names[1])
dir_name = course_dir(*names[:2])

print(dir_name)
CONFIG['term_id'] = term_id
Expand All @@ -32,7 +32,7 @@ def parse_resource(resource):
'httpSessionId': '5531d06316b34b9486a6891710115ebc', 'c0-scriptName': 'CourseBean',
'c0-methodName': 'getLessonUnitLearnVo', 'c0-id': '0', 'c0-param0': 'number:' + resource.meta[0],
'c0-param1': 'number:' + resource.meta[1], 'c0-param2': 'number:0',
'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time() * 1000))}
'c0-param3': 'number:' + resource.meta[2], 'batchId': str(int(time.time()) * 1000)}
res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getLessonUnitLearnVo.dwr',
data=post_data).text

Expand Down Expand Up @@ -65,7 +65,7 @@ def parse_resource(resource):
FILES['renamer'].write(re.search(r'(\w+\.mp4)', url).group(1), file_name, ext)
FILES['video'].write_string(url)
resource.ext = ext

else:
resolutions = ['Shd', 'Hd', 'Sd']
for sp in resolutions[CONFIG['resolution']:]:
Expand Down Expand Up @@ -122,7 +122,7 @@ def get_resource(term_id):

post_data = {'callCount': '1', 'scriptSessionId': '${scriptSessionId}190', 'c0-scriptName': 'CourseBean',
'c0-methodName': 'getMocTermDto', 'c0-id': '0', 'c0-param0': 'number:' + term_id,
'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time() * 1000))}
'c0-param1': 'number:0', 'c0-param2': 'boolean:true', 'batchId': str(int(time.time()) * 1000)}
res = CANDY.post('https://www.icourse163.org/dwr/call/plaincall/CourseBean.getMocTermDto.dwr',
data=post_data).text.encode('utf_8').decode('unicode_escape')

Expand Down Expand Up @@ -153,8 +153,8 @@ def get_resource(term_id):
pdf_list.append(Document(counter, pdf[3], pdf))
counter.reset()

rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?";);.+lessonId=' +
lesson[0] + r'.+name="([\s\S]+?)"', res)
rich_text = re.findall(r'contentId=(\d+).+contentType=(4).+id=(\d+).+jsonContent=(.+?);.+lessonId=' +
lesson[0] + r'.+name="([\s\S]]+)"', res)
for text in rich_text:
counter.add(2)
outline.write(text[4], counter, 2, sign='+')
Expand Down Expand Up @@ -196,21 +196,23 @@ def start(url, config, cookies):
global WORK_DIR
CANDY.set_cookies(cookies)
CONFIG.update(config)

if cookies.get('NTESSTUDYSI'):
CONFIG['hasToken'] = True
CONFIG['token'] = cookies.get('NTESSTUDYSI')
else:
CONFIG['hasToken'] = False
course_info = get_summary(url)
term_id, dir_name = get_summary(url)

WORK_DIR = WorkingDir(CONFIG['dir'], course_info[1])
WORK_DIR = WorkingDir(CONFIG['dir'], dir_name)
WORK_DIR.change('Videos')
FILES['renamer'] = Renamer(WORK_DIR.file('Rename.{ext}'))
FILES['video'] = ClassicFile(WORK_DIR.file('Videos.txt'))

get_resource(course_info[0])
get_resource(term_id)

if CONFIG['aria2']:
del FILES['video']
WORK_DIR.change('Videos')
aria2_download(CONFIG['aria2'], WORK_DIR.path, webui=CONFIG['aria2-webui'], session=CONFIG['aria2-session'])

2 changes: 1 addition & 1 deletion mooc/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ class Crawler(requests.Session):
"""

header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/68.0.3440.106 Safari/537.36'}
'Chrome/71.0.3578.80 Safari/537.36'}

def __init__(self):
"""初始化 Session,并更新头部"""
Expand Down

0 comments on commit c872b09

Please sign in to comment.