Skip to content

Commit

Permalink
^q^
Browse files Browse the repository at this point in the history
  • Loading branch information
KurtBestor committed Jun 18, 2021
1 parent f253872 commit 09433e1
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 59 deletions.
23 changes: 13 additions & 10 deletions src/extractor/afreeca_downloader.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,13 @@
# uncompyle6 version 3.5.0
# Python bytecode 2.7 (62211)
# Decompiled from: Python 2.7.16 (v2.7.16:413a49145e, Mar 4 2019, 01:30:55) [MSC v.1500 32 bit (Intel)]
# Embedded file name: afreeca_downloader.pyo
# Compiled at: 2019-10-07 03:48:35
import downloader
from utils import Soup, Downloader, get_outdir, Session, LazyUrl, try_n, format_filename, clean_title
from utils import Soup, Downloader, get_outdir, Session, LazyUrl, try_n, format_filename, clean_title, get_print
import ree as re
from timee import sleep, time
import os
from io import BytesIO
import shutil
from m3u8_tools import playlist2stream, M3u8_stream
import errors


class Video(object):

Expand All @@ -33,7 +30,7 @@ class Downloader_afreeca(Downloader):

def read(self):
session = Session()
video = get_video(self.url, session)
video = get_video(self.url, session, self.cw)
self.urls.append(video.url)

self.setIcon(video.thumb)
Expand All @@ -53,19 +50,25 @@ def _get_stream(url_m3u8):


@try_n(8)
def get_video(url, session):
def get_video(url, session, cw):
print_ = get_print(cw)
while url.strip().endswith('/'):
url = url[:-1]

html = downloader.read_html(url, session=session)
if "document.location.href='https://login." in html:
raise errors.LoginRequired()
soup = Soup(html)
url_thumb = soup.find('meta', {'property': 'og:image'}).attrs['content']
params = re.findall('VodParameter *= *[\'"]([^\'"]+)[\'"]', html)[0]
print_('url_thumb: {}'.format(url_thumb))
params = re.find('VodParameter *= *[\'"]([^\'"]+)[\'"]', html, err='No VodParameter')
params += '&adultView=ADULT_VIEW&_={}'.format(int(time()*1000))
url_xml = 'http://afbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
url_xml = 'http://stbbs.afreecatv.com:8080/api/video/get_video_info.php?' + params
print(url_xml)
html = downloader.read_html(url_xml, session=session, referer=url)
soup = Soup(html)
if '<flag>PARTIAL_ADULT</flag>' in html:
raise errors.LoginRequired()
title = soup.find('title').string.strip()
urls_m3u8 = re.findall('https?://[^>]+playlist.m3u8', html)
if not urls_m3u8:
Expand Down
24 changes: 13 additions & 11 deletions src/extractor/daumtoon_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,16 @@ def get_id(url):
return header, body


def header_to_type(header):
    """Return the webtoon type name that corresponds to an id header.

    The header ``'league_'`` maps to ``'leaguetoon'``; every other header
    value maps to ``'webtoon'``.
    """
    return 'leaguetoon' if header == 'league_' else 'webtoon'


def get_info(url, session):
referer = url
header, id = get_id(referer)
if 'league_' in id:
type_ = 'leaguetoon'
else:
type_ = 'webtoon'
type_ = header_to_type(header)

info = {}
ids = set()
Expand Down Expand Up @@ -122,7 +125,7 @@ def init(self):
if '/viewer/' in self.url:
return self.Invalid(tr_('목록 주소를 입력해주세요: {}').format(self.url))
if '/view/' not in self.url and not self.url.lower().startswith('http'):
self.url = ('http://webtoon.daum.net/webtoon/view/{}').format(self.url)
self.url = 'http://webtoon.daum.net/webtoon/view/{}'.format(self.url)
self.session = None
self._info = get_info(self.url, self.session)

Expand All @@ -145,7 +148,6 @@ def read(self):

self.title = self.name
self.session = None
return


def get_imgs(page, session, cw):
Expand All @@ -158,15 +160,15 @@ def get_imgs(page, session, cw):
header, id = get_id(page.url)
t = int(time())
soup = Soup(html)
if 'league_' in id:
type_ = 'leaguetoon'
else:
type_ = 'webtoon'
type_ = header_to_type(header)

url_data = 'http://webtoon.daum.net/data/pc/{}/viewer/{}?timeStamp={}'.format(type_, id, t)
data_raw = downloader.read_html(url_data, session=session, referer=page.url)
data = json.loads(data_raw)
m_type = data['data']['webtoonEpisode']['multiType']
if header == 'league_':
m_type = None
else:
m_type = data['data']['webtoonEpisode']['multiType']
print_('m_type: {}'.format(m_type))

if m_type == 'chatting':
Expand Down
19 changes: 14 additions & 5 deletions src/extractor/kakaopage_downloader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import downloader
import ree as re
from utils import Session, LazyUrl, Soup, Downloader, try_n, get_print, clean_title, print_error, urljoin
from utils import Session, LazyUrl, Soup, Downloader, try_n, get_print, clean_title, print_error, urljoin, get_imgs_already
from time import sleep
from translator import tr_
import page_selector
Expand Down Expand Up @@ -48,11 +48,13 @@ def read(self):
info = get_info(self.url, self.session, cw=self.cw)

for img in info['imgs']:
self.urls.append(img.url)
if isinstance(img, Image):
img = img.url
self.urls.append(img)

self.artist = info['artist']

self.title = clean_title('[{}] {}'.format(info['artist'], info['title']))
self.title = info['title']



Expand Down Expand Up @@ -150,22 +152,29 @@ def get_info(url, session, cw=None):
soup = Soup(html)

title = soup.find('h2').text.strip()
info['title'] = title
artist = soup.find('meta', {'name': 'author'})['content']
for x in [' ,', ', ']:
while x in artist:
artist = artist.replace(x, ',')
artist = artist.replace(',', ', ')
info['artist'] = artist
info['title_raw'] = title
info['title'] = clean_title('[{}] {}'.format(artist, title))

imgs = []

for i, page in enumerate(pages):
if cw is not None:
if not cw.alive:
return
cw.setTitle('{} {} / {} ({} / {})'.format(tr_('읽는 중...'), title, page.title, i + 1, len(pages)))
cw.setTitle('{} {} / {} ({} / {})'.format(tr_('읽는 중...'), info['title'], page.title, i + 1, len(pages)))

#3463
imgs_already = get_imgs_already('kakaopage', info['title'], page, cw)
if imgs_already:
imgs += imgs_already
continue

try:
_imgs = get_imgs_page(page, session)
e_msg = None
Expand Down
61 changes: 34 additions & 27 deletions src/extractor/pixiv_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,12 @@ def fix_url(cls, url):
url = 'https://www.pixiv.net/bookmark_new_illust.php'
elif not re.find(r'^https?://', url) and '.' not in url:
url = 'https://www.pixiv.net/en/users/{}'.format(url)

#3474
url = re.sub(r'(users/[0-9]+)/artworks$', r'\1', url)

url = re.sub(r'[?&]p=[0-9]+$', '', url)
return url
return url.strip('/')

@classmethod
def key_id(cls, url):
Expand Down Expand Up @@ -295,29 +299,30 @@ def get_info(url, cw=None, depth=0, tags_add=None):
if id_ is None: #
id_ = my_id()
if id_ == my_id():
rest = 'all'
rests = ['show', 'hide']
else:
rest = 'show'
rests = ['show']
process_user(id_, info, api)
info['title'] = '{} (pixiv_bmk_{})'.format(info['artist'], info['artist_id'])
ids = []
ids_set = set()
offset = 0
while len(ids) < max_pid:
data = api.bookmarks(id_, offset, rest=rest)
c = 0
for id in [work['id'] for work in data['works']]:
if id in ids_set:
continue
ids_set.add(id)
ids.append(id)
c += 1
if not c:
break
offset += LIMIT
if depth == 0:
check_alive(cw)
process_ids(ids[:max_pid], info, imgs, cw, depth)
for rest in rests:
offset = 0
while len(ids) < max_pid:
data = api.bookmarks(id_, offset, rest=rest)
c = 0
for id in [work['id'] for work in data['works']]:
if id in ids_set:
continue
ids_set.add(id)
ids.append(id)
c += 1
if not c:
break
offset += LIMIT
if depth == 0:
check_alive(cw)
process_ids(ids, info, imgs, cw, depth)
elif '/tags/' in url or 'search.php' in url: # Search
q = unquote(re.find(r'/tags/([^/]+)', url) or re.find('[?&]word=([^&]*)', url, err='no tags'))
info['title'] = '{} (pixiv_search_{})'.format(q, q.replace(' ', '+'))
Expand Down Expand Up @@ -364,7 +369,7 @@ def get_info(url, cw=None, depth=0, tags_add=None):
if not c:
break
p += 1
process_ids(ids[:max_pid], info, imgs, cw, depth)
process_ids(ids, info, imgs, cw, depth)
elif 'bookmark_new_illust.php' in url or 'bookmark_new_illust_r18.php' in url: # Newest works: Following
r18 = 'bookmark_new_illust_r18.php' in url
id_ = my_id()
Expand All @@ -384,16 +389,18 @@ def get_info(url, cw=None, depth=0, tags_add=None):
if not c:
break
p += 1
process_ids(ids[:max_pid], info, imgs, cw, depth)
process_ids(ids, info, imgs, cw, depth)
elif api.user_id(url): # User illusts
m = re.search(r'/users/[0-9]+/([\w]+)/?([^\?#/]*)', url)
if m is None:
types = ['illusts', 'manga']
tag = None
type_ = {'illustrations': 'illusts', 'manga': 'manga'}.get(m and m.groups()[0])
if type_:
types = [type_]
else:
type_ = m.groups()[0]
types = [{'illustrations': 'illusts'}.get(type_) or type_]
types = ['illusts', 'manga']
if m:
tag = unquote(m.groups()[1]) or None
else:
tag = None
print_('types: {}, tag: {}'.format(types, tag))

id_ = api.user_id(url)
Expand All @@ -410,7 +417,7 @@ def get_info(url, cw=None, depth=0, tags_add=None):
ids = sorted(ids, key=int, reverse=True)
if not ids:
raise Exception('no imgs')
process_ids(ids[:max_pid], info, imgs, cw, depth, tags_add=[tag] if tag else None)
process_ids(ids, info, imgs, cw, depth, tags_add=[tag] if tag else None)
else:
raise NotImplementedError()
info['imgs'] = imgs[:max_pid]
Expand Down
13 changes: 7 additions & 6 deletions src/extractor/rule34_xxx_downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,19 @@ class Downloader_rule34_xxx(Downloader):
MAX_CORE = 8
display_name = 'Rule34.xxx'
_name = None

def init(self):
if 'rule34.xxx' in self.url.lower():
self.url = self.url.replace('http://', 'https://')

@classmethod
def fix_url(cls, url):
if 'rule34.xxx' in url.lower():
url = url.replace('http://', 'https://')
else:
url = self.url
url = url.replace(' ', '+')
while '++' in url:
url = url.replace('++', '+')
url = quote(url)
url = url.replace('%2B', '+')
self.url = u'https://rule34.xxx/index.php?page=post&s=list&tags={}'.format(url)
url = u'https://rule34.xxx/index.php?page=post&s=list&tags={}'.format(url)
return url

@property
def name(self):
Expand Down

0 comments on commit 09433e1

Please sign in to comment.