Skip to content

Commit

Permalink
Merge pull request #3 from Kinuseka/experimental
Browse files Browse the repository at this point in the history
Adding a CF bypass.
  • Loading branch information
Kinuseka authored Jul 14, 2022
2 parents a3928dc + c875933 commit be9a4ab
Show file tree
Hide file tree
Showing 8 changed files with 562 additions and 14 deletions.
194 changes: 194 additions & 0 deletions Lib/NHentai_cf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
from logging import exception
from bs4 import BeautifulSoup
import re
import json
import yaml
import requests
import pickle

#I recommend reading into the source code of the nhentai website to get a better understanding of what my code really does

site_domain = "net"
headers = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.103 Safari/537.36"}
#Optional
def CheckLink(data, digit=False):
    '''For MODDERS:
    This part is where you modify your OWN link checker to your own target site to scrape.
    Most of the time there wont be any major edits other than the website you want to check.

    Arguments:
        data  -- either a numeric gallery id (digit=True) or a URL string.
        digit -- when True, build and return the gallery URL for the id.
    Returns:
        str when digit=True, otherwise a (status, message) tuple where
        status 0 means the link is a valid nhentai gallery URL and 2 means it is not.
    '''
    if digit:
        # Numeric gallery id: build the direct gallery URL.
        return f"https://nhentai.{site_domain}/g/{data}"
    # Raw string so "\d" is a real regex escape; dots escaped so the pattern
    # matches a literal "nhentai.net" instead of any character.
    if re.search(rf"https?://nhentai\.{site_domain}/g/(\d+|/)", data.lower()):
        return (0, data)
    return (2, "Link is not nHentai")

#Main API
class Api:
    """Scrape metadata and image links for a single NHentai gallery.

    The gallery page is fetched through a requests.Session primed with the
    cookies and user-agent captured by the HSite.SiteCFBypass selenium helper,
    so that requests pass the Cloudflare challenge.
    """
    # Use INIT to initialize the needed data, for increased and faster loading times to other functions
    def __init__(self, data):
        '''
        argument 'data' should be a valid link to the target booklet (gallery)
        '''

        self.name = "NHentai" #Directory label
        #NHENTAI SITE FORTUNATELY HAS A DEDICATED JSON EMBEDDED INTO A SCRIPT FILE THAT YOU CAN USE TO GAIN INFORMATION FROM THE SITE.
        #DIFFERENT SITES MIGHT NOT HAVE A JSON FILE SO YOU WILL HAVE TO DO THE PROCESS MANUALLY

        #Initialize path
        # Paths to the pickled cookie jar / user-agent written by the bypass helper.
        self.cookie_path = HSite.cookie_path
        self.session_path = HSite.session_path
        self._data_level = data
        #Check token: run the selenium bypass if no usable cookies exist yet.
        self.__reload_cf_token()

        #Load Cookies
        s = requests.Session()
        self.__set_cookies(s)

        # Fetch the gallery page with one retry: a 503 is treated as a stale
        # Cloudflare clearance, so the token is refreshed before the 2nd try.
        caught_exception = None
        for t in range(0, 2):
            try:
                content = s.get(data)
                content.raise_for_status()
                break
            except requests.exceptions.HTTPError as e:
                http_code = e.response.status_code
                caught_exception = e
                if http_code == 404:
                    raise cferror.NotFound()
                elif http_code == 503:
                    #CF blocked us, update the token
                    #Recheck token
                    self.__reload_cf_token(reset=True)
                    self.__set_cookies(s)
            except requests.exceptions.ConnectionError as e:
                caught_exception = e
                caught_message = "There has been issues with trying to connect"
                # NOTE(review): caught_message is never passed along, and
                # NetworkError is declared with (code, content) parameters --
                # verify this bare call does not itself raise TypeError.
                raise cferror.NetworkError()

        else:
            # for/else: reached only when both attempts failed without break
            # (i.e. caught_exception is an HTTPError from the last attempt).
            caught_code = caught_exception.response.status_code
            caught_content = caught_exception.response.content
            caught_message = "There has been issues communicating with the server"
            if caught_code == 503:
                raise cferror.CloudflareBlocked(caught_code, caught_content)
            raise cferror.HTTPError(caught_code, caught_content, caught_message)

        page = content.content
        self.soup = BeautifulSoup(page, "html.parser")
        # Strip the JS wrapper around the embedded gallery JSON.
        # assumes the 3rd <script> tag holds "window._gallery = JSON.parse(...)"
        # -- TODO confirm this index is stable across page layouts.
        script = (self.soup.find_all("script")[2].contents[0]).strip().replace("window._gallery = JSON.parse(", "").replace(");", "")
        #IF THERE IS NO ERROR THEN PROCEED
        # Double-decode: the outer loads yields the JSON string literal that
        # JSON.parse received; the inner loads turns that into a dict.
        self.json = json.loads(json.loads(script))

        self.__preloader_pages()

    def Pages(self):
        "Total available pages count"
        Page = len(self.json["images"]["pages"])
        return Page

    def Tags(self):
        """For MODDERS:
        For better readability for humans or other programs, I recommend you use Json to serialize your data.
        """
        Tag = self.json["tags"]
        return Tag

    def Title(self):
        # English title as embedded in the gallery JSON.
        title = self.json["title"]["english"]
        return title

    def Direct_link(self, value):
        """For MODDERS:
        This function is only used to RETURN a valid direct link to the targeted image.
        The variable 'value' is the episode/page of the certain image to return.
        """
        # "t" is the per-page image-type code from the gallery JSON
        # (j=jpg, p=png, g=gif).
        data = self.preloaded_data[value-1]
        file = data["t"]
        if file == "j":
            extension = "jpg"
        elif file == "p":
            extension = "png"
        elif file == "g":
            extension = "gif"
        else:
            print("WARNING AT PAGE: %s\nUNIDENTIFIED FORMAT DETECTED REPORT THIS BUG\nautoset: jpg" % value)
            extension = "jpg"
        media_id = self.json["media_id"]
        url = "https://i.nhentai.net/galleries/%s/%s.%s" % (media_id, value, extension)
        #url = "https://t.dogehls.xyz/galleries/%s/%s.%s" % (media_id, value, extension)
        return url

    def __preloader_pages(self):
        # Normalize the per-page metadata into a list indexed by page-1.
        dict_data = self.json["images"]["pages"]
        data = []
        try:
            for v in range(self.Pages()):
                # NOTE(review): keys start at "2" (v+2) -- this looks like an
                # off-by-one (page keys usually start at 1); confirm against
                # the dict-shaped payloads this branch is meant to handle.
                data.append(dict_data[f"{v+2}"])
        except TypeError as e:
            # "pages" is already a list (the usual nhentai.net shape): use as-is.
            data = dict_data
        self.preloaded_data = data

    def __reload_cf_token(self, reset=False):
        # Run the selenium-based Cloudflare bypass when no cookie file exists,
        # or unconditionally when reset=True (stale cookies are deleted first).
        cookieStatus = HSite.SiteCFBypass.cookie_available()
        if not cookieStatus[0] or reset:
            NHS = HSite.SiteCFBypass(self._data_level)
            if reset:
                HSite.SiteCFBypass.delete_cookies()
            NHS.start()
            NHS.join()

    def __set_cookies(self, session):
        # Load the pickled selenium cookies and user-agent into the session.
        # NOTE(review): the two open() handles are never closed explicitly.
        cookies = pickle.load(open(self.cookie_path, "rb"))
        selenium_headers = pickle.load(open(self.session_path, "rb"))
        session.headers.update({"user-agent": selenium_headers})
        for cookie in cookies:
            session.cookies.set(cookie['name'], cookie['value'], domain=cookie['domain'])

class Iterdata:
    """File iterator used to automatically detect gallery codes inside a text file.

    Numbers may be separated by commas and/or whitespace. Runs longer than six
    digits are split into 6-character chunks (gallery ids are at most 6 digits).
    Use as a context manager; iterating yields each code as a string.
    """
    def __init__(self, data):
        self.available = True  # Indicates the feature is available; False if none.
        self.data = data       # Path to the input text file.
        self._index = -1       # Cursor for __next__; -1 means "before first item".
        self.temptxt = []      # Extracted codes, filled by __enter__.

    def __iter__(self):
        return self

    def __enter__(self):
        self.txt_line = open(self.data, "r")
        # Reset state so re-entering does not accumulate codes from a prior pass.
        self.temptxt = []
        self._index = -1
        for rawline in self.txt_line:
            for token in rawline.replace(",", " ").split():
                if not token.isdigit():
                    continue
                if len(token) > 6:
                    # Bug fix: previously the oversized token itself was ALSO
                    # appended after its 6-char chunks, injecting an invalid
                    # code; now only the chunks are kept.
                    self.temptxt.extend(re.findall(r".{1,6}", token))
                else:
                    self.temptxt.append(token)
        return self

    def __next__(self):
        self._index += 1
        if self._index >= len(self.temptxt):
            raise StopIteration
        return self.temptxt[self._index]

    def __reversed__(self):
        return self.temptxt[::-1]

    def __exit__(self, tp, v, tb):
        self.txt_line.close()


if __name__ == "__main__":
    # NOTE(review): explicit relative imports ("..essentials") fail when a file
    # is executed directly as a script (there is no parent package), so this
    # branch looks inverted relative to the 'else' branch -- confirm the
    # intended run mode for this module.
    from ..essentials import HSite
    from ..essentials.Errors import exception as cferror
else:
    # Imported as a module from the project root: absolute package imports.
    from essentials import HSite
    from essentials.Errors import exception as cferror
2 changes: 1 addition & 1 deletion Process.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,4 +296,4 @@ def progress(self):
VolatileData = VData()

if __name__ == "__main__":
Download(input(),1,"Downloads")
pass
30 changes: 26 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,35 @@ A python script that collects data from NHentai.net.
>Anyio (Trio Backend)
>Trio
>pyyaml
>undetected-chromedriver
```
**Repositories:**


>[Undetected Chromedriver](https://github.com/ultrafunkamsterdam/undetected-chromedriver)

**Supported Sites at the moment**
```
• NHentai [Mirror sites: .to])
• NHentai.net[Mirror sites: Nhentai.to])
```

**Note:**
Mirror download is enabled by default in case the official site is not available. If you prefer
to disable this, set "mirror_available" to `false` in config.json
## Run:
`python Start_download.py [args -n/--nuke-code]`
**ex:**
`python Start_download.py -n 401084`




## Note:
> Bypassing cloudflare requires a modified selenium which does not support headless mode. If you are not on a
desktop environment, you are better off setting `cf_bypass` to `false` in config.json.

> Mirror server is enabled by default in case the official site is not available. If you prefer
to disable this, set "mirror_available" to `false` in config.json

> The Mirror server is slightly outdated compared to the official site. Some titles might not be available, especially newer releases like 40000+,
though they are continually updated as time goes on.


28 changes: 20 additions & 8 deletions Start_download.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#Standard Library
from cmath import inf
import sys
import os
import time
Expand All @@ -24,11 +23,17 @@
#Custom library
import Process
from essentials import updater as Updater
from essentials.Errors import exception as cferror

#EDIT THE MAXIMUM AMOUNT OF DOWNLOAD PROCESS HAPPENING AT THE SAME TIME
#LOWER VALUE: SLOWER, MORE STABLE (BEST IN SLOW NETWORK CONDITIONS)
#HIGHER VALUE: FASTER, LESS STABLE (BEST IN FAST NETWORK CONDITIONS

methods = [
"cfbypass",
"mirror"
]

class SortData:
AcquiredTags = None
AcquiredPage = None
Expand Down Expand Up @@ -56,7 +61,7 @@ def main(args):
Dir_name = Api.name

logger.info("Found: %s pages" % AcquiredPage)
logger.info('Title name: "%s"' % TitleName)
logger.info('Title name: %s' % TitleName)
logger.info("Acquiring direct links")
AcquiredLinks = Api.Link_Page(AcquiredPage)
logger.info("Total of: %s links is loaded" % len(AcquiredLinks))
Expand Down Expand Up @@ -229,8 +234,8 @@ def getSystemInfo(logtype):
# Create handlers
File = FileName()
c_handler = logging.StreamHandler(sys.stdout)
o_handler = logging.FileHandler(File)
f_handler = logging.FileHandler(File)
o_handler = logging.FileHandler(File,mode="a",encoding='utf-8')
f_handler = logging.FileHandler(File,mode="a",encoding='utf-8')
c_handler.setLevel(logging.INFO)
o_handler.setLevel(logging.INFO)
f_handler.setLevel(logging.INFO)
Expand All @@ -250,6 +255,7 @@ def getSystemInfo(logtype):
max_process_open = sconfig(1)
API_DATA_CONFIG = sconfig(2)
API_MIRROR_ACCOMPLISHED = False
API_CF_ACCOMPLISHED = False
EMERGENCY = 255
verbose = False
info = '''
Expand Down Expand Up @@ -295,16 +301,18 @@ def callers():
logger.error("This method is not available for the current module")
else:
main(args.nukecode)
except urllib.error.HTTPError as e:
except (urllib.error.HTTPError, cferror.NotFound, cferror.NetworkError) as e:
#ONLY OCCURS WHEN THERE IS NO RESULTS
if e.code == 404:
logger.error("The content you are looking for is not found")
else:
logger.error("HTTP Error Code: %s" % e.code)
if API_DATA_CONFIG["mirror_available"] and not API_MIRROR_ACCOMPLISHED:
if API_DATA_CONFIG["cf_bypass"] and not API_CF_ACCOMPLISHED:
return 102
elif API_DATA_CONFIG["mirror_available"] and not API_MIRROR_ACCOMPLISHED:
return 101
sys.exit(1)
except urllib.error.URLError as error:
except (urllib.error.URLError, cferror.NetworkError) as error:
logger.error("A connection error has occured")
loggon.exception("Exception catched: %s" % sys.exc_info()[0])
sys.exit(1)
Expand Down Expand Up @@ -334,7 +342,11 @@ def callers():
sys.exit()
while True:
exit_code = callers()
if exit_code == 101:
if exit_code == 102 and not API_CF_ACCOMPLISHED:
logger.info("Trying to bypass CF")
API_DATA_CONFIG["module_name"] = f'{API_DATA_CONFIG["module_name"]}_cf'
API_CF_ACCOMPLISHED = True
elif exit_code == 101 and not API_MIRROR_ACCOMPLISHED:
logger.info("Mirror server enabled, trying mirror server.")
API_DATA_CONFIG["module_name"] = f'{API_DATA_CONFIG["module_name"]}_mirror'
API_MIRROR_ACCOMPLISHED = True
Expand Down
1 change: 1 addition & 0 deletions config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"semaphore":10,
"Api": {
"module_name": "NHentai",
"cf_bypass": true,
"mirror_available": true
}
}
Expand Down
55 changes: 55 additions & 0 deletions essentials/Errors/exception.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@

class NotFound(Exception):
    """Raised when the requested gallery does not exist (HTTP 404)."""

    def __init__(self, *args, **kwargs):
        # Mirror the HTTP status on the instance so handlers can inspect it.
        self.code = 404
        # Fall back to the default message when raised without arguments.
        message_args = args if args else ('Not Found',)
        super().__init__(*message_args, **kwargs)

class NetworkError(Exception):
    """Raised on connection-level failures (DNS failure, refused connection, etc.).

    Bug fix: 'code' and 'content' now default to None so the exception can be
    raised bare -- callers (e.g. the Api retry loop in NHentai_cf) invoke
    NetworkError() with no arguments, which previously raised TypeError
    because both parameters were required.

    Attributes:
        code    -- HTTP/transport status if known, else None.
        content -- raw response body if available, else None.
    """
    def __init__(self, code=None, content=None, *args, **kwargs):
        default_message = 'There has been a connection error'
        self.code = code
        self.content = content

        if args:
            # ... pass them to the super constructor
            super().__init__(*args, **kwargs)
        else:  # else, the exception was raised without a message ...
            # ... pass the default message to the super constructor
            super().__init__(default_message, **kwargs)

class HTTPError(Exception):
    """Raised for unexpected HTTP status codes from the target site."""

    def __init__(self, code, content, *args, **kwargs):
        # Expose the status and raw body so callers can log/inspect them.
        self.code = code
        self.content = content
        # Use the caller-supplied message when present, else the default.
        message_args = args if args else ('There has been an HTTP error',)
        super().__init__(*message_args, **kwargs)

class CloudflareBlocked(Exception):
    """Raised when Cloudflare keeps blocking requests after a token refresh."""

    def __init__(self, code, content, *args, **kwargs):
        # Expose the status and raw body so callers can log/inspect them.
        self.code = code
        self.content = content
        # Use the caller-supplied message when present, else the default.
        message_args = args if args else ('CF has persistently blocked us, please report this issue.',)
        super().__init__(*message_args, **kwargs)

Loading

0 comments on commit be9a4ab

Please sign in to comment.