Skip to content

Commit

Permalink
add webapp route for /request_get to send a GET request with custom headers and proxy
Browse files Browse the repository at this point in the history
  • Loading branch information
lidong committed Nov 9, 2023
1 parent e408d74 commit 158236d
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 38 deletions.
113 changes: 113 additions & 0 deletions docs/reference/WebAPP.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Reference

```python
# ======================= server code ===========================

import uvicorn
from fastapi import FastAPI

from ichrome import AsyncTab
from ichrome.routers.fastapi_routes import ChromeAPIRouter

app = FastAPI()
# reset max_msg_size and window size for a large size screenshot
AsyncTab._DEFAULT_WS_KWARGS["max_msg_size"] = 10 * 1024**2
app.include_router(
ChromeAPIRouter(headless=True, extra_config=["--window-size=1920,1080"]),
prefix="/chrome",
)

uvicorn.run(app, port=8009)

# view url with your browser
# http://127.0.0.1:8009/chrome/screenshot?url=http://bing.com
# http://127.0.0.1:8009/chrome/download?url=http://bing.com

# ======================= client code ===========================

from inspect import getsource

import requests


# 1. request_get demo
print(
requests.get(
"http://127.0.0.1:8009/chrome/request_get",
params={
"__url": "http://httpbin.org/get?a=1", # [required] target URL
"__proxy": "http://127.0.0.1:1080", # [optional]
"__timeout": "10", # [optional]
"my_query": "OK", # [optional] params for target URL
},
# headers for target URL
headers={
"User-Agent": "OK",
"my_header": "OK",
"Cookie": "my_cookie1=OK",
},
# cookies={"my_cookie2": "OK"}, # [optional] cookies for target URL if headers["Cookie"] is None
).text,
flush=True,
)
# <html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{
# "args": {
# "my_query": "OK"
# },
# "headers": {
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
# "Accept-Encoding": "gzip, deflate",
# "Cookie": "my_cookie1=OK",
# "Host": "httpbin.org",
# "My-Header": "OK",
# "Upgrade-Insecure-Requests": "1",
# "User-Agent": "OK",
# "X-Amzn-Trace-Id": "Root=1-654d0157-04ab908a3779add762b164e3"
# },
# "origin": "0.0.0.0",
# "url": "http://httpbin.org/get?my_query=OK"
# }
# </pre></body></html>


# 2. test tab_callback
async def tab_callback(self, tab, data, timeout):
    """Navigate the tab to ``data["url"]`` and return the first <h1> text."""
    await tab.set_url(data["url"], timeout=timeout)
    heading = await tab.querySelector("h1")
    return heading.text


r = requests.post(
"http://127.0.0.1:8009/chrome/do",
json={
"data": {"url": "http://httpbin.org/html"},
"tab_callback": getsource(tab_callback),
"timeout": 10,
},
)
print(repr(r.text), flush=True)
'"Herman Melville - Moby-Dick"'


async def tab_callback(task, tab, data, timeout):
    """Wait up to 3 seconds for the page to finish loading, then return its HTML."""
    await tab.wait_loading(3)
    page_html = await tab.html
    return page_html


# 3. incognito_args demo
print(
requests.post(
"http://127.0.0.1:8009/chrome/do",
json={
"tab_callback": getsource(tab_callback),
"timeout": 10,
"incognito_args": {
"url": "http://httpbin.org/ip",
"proxyServer": "http://127.0.0.1:1080",
},
},
).text
)
# "<html><head><meta name=\"color-scheme\" content=\"light dark\"></head><body><pre style=\"word-wrap: break-word; white-space: pre-wrap;\">{\n \"origin\": \"103.171.177.94\"\n}\n</pre></body></html>"


```
2 changes: 1 addition & 1 deletion ichrome/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from .logs import logger
from .pool import ChromeEngine

__version__ = "4.0.0"
__version__ = "4.0.1"
__tips__ = "[github]: https://github.com/ClericPy/ichrome\n[cdp]: https://chromedevtools.github.io/devtools-protocol/\n[cmd args]: https://peter.sh/experiments/chromium-command-line-switches/"
__all__ = [
"ChromeDaemon",
Expand Down
29 changes: 27 additions & 2 deletions ichrome/pool.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,14 +501,30 @@ async def screenshot(
return b64decode(image)

async def download(
self, url: str, cssselector: str = None, wait_tag: str = None, timeout=None
self,
url: str,
cssselector: str = "",
wait_tag: str = "",
cookies: dict = None,
user_agent: str = "",
extra_headers: dict = None,
timeout=None,
incognito_args: dict = None,
) -> dict:
data = dict(url=url, cssselector=cssselector, wait_tag=wait_tag)
data = dict(
url=url,
cssselector=cssselector,
wait_tag=wait_tag,
cookies=cookies,
extra_headers=extra_headers,
user_agent=user_agent,
)
return await self.do(
data=data,
tab_callback=CommonUtils.download,
timeout=timeout,
tab_index=None,
incognito_args=incognito_args,
)

async def preview(self, url: str, wait_tag: str = None, timeout=None) -> bytes:
Expand Down Expand Up @@ -574,6 +590,15 @@ async def screenshot(self, tab: AsyncTab, data, timeout):
async def download(self, tab: AsyncTab, data, timeout):
start_time = time.time()
result = {"url": data["url"]}
cookies = data.get("cookies") or {}
for name, value in cookies.items():
await tab.set_cookie(name=name, value=value, url=data["url"])
user_agent = data.get("user_agent")
if user_agent:
await tab.set_ua(user_agent)
extra_headers = data.get("extra_headers")
if extra_headers:
await tab.set_headers(extra_headers)
await tab.set_url(data["url"], timeout=timeout)
if data["wait_tag"]:
timeout = timeout - (time.time() - start_time)
Expand Down
156 changes: 122 additions & 34 deletions ichrome/routers/fastapi_routes.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import typing
from urllib.parse import urlencode

from ..logs import logger
from ..pool import ChromeEngine

try:
from fastapi.requests import Request
from fastapi.responses import HTMLResponse, JSONResponse, Response
from fastapi.routing import APIRoute, APIRouter
from pydantic import BaseModel
Expand All @@ -12,10 +14,10 @@
"requirements are not all ready, run `pip install ichrome[web]` or `pip install fastapi uvicorn` first."
)
raise error
"""
# ======================= server code ===========================

import os

__doc__ = """
# ======================= server code ===========================
import uvicorn
from fastapi import FastAPI
Expand All @@ -25,57 +27,103 @@
app = FastAPI()
# reset max_msg_size and window size for a large size screenshot
AsyncTab._DEFAULT_WS_KWARGS['max_msg_size'] = 10 * 1024**2
app.include_router(ChromeAPIRouter(headless=True,
extra_config=['--window-size=1920,1080']),
prefix='/chrome')
AsyncTab._DEFAULT_WS_KWARGS["max_msg_size"] = 10 * 1024**2
app.include_router(
ChromeAPIRouter(headless=True, extra_config=["--window-size=1920,1080"]),
prefix="/chrome",
)
uvicorn.run(app)
uvicorn.run(app, port=8009)
# view url with your browser
# http://127.0.0.1:8000/chrome/screenshot?url=http://bing.com
# http://127.0.0.1:8000/chrome/download?url=http://bing.com
# http://127.0.0.1:8009/chrome/screenshot?url=http://bing.com
# http://127.0.0.1:8009/chrome/download?url=http://bing.com
# ======================= client code ===========================
from torequests import tPool
from inspect import getsource
req = tPool()
import requests
# 1. request_get demo
print(
requests.get(
"http://127.0.0.1:8009/chrome/request_get",
params={
"__url": "http://httpbin.org/get?a=1", # [required] target URL
"__proxy": "http://127.0.0.1:1080", # [optional]
"__timeout": "10", # [optional]
"my_query": "OK", # [optional] params for target URL
},
# headers for target URL
headers={
"User-Agent": "OK",
"my_header": "OK",
"Cookie": "my_cookie1=OK",
},
# cookies={"my_cookie2": "OK"}, # [optional] cookies for target URL if headers["Cookie"] is None
).text,
flush=True,
)
# <html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">{
# "args": {
# "my_query": "OK"
# },
# "headers": {
# "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
# "Accept-Encoding": "gzip, deflate",
# "Cookie": "my_cookie1=OK",
# "Host": "httpbin.org",
# "My-Header": "OK",
# "Upgrade-Insecure-Requests": "1",
# "User-Agent": "OK",
# "X-Amzn-Trace-Id": "Root=1-654d0157-04ab908a3779add762b164e3"
# },
# "origin": "0.0.0.0",
# "url": "http://httpbin.org/get?my_query=OK"
# }
# </pre></body></html>
# 2. test tab_callback
async def tab_callback(self, tab, data, timeout):
await tab.set_url(data['url'], timeout=timeout)
return (await tab.querySelector('h1')).text
await tab.set_url(data["url"], timeout=timeout)
return (await tab.querySelector("h1")).text
r = req.post('http://127.0.0.1:8000/chrome/do',
json={
'data': {
'url': 'http://httpbin.org/html'
},
'tab_callback': getsource(tab_callback),
'timeout': 10
})
print(r.text)
# "Herman Melville - Moby-Dick"
r = requests.post(
"http://127.0.0.1:8009/chrome/do",
json={
"data": {"url": "http://httpbin.org/html"},
"tab_callback": getsource(tab_callback),
"timeout": 10,
},
)
print(repr(r.text), flush=True)
'"Herman Melville - Moby-Dick"'
# incognito_args demo
async def tab_callback(task, tab, data, timeout):
await tab.wait_loading(3)
return await tab.html
# 3. incognito_args demo
print(
requests.post('http://127.0.0.1:8000/chrome/do',
json={
'tab_callback': getsource(tab_callback),
'timeout': 10,
'incognito_args': {
'url': 'http://httpbin.org/ip',
'proxyServer': 'http://127.0.0.1:1080'
}
}).text)
requests.post(
"http://127.0.0.1:8009/chrome/do",
json={
"tab_callback": getsource(tab_callback),
"timeout": 10,
"incognito_args": {
"url": "http://httpbin.org/ip",
"proxyServer": "http://127.0.0.1:1080",
},
},
).text
)
# "<html><head><meta name=\"color-scheme\" content=\"light dark\"></head><body><pre style=\"word-wrap: break-word; white-space: pre-wrap;\">{\n \"origin\": \"103.171.177.94\"\n}\n</pre></body></html>"
"""

Expand Down Expand Up @@ -134,9 +182,49 @@ def setup_chrome_engine(self, *args, **kwargs):
self.get("/screenshot")(self.screenshot)
self.get("/js")(self.js)
self.post("/do")(self.do)
self.get("/request_get")(self.request_get)
self.add_event_handler("startup", self._chrome_on_startup)
self.add_event_handler("shutdown", self._chrome_on_shutdown)

async def request_get(self, req: Request):
    """Proxy a GET request through a headless Chrome tab.

    Reserved query params (consumed here, not forwarded to the target):
        __url     [required] the target URL to visit.
        __proxy   [optional] proxy server passed to the incognito context.
        __timeout [optional] navigation timeout in seconds (parsed as float).

    All other query params, the request headers (except Host), and the
    request cookies are forwarded to the target URL. Returns the rendered
    HTML, or an empty 400 response when the download fails.
    """
    params = dict(req.query_params)
    url: str = params["__url"]
    proxy: str = params.get("__proxy", "")
    timeout: typing.Any = params.get("__timeout", None)
    if timeout:
        timeout = float(timeout)
    # Rebuild the query string for the target URL from the raw (key, value)
    # pair list so duplicate keys and their order survive — dict(query_params)
    # above would collapse them.
    # NOTE(review): `_list` is a private starlette attribute — confirm it
    # still exists when upgrading starlette/fastapi.
    query_list = [
        item
        for item in req.query_params._list
        if item[0] not in {"__url", "__proxy", "__timeout"}
    ]
    # Append the remaining params to the target URL, joining with "&" when
    # the target already carries its own query string.
    if "?" in url:
        url = f"{url}&{urlencode(query_list)}"
    else:
        url = f"{url}?{urlencode(query_list)}"
    # Forward the incoming headers in canonical Title-Case, dropping Host
    # (it belongs to this service, not the target).
    extra_headers = dict(req.headers)
    extra_headers = {key.title(): value for key, value in extra_headers.items()}
    for key in {"Host"}:
        extra_headers.pop(key, None)
    # User-Agent is passed separately to the engine rather than as a header.
    user_agent: str = extra_headers.pop("User-Agent", "")
    cookies: dict = req.cookies
    # Run the download in a fresh incognito context so the proxy setting
    # does not leak into other tabs.
    incognito_args = {
        "url": url,
        "proxyServer": proxy,
    }
    data = await self.chrome_engine.download(
        url,
        timeout=timeout,
        user_agent=user_agent,
        cookies=cookies,
        extra_headers=extra_headers,
        incognito_args=incognito_args,
    )
    if data:
        # presumably data["html"] is the rendered page source — see
        # ChromeEngine.download / CommonUtils.download.
        return HTMLResponse(data["html"])
    else:
        # Download failed or timed out: empty body, client error status.
        return Response(content=b"", status_code=400)

async def _chrome_on_startup(self):
await self.chrome_engine.start()

Expand Down
3 changes: 2 additions & 1 deletion mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ nav:
- quickstart.md
- cmd.md
- reference:
- base: "reference/base.md"
- AsyncTab: "reference/AsyncTab.md"
- AsyncChromeDaemon: "reference/AsyncChromeDaemon.md"
- AsyncChrome: "reference/AsyncChrome.md"
- Debugger: "reference/Debugger.md"
- base: "reference/base.md"
- WebAPP: "reference/WebAPP.md"
plugins:
- mkdocstrings

0 comments on commit 158236d

Please sign in to comment.