Skip to content

Commit

Permalink
feat: form type frame
Browse files Browse the repository at this point in the history
  • Loading branch information
baerwang committed Dec 30, 2023
1 parent e2a2c8a commit 39e7316
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 31 deletions.
3 changes: 2 additions & 1 deletion files/user_agent.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,4 +64,5 @@ os = [
"Windows NT 6.1; WOW64",
"Windows NT 6.1; Win64; x64",
# Linux
"X11; Linux x86_64"]
"X11; Linux x86_64"
]
34 changes: 24 additions & 10 deletions src/cli/args.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::convert::Infallible;
use std::path::PathBuf;

use clap::ArgAction::Set;
Expand All @@ -8,10 +7,10 @@ use headless_chrome::browser::default_executable;
#[derive(Debug, Parser)]
#[command(author, version, about, subcommand_precedence_over_arg = true)]
pub struct CLi {
/// Target to Website,Support Multi '--target https://example.com https://testphp.vulnweb.com'
/// Target to Website,Support Multi-value '--target https://example.com http://testphp.vulnweb.com'
#[arg(short, long, value_parser, num_args = 1.., value_delimiter = ' ')]
pub target: Vec<String>,
/// Custom Http Headers,support multi '--custom-headers Server:example Cookie:baerwang'
/// Custom Http Headers,Support Multi-value '--custom-headers Server:example Cookie:baerwang'
#[arg(short, long, value_parser, num_args = 1.., value_delimiter = ' ')]
pub custom_headers: Vec<String>,
/// Robots Exclusion Protocol
Expand All @@ -24,7 +23,6 @@ pub struct CLi {
#[arg(short, long)]
pub password: Option<String>,
#[command(subcommand)]
#[clap(value_parser = opt_default)]
pub opt: Option<Opt>,
}

Expand Down Expand Up @@ -53,17 +51,33 @@ pub struct Chromium {
pub proxy: Option<String>,
}

#[allow(dead_code)]
fn opt_default(o: Option<Opt>) -> Result<Option<Opt>, Infallible> {
if o.is_none() {
return Ok(Some(Opt::Chromium(Chromium {
impl Default for Opt {
fn default() -> Self {
Self::new()
}
}

impl Opt {
pub fn new() -> Self {
Opt::Chromium(Chromium::new())
}
}

impl Default for Chromium {
fn default() -> Self {
Self::new()
}
}

impl Chromium {
pub fn new() -> Self {
Chromium {
path: Some(default_executable().unwrap()),
headless: true,
sandbox: true,
ignore_certificate_errors: true,
user_data_dir: None,
proxy: None,
})));
}
}
Ok(o)
}
24 changes: 9 additions & 15 deletions src/cli/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ pub fn cli() -> Result<(), Box<dyn std::error::Error>> {
})
.collect();

let chromium_path = default_executable().map_err(|e| anyhow!(e))?;

let config = model::task::TaskConfig {
target: app.target,
headers,
Expand All @@ -37,23 +35,20 @@ pub fn cli() -> Result<(), Box<dyn std::error::Error>> {
env_logger::init_from_env(env_logger::Env::new().default_filter_or("INFO"));

common::load("user_agent", "files/user_agent.toml");
common::load("form", "files/form.toml");

let options = browser::FetcherOptions::default().with_allow_download(false);

if app.opt.is_none() {
let launch_options = LaunchOptions::default_builder()
.path(Some(chromium_path))
.fetcher_options(options)
.build()?;
return crawler::browse_wikipedia(config, launch_options);
}

match app.opt {
Some(args::Opt::Chromium(c)) => {
let chromium_path = Some(c.path.unwrap_or(chromium_path));
let opt = app.opt.unwrap_or_default();
match opt {
args::Opt::Chromium(c) => {
// Look up the default executable only when no explicit path was given.
// `unwrap_or(expr)` evaluates `expr` eagerly, so a failed auto-detection
// used to abort the run even though the user had supplied a usable path.
let path = Some(match c.path {
    Some(explicit) => explicit,
    None => default_executable().map_err(|e| anyhow!(e))?,
});
let proxy = Some(c.proxy.as_deref().unwrap_or_default());
let launch_options = LaunchOptions::default_builder()
.path(chromium_path)
.path(path)
.headless(c.headless)
.sandbox(c.sandbox)
.proxy_server(proxy)
Expand All @@ -64,6 +59,5 @@ pub fn cli() -> Result<(), Box<dyn std::error::Error>> {

crawler::browse_wikipedia(config, launch_options)
}
_ => Ok(()),
}
}
14 changes: 9 additions & 5 deletions src/handler/crawler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,6 @@ pub fn browse_wikipedia(
launch_options: LaunchOptions,
) -> Result<(), Box<dyn std::error::Error>> {
let browser = Browser::new(launch_options)?;
let locked_vec = browser.get_tabs().lock().unwrap();
let tabs = &*locked_vec[1];
tabs.close(true)?;
drop(locked_vec);

let random_ug = common::user_agent::random_user_agent();
for item in &config.target {
let tab = browser.new_tab()?;
Expand All @@ -32,6 +27,15 @@ pub fn browse_wikipedia(
.value
.unwrap();
assert_eq!(random_ug, ug);

let _ = tab
.evaluate("document.forms.length", false)?
.value
.unwrap()
.as_u64()
.unwrap_or_default();
// todo
// tab.evaluate(jscode,false)
_ = tab.close(true);
}

Expand Down
55 changes: 55 additions & 0 deletions src/handler/form.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use std::collections::HashMap;

use once_cell::sync::Lazy;

/// Signature shared by every form-field handler: it receives one [`Html`]
/// descriptor by value and returns nothing.
type HtmlFn = fn(Html);

/// Dispatch table from a form-field type name to its handler, built on first access.
///
/// NOTE(review): the `"select_one"` key uses an underscore, while browsers
/// report `select-one` for single-choice `<select>` elements — confirm which
/// spelling the lookup side will use.
pub static FORM: Lazy<HashMap<&str, HtmlFn>> = Lazy::new(|| {
    // The explicit element type coerces each fn item to the common `HtmlFn` pointer type.
    let handlers: [(&str, HtmlFn); 11] = [
        ("text", text),
        ("textarea", textarea),
        ("password", password),
        ("email", email),
        ("tel", tel),
        ("date", date),
        ("radio", radio),
        ("checkbox", checkbox),
        ("select_one", select_one),
        ("submit", submit),
        ("button", button),
    ];
    HashMap::from(handlers)
});

/// Descriptor of a single form control, passed by value to the form handlers.
///
/// All fields are plain strings and default to empty; `Html::default()` is the
/// way to obtain a blank descriptor, since the fields are private.
///
/// NOTE(review): the meaning of each field is inferred from its name only —
/// nothing in this module reads them yet. Confirm against the code that will
/// populate the struct.
// The handlers are still stubs, so no field is read anywhere; silence the lint until they are.
#[allow(dead_code)]
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Html {
    id: String,
    name: String,
    el_type: String,
    tag_name: String,
    class_name: String,
    label: String,
    readonly: String,
    xpath: String,
}

/// Handler registered under the `"text"` key of `FORM`; currently a no-op stub.
fn text(_html: Html) {}

/// Handler registered under the `"textarea"` key of `FORM`; currently a no-op stub.
fn textarea(_html: Html) {}

/// Handler registered under the `"password"` key of `FORM`; currently a no-op stub.
fn password(_html: Html) {}

/// Handler registered under the `"email"` key of `FORM`; currently a no-op stub.
fn email(_html: Html) {}

/// Handler registered under the `"tel"` key of `FORM`; currently a no-op stub.
fn tel(_html: Html) {}

/// Handler registered under the `"date"` key of `FORM`; currently a no-op stub.
fn date(_html: Html) {}

/// Handler registered under the `"radio"` key of `FORM`; currently a no-op stub.
fn radio(_html: Html) {}

/// Handler registered under the `"checkbox"` key of `FORM`; currently a no-op stub.
fn checkbox(_html: Html) {}

/// Handler registered under the `"select_one"` key of `FORM`; currently a no-op stub.
fn select_one(_html: Html) {}

/// Handler registered under the `"submit"` key of `FORM`; currently a no-op stub.
fn submit(_html: Html) {}

/// Handler registered under the `"button"` key of `FORM`; currently a no-op stub.
fn button(_html: Html) {}
1 change: 1 addition & 0 deletions src/handler/mod.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod crawler;
pub mod form;
pub mod robots;

0 comments on commit 39e7316

Please sign in to comment.