Skip to content

Commit

Permalink
begun building list and removed code causing false positives
Browse files Browse the repository at this point in the history
  • Loading branch information
mkb2091 committed Mar 7, 2024
1 parent 42de4d7 commit 3fa802d
Show file tree
Hide file tree
Showing 11 changed files with 79 additions and 73 deletions.
2 changes: 0 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ rand = { version = "0.8.5", optional = true }
hickory-proto = { version = "0.24.0", default-features = false }
humantime = "2.1.0"
notify = { version = "6.1.1", optional = true }
serde_json = { version = "1.0.114", optional = true }
async-channel = {version = "2.2.0", optional = true}


Expand Down Expand Up @@ -97,7 +96,6 @@ ssr = [
"dep:ct-logs",
"dep:rand",
"dep:notify",
"dep:serde_json",
"dep:async-channel",
]
default = ["ssr"]
Expand Down
2 changes: 0 additions & 2 deletions filterlists.csv
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ ABPindo,https://raw.githubusercontent.com/ABPindo/indonesianadblockrules/master/
,https://raw.githubusercontent.com/ligyxy/Blocklist/master/BLOCKLIST,,MIT,86400,DomainBlocklist
,https://raw.githubusercontent.com/bjornstar/hosts/master/hosts,,The Unlicense,86400,Hostfile
,https://raw.githubusercontent.com/EFForg/privacybadger/master/src/data/yellowlist.txt,,GPLv3+,86400,DomainAllowlist
,https://raw.githubusercontent.com/EFForg/privacybadger/master/src/data/seed.json,,GPLv3,86400,PrivacyBadger
,https://raw.githubusercontent.com/EFForg/badger-sett/master/results.json,,GPLv3,86400,PrivacyBadger
,https://cdn.jsdelivr.net/gh/realodix/AdBlockID@master/dist/adblockid.adfl.txt,,,86400,Adblock
,https://blocklists.kitsapcreator.com/scam-spam.txt,,The Unlicense,86400,DomainBlocklist
,https://blocklists.kitsapcreator.com/malware-malicious.txt,,The Unlicense,86400,DomainBlocklist
Expand Down
8 changes: 8 additions & 0 deletions migrations/20240307005702_lists.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Add migration script here
CREATE TABLE allow_domains (
domain_id BIGINT UNIQUE NOT NULL
);

CREATE TABLE block_domains (
domain_id BIGINT UNIQUE NOT NULL
);
3 changes: 3 additions & 0 deletions migrations/20240307012450_index.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Add migration script here
CREATE INDEX domain_rules_allow_idx ON domain_rules (allow);
CREATE INDEX ip_rules_allow_idx ON ip_rules (allow);
2 changes: 1 addition & 1 deletion src/home_page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ fn FilterListSummary(url: crate::FilterListUrl, record: crate::FilterListRecord)
<td>
<LastUpdated url=url.clone()/>
</td>
<td>
<td class="text-right">
<ListSize url=url.clone()/>
</td>
<td>
Expand Down
3 changes: 0 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ pub enum FilterListType {
RegexAllowlist,
RegexBlocklist,
Hostfile,
PrivacyBadger,
}

impl FilterListType {
Expand All @@ -112,7 +111,6 @@ impl FilterListType {
Self::RegexAllowlist => "RegexAllowlist",
Self::RegexBlocklist => "RegexBlocklist",
Self::Hostfile => "Hostfile",
Self::PrivacyBadger => "PrivacyBadger",
}
}
}
Expand All @@ -139,7 +137,6 @@ impl std::str::FromStr for FilterListType {
"RegexAllowlist" => Ok(Self::RegexAllowlist),
"RegexBlocklist" => Ok(Self::RegexBlocklist),
"Hostfile" => Ok(Self::Hostfile),
"PrivacyBadger" => Ok(Self::PrivacyBadger),
_ => Err(InvalidFilterListTypeError),
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/list_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ pub async fn update_list(url: crate::FilterListUrl) -> Result<(), ServerFnError>
}
}

#[server]
#[server(DeleteList)]
pub async fn delete_list(url: crate::FilterListUrl) -> Result<(), ServerFnError> {
let pool = crate::server::get_db().await?;
let url_str = url.as_str();
Expand Down
43 changes: 3 additions & 40 deletions src/list_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,13 @@ fn parse_adblock_line(line: &str) -> Option<Rule> {
}
} else {
match tag {
"3p" | "third-party" | "doc" | "document" | "all" => {
"3p" | "doc" | "document" | "all" => {
match_end_domain = true;
block_site = true;
}
"popup" | "ghide" | "generichide" | "genericblock" | "image" | "script"
| "xmlhttprequest" | "stylesheet" | "subdocument" | "media" | "csp" => {
| "third-party" | "xmlhttprequest" | "stylesheet" | "subdocument" | "media"
| "csp" => {
has_specific_filters = true;
}
"important" => {}
Expand Down Expand Up @@ -366,43 +367,6 @@ fn parse_unknown_lines(contents: &str) -> Vec<RulePair> {
parse_lines(contents, &|_| Some(Rule::Unknown))
}

/// One entry of a Privacy Badger `action_map`, as deserialized from JSON.
///
/// Only the `heuristicAction` field is read; the field keeps the JSON key's
/// camelCase spelling so it deserializes without a rename, hence the
/// `non_snake_case` allowance.
#[cfg(feature = "ssr")]
#[derive(Deserialize, Debug)]
#[allow(non_snake_case)]
struct PrivacyBadgerRule {
// Action string for the entry; `parse_privacy_badger` only acts on `"block"`.
heuristicAction: String,
}

/// Top-level shape of a Privacy Badger JSON document, reduced to the single
/// `action_map` object this crate reads.
#[cfg(feature = "ssr")]
#[derive(Deserialize, Debug)]
struct PrivacyBadger {
// Keys are later parsed as domains by `parse_privacy_badger`; values carry the action.
action_map: std::collections::HashMap<String, PrivacyBadgerRule>,
}

/// Converts a Privacy Badger JSON document into blocking domain rules.
///
/// Every `action_map` entry whose `heuristicAction` equals `"block"` and whose
/// key parses as a domain becomes a non-allow [`DomainRule`] that also covers
/// subdomains. Entries with any other action, or with a key that fails to
/// parse, are dropped. Input that does not deserialize yields an empty vector.
#[cfg(feature = "ssr")]
fn parse_privacy_badger(contents: &str) -> Vec<RulePair> {
    // Undeserializable input produces no rules rather than an error.
    let Ok(PrivacyBadger { action_map }) = serde_json::from_str::<PrivacyBadger>(contents) else {
        return Vec::new();
    };
    action_map
        .into_iter()
        .filter(|(_, entry)| entry.heuristicAction == "block")
        .filter_map(|(name, _)| {
            let blocked = DomainRule {
                // Keys that are not valid domains are skipped via `?`.
                domain: name.parse().ok()?,
                allow: false,
                subdomain: true,
            };
            Some(RulePair::new(name.into(), Rule::Domain(blocked)))
        })
        .collect()
}

#[cfg(feature = "ssr")]
pub fn parse_list_contents(contents: &str, list_format: FilterListType) -> Vec<RulePair> {
Expand All @@ -417,7 +381,6 @@ pub fn parse_list_contents(contents: &str, list_format: FilterListType) -> Vec<R
FilterListType::RegexAllowlist => parse_unknown_lines(contents),
FilterListType::RegexBlocklist => parse_regex(contents),
FilterListType::Hostfile => parse_domain_list(contents, false, true),
FilterListType::PrivacyBadger => parse_privacy_badger(contents),
}
}

Expand Down
39 changes: 15 additions & 24 deletions src/list_view.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
#[cfg(feature = "ssr")]
use self::rule_view::RuleData;
use crate::{app::Loading, list_manager::UpdateList, rule_view::DisplayRule, *};
use crate::{
app::Loading,
list_manager::{DeleteList, UpdateList},
rule_view::DisplayRule,
*,
};
use leptos::*;
use leptos_router::*;

Expand Down Expand Up @@ -262,7 +267,7 @@ fn FilterListInner(url: crate::FilterListUrl, page: Option<usize>) -> impl IntoV
<ParseList url=url.clone()/>
</p>

<DeleteList url=url.clone()/>
<DeleteListButton url=url.clone()/>
{if let Some(page) = page {
view! { <p>"Page: " {page}</p> }
} else {
Expand Down Expand Up @@ -323,29 +328,15 @@ impl ViewListParams {
}

#[component]
fn DeleteList(url: FilterListUrl) -> impl IntoView {
let delete_list = create_action(move |url: &FilterListUrl| {
let url = url.clone();
async move {
log::info!("Deleting {}", url.as_str());
if let Err(err) = list_manager::delete_list(url).await {
log::error!("Error deleting list: {:?}", err);
}
}
});
fn DeleteListButton(url: FilterListUrl) -> impl IntoView {
let delete_list_action = create_server_action::<DeleteList>();
view! {
<button
class="btn btn-danger"
on:click={
let url = url.clone();
move |_| {
delete_list.dispatch(url.clone());
}
}
>

"Delete"
</button>
<ActionForm action=delete_list_action>
<button class="btn btn-danger" type="submit">
<input type="hidden" placeholder="url" id="url" name="url" value=url.to_string()/>
"Delete"
</button>
</ActionForm>
}
}

Expand Down
3 changes: 3 additions & 0 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ async fn main() {
tokio::spawn(async {
blockconvert::server::find_rule_matches().await.unwrap();
});
tokio::spawn(async {
blockconvert::server::build_list().await.unwrap();
});
axum::serve(listener, app.into_make_service())
.await
.unwrap();
Expand Down
45 changes: 45 additions & 0 deletions src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::DomainId;
use axum::error_handling::future;
use futures::StreamExt;
use hickory_resolver::error::ResolveError;
use leptos::server_fn::ServerFn;
use leptos::*;
use notify::Watcher;
use rand::seq::SliceRandom;
Expand Down Expand Up @@ -571,3 +572,47 @@ pub async fn find_rule_matches() -> Result<(), ServerFnError> {
tx.commit().await?;
}
}

pub async fn build_list() -> Result<(), ServerFnError> {
dotenvy::dotenv()?;
let read_limit: i64 = std::env::var("READ_LIMIT")?.parse()?;
let pool = get_db().await?;
let mut tx = pool.begin().await?;
sqlx::query!("DELETE FROM allow_domains")
.execute(&mut *tx)
.await?;
let allow_count = sqlx::query!(
"SELECT COUNT(*) from rule_matches
INNER JOIN rules ON rule_matches.rule_id = rules.id
LEFT JOIN domain_rules ON rules.domain_rule_id = domain_rules.id
LEFT JOIN ip_rules ON rules.ip_rule_id = ip_rules.id
WHERE domain_rules.allow = true OR ip_rules.allow = true"
)
.fetch_one(&mut *tx)
.await?
.count
.unwrap_or(0);
log::info!("Allow count: {}", allow_count);
for offset in (0..allow_count).step_by(read_limit as usize) {
let records = sqlx::query!(
"INSERT INTO allow_domains(domain_id)
SELECT rule_matches.domain_id from rule_matches
INNER JOIN rules ON rule_matches.rule_id = rules.id
LEFT JOIN domain_rules ON rules.domain_rule_id = domain_rules.id
LEFT JOIN ip_rules ON rules.ip_rule_id = ip_rules.id
WHERE domain_rules.allow = true OR ip_rules.allow = true
ORDER BY rule_matches.domain_id
LIMIT $1
OFFSET $2
ON CONFLICT DO NOTHING
RETURNING domain_id
",
read_limit,
offset,
)
.fetch_all(&mut *tx)
.await?;
log::info!("Inserted {} allow domains", records.len());
}
Ok(())
}

0 comments on commit 3fa802d

Please sign in to comment.