Skip to content

Commit

Permalink
Browser : Improve ad blocker; add hideable elements capability; use i…
Browse files Browse the repository at this point in the history
…t for Hitomi =D
  • Loading branch information
RobbWatershed committed Jan 15, 2022
1 parent 64f5d09 commit acd191b
Show file tree
Hide file tree
Showing 12 changed files with 68 additions and 32 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new CustomWebViewClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
client.adBlocker.addJsUrlWhitelist(JS_WHITELIST);
for (String s : JS_WHITELIST) client.adBlocker.addJsUrlPatternWhitelist(s); // TODO duplicate of above ?
for (String s : JS_CONTENT_BLACKLIST) client.adBlocker.addJsContentBlacklist(s);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,11 @@ class CustomWebViewClient extends WebViewClient {
private Disposable disposable;


// List of "dirty" elements (CSS selector) to be cleaned before displaying the page
private List<String> dirtyElements;
// List of elements (CSS selector) to be removed before displaying the page
private List<String> removableElements;

// List of elements (CSS selector) to be hidden by inline CSS
private List<String> hideableElements;


CustomWebViewClient(Site site, String[] galleryUrl, CustomWebActivity activity) {
Expand Down Expand Up @@ -156,9 +159,19 @@ void destroy() {
*
* @param elements Elements (CSS selector) to addAll to page cleaner
*/
protected void addDirtyElements(String... elements) {
if (null == dirtyElements) dirtyElements = new ArrayList<>();
Collections.addAll(dirtyElements, elements);
protected void addRemovableElements(String... elements) {
    // Create the backing list lazily, the first time selectors are registered
    if (removableElements == null) {
        removableElements = new ArrayList<>();
    }
    Collections.addAll(removableElements, elements);
}

/**
 * Register CSS selectors whose matching elements are to be hidden on the current site
 * <p>
 * Selectors accumulate across calls; the backing list is created on first use.
 *
 * @param elements Elements (CSS selector) to add to the list of hideable elements
 */
protected void addHideableElements(String... elements) {
    // Create the backing list lazily, the first time selectors are registered
    if (hideableElements == null) {
        hideableElements = new ArrayList<>();
    }
    Collections.addAll(hideableElements, elements);
}


Expand Down Expand Up @@ -273,7 +286,7 @@ protected boolean shouldOverrideUrlLoadingInternal(
@NonNull final WebView view,
@NonNull final String url,
@Nullable final Map<String, String> requestHeaders) {
if (adBlocker.isBlocked(url) || !url.startsWith("http")) return true;
if (adBlocker.isBlocked(url, requestHeaders) || !url.startsWith("http")) return true;

// Download and open the torrent file
// NB : Opening the URL itself won't work when the tracker is private
Expand Down Expand Up @@ -380,7 +393,7 @@ public WebResourceResponse shouldInterceptRequest(@NonNull WebView view,
@Nullable
private WebResourceResponse shouldInterceptRequestInternal(@NonNull final String url,
@Nullable final Map<String, String> headers) {
if (adBlocker.isBlocked(url) || !url.startsWith("http")) {
if (adBlocker.isBlocked(url, headers) || !url.startsWith("http")) {
return new WebResourceResponse("text/plain", "utf-8", new ByteArrayInputStream(nothing));
} else if (isMarkDownloaded() && url.contains("hentoid-checkmark")) {
return new WebResourceResponse(ImageHelper.MIME_IMAGE_WEBP, "utf-8", new ByteArrayInputStream(checkmark));
Expand All @@ -390,7 +403,7 @@ private WebResourceResponse shouldInterceptRequestInternal(@NonNull final String

// If we're here to remove "dirty elements" or mark downloaded books, we only do it
// on HTML resources (URLs without extension) from the source's main domain
if ((dirtyElements != null || isMarkDownloaded() || !activity.getCustomCss().isEmpty())
if ((removableElements != null || hideableElements != null || isMarkDownloaded() || !activity.getCustomCss().isEmpty())
&& (HttpHelper.getExtensionFromUri(url).isEmpty() || HttpHelper.getExtensionFromUri(url).equalsIgnoreCase("html"))) {
String host = Uri.parse(url).getHost();
if (host != null && !isHostNotInRestrictedDomains(host))
Expand Down Expand Up @@ -498,8 +511,8 @@ protected WebResourceResponse parseResponse(@NonNull String urlStr, @Nullable Ma

// Remove dirty elements from HTML resources
String customCss = activity.getCustomCss();
if (dirtyElements != null || isMarkDownloaded() || !customCss.isEmpty()) {
browserStream = ProcessHtml(browserStream, urlStr, customCss, dirtyElements, activity.getAllSiteUrls());
if (removableElements != null || hideableElements != null || isMarkDownloaded() || !customCss.isEmpty()) {
browserStream = ProcessHtml(browserStream, urlStr, customCss, removableElements, hideableElements, activity.getAllSiteUrls());
if (null == browserStream) return null;
}

Expand Down Expand Up @@ -603,18 +616,20 @@ void setDnsOverHttpsEnabled(boolean value) {
* - If set, remove nodes using the given list of CSS selectors to identify them
* - If set, mark book covers or links matching the given list of Urls
*
* @param stream Stream containing the HTML document to process; will be closed during the process
* @param baseUri Base URI if the document
* @param dirtyElements CSS selectors of the nodes to remove
* @param siteUrls Urls of the covers or links to mark
* @param stream Stream containing the HTML document to process; will be closed during the process
* @param baseUri Base URI of the document
* @param removableElements CSS selectors of the nodes to remove
* @param hideableElements CSS selectors of the nodes to hide
* @param siteUrls Urls of the covers or links to mark
* @return Stream containing the HTML document stripped from the elements to remove
*/
@Nullable
private InputStream ProcessHtml(
@NonNull InputStream stream,
@NonNull String baseUri,
@Nullable String customCss,
@Nullable List<String> dirtyElements,
@Nullable List<String> removableElements,
@Nullable List<String> hideableElements,
@Nullable List<String> siteUrls) {
try {
Document doc = Jsoup.parse(stream, null, baseUri);
Expand All @@ -624,13 +639,22 @@ private InputStream ProcessHtml(
doc.head().appendElement("style").attr("type", "text/css").appendText(customCss);

// Remove ad spaces
if (dirtyElements != null)
for (String s : dirtyElements)
if (removableElements != null)
for (String s : removableElements)
for (Element e : doc.select(s)) {
Timber.d("[%s] Removing node %s", baseUri, e.toString());
e.remove();
}

// Hide ad spaces
if (hideableElements != null) {
for (String s : hideableElements)
for (Element e : doc.select(s)) {
Timber.d("[%s] Hiding node %s", baseUri, e.toString());
e.attr("style", "min-height:0px;height:0%;");
}
}

// Mark downloaded books
if (siteUrls != null && !siteUrls.isEmpty()) {
// Format elements
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new CustomWebViewClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ public class HitomiActivity extends BaseWebActivity {
private static final String[] RESULTS_FILTER = {"//hitomi.la[/]{0,1}$", "//hitomi.la[/]{0,1}\\?", "//hitomi.la/search.html", "//hitomi.la/index-[\\w%\\-\\.\\?]+", "//hitomi.la/(series|artist|tag|character)/[\\w%\\-\\.\\?]+"};
private static final String[] BLOCKED_CONTENT = {"hitomi-horizontal.js", "hitomi-vertical.js", "invoke.js", "ion.sound"};
private static final String[] JS_WHITELIST = {"//hitomi.la[/]{0,1}$", "galleries/[\\w%\\-]+.js$", "filesaver", "common", "date", "download", "gallery", "jquery", "cookie", "jszip", "limitlists", "moment-with-locales", "moveimage", "pagination", "search", "searchlib", "yall", "reader", "decode_webp", "bootstrap", "gg.js", "languagesindex", "tagindex", "paging", "language_support"};
private static final String[] JS_CONTENT_BLACKLIST = {"exoloader", "popunder"};
private static final String[] DIRTY_ELEMENTS = {".top-content > div:not(.list-title)", ".content div[class^=hitomi-]"};
private static final String[] JS_CONTENT_BLACKLIST = {"exoloader", "popunder", "da_etirw"};
private static final String[] HIDEABLE_ELEMENTS = {".content div[class^=hitomi-]", ".container div[class^=hitomi-]", ".top-content > div:not(.list-title)"};

Site getStartSite() {
return Site.HITOMI;
Expand All @@ -53,7 +53,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
HitomiWebClient client = new HitomiWebClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addHideableElements(HIDEABLE_ELEMENTS);
client.setResultsUrlPatterns(RESULTS_FILTER);
client.setResultUrlRewriter(this::rewriteResultsUrl);
client.adBlocker.addToUrlBlacklist(BLOCKED_CONTENT);
Expand Down Expand Up @@ -163,6 +163,7 @@ protected Content processContent(@NonNull Content content, @NonNull String url,
Timber.w(e);
}
}
Timber.w(">> Lock freed");
List<ImageFile> result = new ArrayList<>();
result.add(ImageFile.newCover(content.getCoverImageUrl(), StatusContent.SAVED));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new CustomWebViewClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ protected CustomWebViewClient getWebClient() {
client.adBlocker.addToUrlBlacklist(BLOCKED_CONTENT);
client.adBlocker.addJsUrlWhitelist(DOMAIN_FILTER);
for (String s : JS_CONTENT_BLACKLIST) client.adBlocker.addJsContentBlacklist(s);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new MrmWebClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ protected CustomWebViewClient getWebClient() {
client.restrictTo(DOMAIN_FILTER);
client.setResultsUrlPatterns(RESULTS_FILTER);
client.setResultUrlRewriter(this::rewriteResultsUrl);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
client.adBlocker.addToUrlBlacklist(BLOCKED_CONTENT);
return client;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new CustomWebViewClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ protected CustomWebViewClient getWebClient() {
client.adBlocker.addToUrlBlacklist(BLOCKED_CONTENT);
client.adBlocker.addJsUrlWhitelist(DOMAIN_FILTER);
for (String s : JS_CONTENT_BLACKLIST) client.adBlocker.addJsContentBlacklist(s);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
return client;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Site getStartSite() {
protected CustomWebViewClient getWebClient() {
CustomWebViewClient client = new TsuminoWebViewClient(getStartSite(), GALLERY_FILTER, this);
client.restrictTo(DOMAIN_FILTER);
client.addDirtyElements(DIRTY_ELEMENTS);
client.addRemovableElements(DIRTY_ELEMENTS);
client.adBlocker.addToUrlBlacklist(blockedContent);

return client;
Expand Down
17 changes: 14 additions & 3 deletions app/src/main/java/me/devsaki/hentoid/util/AdBlocker.java
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
package me.devsaki.hentoid.util;

import android.os.Looper;
import android.util.Pair;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -150,10 +154,11 @@ public void addJsContentBlacklist(@NonNull final String sequence) {
/**
* Indicate if the resource at the given URL is blocked by the current adblock settings
*
* @param url Url to examine
* @param url Url to examine
* @param headers HTTP request headers to use
* @return True if the resource is blocked; false if not
*/
public boolean isBlocked(@NonNull final String url) {
public boolean isBlocked(@NonNull final String url, @Nullable final Map<String, String> headers) {
final String cleanUrl = url.toLowerCase();

// 1- Accept whitelisted JS files
Expand All @@ -179,7 +184,13 @@ public boolean isBlocked(@NonNull final String url) {
if (Looper.getMainLooper().getThread() != Thread.currentThread()) { // No network call on UI thread
Timber.d(">> examining grey file : %s", url);
try {
Response response = HttpHelper.getOnlineResourceFast(url, null, site.useMobileAgent(), site.useHentoidAgent(), site.useWebviewAgent());
List<Pair<String, String>> requestHeadersList = HttpHelper.webkitRequestHeadersToOkHttpHeaders(headers, url);
Response response = HttpHelper.getOnlineResourceFast(url, requestHeadersList, site.useMobileAgent(), site.useHentoidAgent(), site.useWebviewAgent());
if (response.code() >= 400) {
Timber.d(">> grey file KO (%d) : %s", response.code(), url);
return false; // Better safe than sorry
}

ResponseBody body = response.body();
if (null == body) throw new IOException("Empty body");
Timber.d(">> grey file downloaded : %s", url);
Expand Down

0 comments on commit acd191b

Please sign in to comment.