Skip to content

Commit

Permalink
Version 1.3
Browse files Browse the repository at this point in the history
- [fix] Image search crawler filters out non-image results better
- [new] Crawler for results from magnetdl.com
- [new] Direct Reddit.com search, search for 'Top Posts' created in the past year
- [new] Added NSFW filter for Reddit results in config.default.php
- [new] YTS movie highlights now link to YTS website when clicking the title
- [new] Placeholder image for missing eztv highlight thumbnails
- [tweak] Better hash matching for duplicate magnet results
- [tweak] Better checking for missing/empty values in image search results
- [tweak] Code cleanup
- [tweak] More uniform code/variable names
- [change] Naming overhaul - Replaced 'Torrent' with 'Magnet' throughout most of Goosle
  • Loading branch information
adegans committed Apr 12, 2024
1 parent f7971ae commit aab57e1
Show file tree
Hide file tree
Showing 26 changed files with 366 additions and 455 deletions.
File renamed without changes
17 changes: 10 additions & 7 deletions config.default.php
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@
ENABLE IMAGE SEARCH:
Enable or disable image searches - Search results are provided by Yahoo! Images.
ENABLE TORRENT SEARCH:
Enable or disable searching for torrent downloads.
ENABLE MAGNET SEARCH:
Enable or disable searching for magnet links on torrent websites.
ENABLE SEARCH ENGINES:
Enable or disable search engines.
Expand Down Expand Up @@ -99,11 +99,11 @@
"download_count" = Most downloaded movies.
BLOCK 1337x CATEGORIES:
Add category IDs of 1337x categories, check /engines/torrent/1337x.php for a list of known categories.
Add category IDs of 1337x categories, check /engines/magnet/1337x.php for a list of known categories.
Accepts a basic numeric array, comma separated.
BLOCK PIRATEBAY CATEGORIES:
Add category IDs of Pirate Bay categories, check /engines/torrent/thepiratebay.php for a list of known categories.
Add category IDs of Pirate Bay categories, check /engines/magnet/thepiratebay.php for a list of known categories.
Accepts a basic numeric array, comma separated.
BLOCK YTS CATEGORIES:
Expand All @@ -123,7 +123,7 @@
Opera/Edge/Brave and many others use Chrome under the hood and are not a good pick for that reason.
Mobile agents may work, but some services like Wikipedia are picky about answering API calls. Mobile users generally do not use APIs, so these services may block searches made with a mobile user agent.
TORRENT TRACKERS:
MAGNET TRACKERS:
Only used for The Pirate Bay, LimeTorrents and YTS.
Generally you do not need to change these.
These are added to the magnet links Goosle creates. You can add more or replace the existing ones if you know what you're doing.
Expand All @@ -138,16 +138,18 @@
"cache_time" => 30, // Default: 30 (Minutes)

"enable_image_search" => "on", // Default: on
"enable_torrent_search" => "on", // Default: on
"enable_magnet_search" => "on", // Default: on
"enable_duckduckgo" => "on", // Default: on
"enable_google" => "on", // Default: on
"enable_reddit" => "on", // Default: on
"enable_wikipedia" => "on", // Default: on
"enable_ecosia" => "off", // Default: on
// Ecosia uses some kind of bot detection that has prevented the crawler from working reliably since Feb 1, 2024. Remove support in a future release?

"enable_limetorrents" => "on", // Default: on
"enable_piratebay" => "on", // Default: on
"enable_yts" => "on", // Default: on
"enable_magnetdl" => "on", // Default: on
"enable_nyaa" => "on", // Default: on
"enable_eztv" => "on", // Default: on
"enable_l33tx" => "off", // Default: off
Expand All @@ -156,6 +158,7 @@
"duckduckgo_language" => "uk-en", // Default: uk-en (United Kingdom)
"wikipedia_language" => "en", // Default: en (English)
"social_media_relevance" => 8, // Default: 8
"show_reddit_nsfw" => "on", // Default: on
"show_search_source" => "on", // Default: on
"show_search_rank" => "off", // Default: off
"imdb_id_search" => "off", // Default: off
Expand All @@ -182,7 +185,7 @@
"Mozilla/5.0 (X11; Linux i686) Gecko/20100101 Firefox/119.0", // Linux Generic, Firefox 119
),

"torrent_trackers" => array(
"magnet_trackers" => array(
"http://nyaa.tracker.wf:7777/announce",
"http://tracker.openbittorrent.com:80/announce",
"udp://tracker.opentrackr.org:1337/announce",
Expand Down
95 changes: 0 additions & 95 deletions engines/duckduckgo.php

This file was deleted.

114 changes: 0 additions & 114 deletions engines/google.php

This file was deleted.

43 changes: 28 additions & 15 deletions engines/image/yahoo.php
Original file line number Diff line number Diff line change
Expand Up @@ -60,35 +60,47 @@ public function parse_results($response) {
}

// Scrape the results
$scrape = $xpath->query("//li[contains(@class, 'ld') and not(contains(@class, 'slotting'))][position() < 101]");
// $scrape = $xpath->query("//li[contains(@class, 'ld') and not(contains(@class, 'slotting'))][position() < 101]");
$scrape = $xpath->query("//li[contains(@class, 'ld') and not(contains(@class, 'ignore'))][position() < 101]");
$rank = $results['amount'] = count($scrape);
foreach($scrape as $result) {
$image = $xpath->evaluate(".//img/@src", $result)[0];
if($image == null) continue;

$url_data = $xpath->evaluate(".//a/@href", $result)[0];
if($url_data == null) continue;

// Get meta data
// -- Relevant $url_data (there is more, but unused by Goosle)
foreach($scrape as $result) {
$image = $xpath->evaluate(".//img/@src", $result)[0];
if($image == null) continue;

$url_data = $xpath->evaluate(".//a/@href", $result)[0];
if($url_data == null) continue;

// Get and prepare meta data
// -- Relevant query parameters in $url_data (there are more, but Goosle does not use them)
// w = Image width (1280)
// h = Image height (720)
// imgurl = Actual full size image (Used in Yahoo preview/popup)
// rurl = Url to page where the image is used
// size = Image size (413.1KB)
// tt = Website title (Used for image alt text)
parse_str($url_data->textContent, $url_data);
foreach(explode("&", strstr($url_data->textContent, '?')) as &$meta) {
if(!is_null($meta) || !empty($meta)) {
$value = explode("=", trim($meta));

if(!empty($value[0]) && !empty($value[1])) {
$usable_data[$value[0]] = urldecode($value[1]);
}
}
unset($meta, $value);
}

// Deal with optional or missing data
$dimensions_w = (!array_key_exists('w', $url_data) || empty($url_data['w'])) ? "" : htmlspecialchars($url_data['w']);
$dimensions_h = (!array_key_exists('h', $url_data) || empty($url_data['h'])) ? "" : htmlspecialchars($url_data['h']);
$filesize = (!array_key_exists('size', $url_data) || empty($url_data['size'])) ? "" : htmlspecialchars($url_data['size']);
$link = (!array_key_exists('imgurl', $url_data) || empty($url_data['imgurl'])) ? "" : "//".htmlspecialchars($url_data['imgurl']);
$dimensions_w = (!array_key_exists('w', $usable_data)) ? "" : htmlspecialchars($usable_data['w']);
$dimensions_h = (!array_key_exists('h', $usable_data)) ? "" : htmlspecialchars($usable_data['h']);
$link = (!array_key_exists('imgurl', $usable_data)) ? "" : "//".htmlspecialchars($usable_data['imgurl']);
$url = (!array_key_exists('rurl', $usable_data)) ? "" : htmlspecialchars($usable_data['rurl']);
$filesize = (!array_key_exists('size', $usable_data)) ? "" : htmlspecialchars($usable_data['size']);
$alt = (!array_key_exists('tt', $usable_data)) ? "" : htmlspecialchars($usable_data['tt']);

// Process result
$image = htmlspecialchars($image->textContent);
$url = htmlspecialchars($url_data['rurl']);
$alt = htmlspecialchars($url_data['tt']);

// filter duplicate urls/results
if(!empty($results['search'])) {
Expand All @@ -100,6 +112,7 @@ public function parse_results($response) {

$results['search'][] = array ("id" => $id, "source" => "Yahoo! Images", "image" => $image, "alt" => $alt, "url" => $url, "width" => $dimensions_w, "height" => $dimensions_h, "filesize" => $filesize, "direct_link" => $link, "engine_rank" => $rank);
$rank -= 1;
unset($url_data, $usable_data, $dimensions_w, $dimensions_h, $filesize, $link, $url, $alt, $image);
}
unset($response, $xpath, $scrape, $rank);

Expand Down
13 changes: 4 additions & 9 deletions engines/torrent/1337x.php → engines/magnet/1337x.php
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ public function parse_results($response) {
foreach($xpath->query("//table/tbody/tr") as $result) {
$name = sanitize($xpath->evaluate(".//td[@class='coll-1 name']/a", $result)[1]->textContent);
$url = "https://1337x.to".sanitize($xpath->evaluate(".//td[@class='coll-1 name']/a/@href", $result)[1]->textContent);
$magnet = "./engines/torrent/magnetize_1337x.php?url=".$url;
$magnet = "./engines/magnet/magnetize_1337x.php?url=".$url;
$seeders = sanitize($xpath->evaluate(".//td[@class='coll-2 seeds']", $result)[0]->textContent);
$leechers = sanitize($xpath->evaluate(".//td[@class='coll-3 leeches']", $result)[0]->textContent);
$size_unformatted = explode(" ", sanitize($xpath->evaluate(".//td[contains(@class, 'coll-4 size')]", $result)[0]->textContent));
Expand All @@ -127,14 +127,9 @@ public function parse_results($response) {
// Block these categories
if(in_array($category, $this->opts->leetx_categories_blocked)) continue;

// Filter by Season (S01) or Season and Episode (S01E01)
// Where [0][0] = Season and [0][1] = Episode
if(preg_match_all("/(S[0-9]{1,3})|(E[0-9]{1,3})/i", $this->query, $query_episode) && preg_match_all("/(S[0-9]{1,3})|(E[0-9]{1,3})/i", $name, $match_episode)) {
if($query_episode[0][0] != $match_episode[0][0] || (array_key_exists(1, $query_episode[0]) && array_key_exists(1, $match_episode[0]) && $query_episode[0][1] != $match_episode[0][1])) {
continue;
}
}

// Filter episodes
if(!is_season_or_episode($this->query, $name)) continue;

$id = uniqid(rand(0, 9999));

$results[] = array (
Expand Down
2 changes: 1 addition & 1 deletion engines/torrent/eztv.php → engines/magnet/eztv.php
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public function parse_results($response) {
foreach($json_response['torrents'] as $result) {
$name = sanitize($result['title']);
$magnet = sanitize($result['magnet_url']);
$hash = sanitize($result['hash']);
$hash = strtolower(sanitize($result['hash']));
$seeders = sanitize($result['seeds']);
$leechers = sanitize($result['peers']);
$size = sanitize($result['size_bytes']);
Expand Down
File renamed without changes.
Loading

0 comments on commit aab57e1

Please sign in to comment.