Skip to content

Commit

Permalink
Update site adder validation so it depends on the cURLed URL to close…
Browse files Browse the repository at this point in the history
… issue #22.
  • Loading branch information
Blake Bertuccelli committed Apr 19, 2022
1 parent 5c28ece commit 7306b07
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 53 deletions.
42 changes: 18 additions & 24 deletions actions/add_site.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,55 +11,49 @@
DB_NAME
);

// Valid URLs are required so that we can CURL.
$site_url = filter_input(INPUT_GET, 'url', FILTER_VALIDATE_URL);
// We don't validate the URLs here because cURL does
// a better job of validating/redirecting in the adders.
$site_url = $_GET['url'];
if($site_url == false)
throw new Exception('"'.$_GET['url'].'" is an invalid URL');
throw new Exception('URL is missing');

// We need to check the type since a user could manually
// update the URL string to something unsupported.
$type = $_GET['type'];
if( $type == false)
throw new Exception('Type is not specified for the URL "'.$site_url.'"');

// Requiring unique URLs minimizes unnecessary scans.
if(!is_unique_page_url($db, $site_url))
throw new Exception('Page "'.$site_url.'" already exists');

// Static pages are treated as sites in themselves.
if($type == 'single_page' ){

// We build an adder so we can tell if the URL can be
// scanned.
single_page_adder($site_url);
// The adder cURLs the site to test if the URL can be scanned.
$curled_site = single_page_adder($site_url);

// Site URL changes to the curled URL.
$site_url = $curled_site['url'];

// Single pages are saved with the following parameters.
$type = 'single_page';
$status = 'active';
$site = $site_url;
$site = $curled_site['url'];
$is_parent = 1;
add_page($db, $url, $type, $status, $site, $is_parent );
add_page($db, $site_url, $type, $status, $site, $is_parent );

// WordPress and XML deal with adding pages similarly,
// so their functions are wrapped in one condition.
}elseif($type == 'wordpress' || $type == 'xml' ){

// WordPress API is queried to create sites.
if($type == 'wordpress' ){

// Lots of users don't include the trailing slash,
// which WordPress requires to access the API.
if( !str_ends_with($site_url, '/') )
$site_url = $site_url.'/';

// WordPress adder can create lots of pages.
$pages = wordpress_site_adder($site_url);

}
if($type == 'wordpress' )
$curled_site = wordpress_site_adder($site_url);

// .XML adder can create lots of pages.
if($type == 'xml' )
$pages = xml_site_adder($site_url);
$curled_site = xml_site_adder($site_url);

// Both XML and WP deliver similar content.
$pages = $curled_site['contents'];
$site_url = $curled_site['url'];

// We're setting the status and adding pages here so we
// do not have to call the $db inside "models/adders.php",
Expand Down
86 changes: 66 additions & 20 deletions models/adders.php
Original file line number Diff line number Diff line change
@@ -1,13 +1,17 @@
<?php

/**
* Get Page Body
*/
function get_url_contents($site_url, $type = ''){
function run_curl($site_url, $type = ''){
$curl = curl_init($site_url);
curl_setopt($curl, CURLOPT_URL, $site_url);
curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($curl, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
curl_setopt($curl, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
curl_setopt($curl, CURLOPT_USERAGENT, 'Equalify');

// Restrict CURL to the type of what you want to add.
Expand All @@ -16,12 +20,40 @@ function get_url_contents($site_url, $type = ''){
if($type == 'xml')
curl_setopt($curl, CURLOPT_HTTPHEADER, array('Accept: application/xml'));

// Execute CURL or fallback.
// Execute CURL
$url_contents = curl_exec($curl);

// Add in DB info so we can see if URL is unique.
require_once '../config.php';
require_once 'db.php';
$db = connect(
DB_HOST,
DB_USERNAME,
DB_PASSWORD,
DB_NAME
);

// The curled URL is the URL we use as an ID.
$curled_url = curl_getinfo($curl, CURLINFO_EFFECTIVE_URL);

// We don't include the added endpoints in the URL.
$json_endpoints = '/wp-json/wp/v2/pages?per_page=100';
$curled_url = str_replace($json_endpoints, '', $curled_url);

// Make sure URL is unique to minimize scans.
if(!is_unique_site($db, $curled_url))
throw new Exception('"'.$curled_url.'" already exists');

// Fallback if no contents exist.
if($url_contents == false)
throw new Exception('Contents of "'.$site_url.'" cannot be loaded');
throw new Exception('Contents of "'.$curled_url.'" cannot be loaded');
curl_close($curl);
return $url_contents;

// We use the curled URL as the unique ID.
return array(
'url' => $curled_url,
'contents' => $url_contents
);

}

Expand All @@ -30,29 +62,26 @@ function get_url_contents($site_url, $type = ''){
*/
function single_page_adder($site_url){

    // cURL the URL so we can make sure it can be scanned.
    // run_curl() throws an Exception when the resolved URL already
    // exists or when no contents can be loaded; otherwise it returns
    // an array with the effective (post-redirect) 'url' and the
    // fetched 'contents'. The caller uses that 'url' as the site ID.
    //
    // The leftover debug output (echo + die), the unused WordPress
    // JSON URL, and the call to the renamed get_url_contents() were
    // removed so that the return below is actually reached.
    return run_curl($site_url);

}

/**
* WordPress Pages Adder
*/
function wordpress_site_adder($site_url){

// Reformat URL for JSON request.
$json_url = $site_url.'wp-json/wp/v2/pages?per_page=100';
// Add WP JSON URL endpoints for request.
$json_endpoints = '/wp-json/wp/v2/pages?per_page=100';
$json_url = $site_url.$json_endpoints;

// Get URL contents.
$url_contents = get_url_contents($json_url, 'wordpress');
$curled_site = run_curl($json_url, 'wordpress');

// Create JSON.
$wp_api_json = json_decode($url_contents, true);
$wp_api_json = json_decode($curled_site['contents'], true);
if(empty($wp_api_json[0]))
throw new Exception('The URL "'.$site_url.'" is not valid output');

Expand All @@ -61,7 +90,17 @@ function wordpress_site_adder($site_url){
foreach ($wp_api_json as $page):
array_push($pages, array('url' => $page['link']));
endforeach;
return $pages;

// Remove WP JSON endpoints.
$clean_curled_url = str_replace($json_endpoints, '', $curled_site['url']);

// Reformat the curled contents to be an array we can
// work with.
return array(
'url' => $clean_curled_url,
'contents' => $pages
);

}

/**
Expand All @@ -70,14 +109,15 @@ function wordpress_site_adder($site_url){
function xml_site_adder($site_url){

// Get URL contents.
$url_contents = get_url_contents($site_url, 'xml');
$curled_site = run_curl($site_url, 'xml');

// Valid XML files are only allowed!
if(!str_starts_with($url_contents, '<?xml'))
throw new Exception('"'.$site_url.'" is not valid XML');
$xml_contents = $curled_site['contents'];
if(!str_starts_with($xml_contents, '<?xml'))
throw new Exception('"'.$curled_site['url'].'" is not a readable XML format');

// Convert XML to JSON, so we can use it later
$xml = simplexml_load_string($url_contents);
$xml = simplexml_load_string($xml_contents);
$json = json_encode($xml);
$json_entries = json_decode($json,TRUE);

Expand All @@ -86,6 +126,12 @@ function xml_site_adder($site_url){
foreach ($json_entries['url'] as $page):
array_push($pages, array('url' => $page['loc']));
endforeach;
return $pages;

// Reformat the curled contents to be an array we can
// work with.
return array(
'url' => $curled_site['url'],
'contents' => $pages
);

}
15 changes: 6 additions & 9 deletions models/db.php
Original file line number Diff line number Diff line change
Expand Up @@ -315,19 +315,16 @@ function get_column_names(mysqli $db, $table){


/**
* Is Unique Page URL
* Is Unique Site
*/
function is_unique_page_url(mysqli $db, $page_url){
function is_unique_site(mysqli $db, $site_url){

// Require unique URL
$sql = 'SELECT * FROM `pages` WHERE `site` = "'.$site_url.'"';

// A URL that differs only by a trailing '/' is not considered
// unique, so we also search for that variant.
if( !str_ends_with($page_url, '/') )
$page_url_backslashed = $page_url.'/';

// Require unique URL
$sql = 'SELECT * FROM `pages` WHERE `url` = "'.$page_url.'"';
if(isset($page_url_backslashed))
$sql.= ' OR `url` = "'.$page_url_backslashed.'"';
$sql.= ' OR `site` = "'.$site_url.'/"';

$query = $db->query($sql);
if(mysqli_num_rows($query) > 0){
Expand Down

0 comments on commit 7306b07

Please sign in to comment.