diff --git a/actions/add_site.php b/actions/add_site.php index 4fec05cf..0e86bea5 100644 --- a/actions/add_site.php +++ b/actions/add_site.php @@ -11,10 +11,11 @@ DB_NAME ); -// Valid URLs are required so that we can CURL. -$site_url = filter_input(INPUT_GET, 'url', FILTER_VALIDATE_URL); +// We don't validate the URLs here because cURL does +// a better job of validating/redirecting in the adders. +$site_url = $_GET['url']; if($site_url == false) - throw new Exception('"'.$_GET['url'].'" is an invalid URL'); + throw new Exception('URL is missing'); // We need to check the type since a user could manually // update the URL string to something unsupported. @@ -22,44 +23,37 @@ if( $type == false) throw new Exception('Type is not specified for the URL "'.$site_url.'"'); -// Requiring unique URLs minimizes unnessary scans. -if(!is_unique_page_url($db, $site_url)) - throw new Exception('Page "'.$site_url.'" already exists'); - // Static pages are treated as sites in themselves. if($type == 'single_page' ){ - // We build an adder so we can tell if the URL can be - // scaned. - single_page_adder($site_url); + // The adder cURLs the site to test if the URL can be scanned. + $curled_site = single_page_adder($site_url); + + // Site URL changes to the curled URL. + $site_url = $curled_site['url']; // Single pages are saved with the following pramenters $type = 'single_page'; $status = 'active'; - $site = $site_url; + $site = $curled_site['url']; $is_parent = 1; - add_page($db, $url, $type, $status, $site, $is_parent ); + add_page($db, $site_url, $type, $status, $site, $is_parent ); // WordPress and XML deals with adding pages similarly, // so their functions are wrapped in one condition. }elseif($type == 'wordpress' || $type == 'xml' ){ // WordPress API is queried to create sites. - if($type == 'wordpress' ){ - - // Lots of users don't include backslashes, - // which WordPress requirew to access the API. 
- if( !str_ends_with($site_url, '/') ) - $site_url = $site_url.'/'; - - // WordPress adder can create lots of pages. - $pages = wordpress_site_adder($site_url); - - } + if($type == 'wordpress' ) + $curled_site = wordpress_site_adder($site_url); // .XML adder can create lots of pages. if($type == 'xml' ) - $pages = xml_site_adder($site_url); + $curled_site = xml_site_adder($site_url); + + // Both XML and WP deliver similar content. + $pages = $curled_site['contents']; + $site_url = $curled_site['url']; // We're setting the status and adding pages here so we // do not have to call the $db inside "models/adders.php", diff --git a/models/adders.php b/models/adders.php index d9b72c78..a497ea34 100644 --- a/models/adders.php +++ b/models/adders.php @@ -1,13 +1,17 @@ $curled_url, + 'contents' => $url_contents + ); } @@ -30,14 +62,10 @@ function get_url_contents($site_url, $type = ''){ */ function single_page_adder($site_url){ - // Reformat URL for JSON request. - $json_url = $site_url.'wp-json/wp/v2/pages?per_page=100'; - // Get URL contents so we can make sure URL // can be scanned. - $url_contents = get_url_contents($site_url); - echo $url_contents; - die; + return run_curl($site_url); + } /** @@ -45,14 +73,15 @@ function single_page_adder($site_url){ */ function wordpress_site_adder($site_url){ - // Reformat URL for JSON request. - $json_url = $site_url.'wp-json/wp/v2/pages?per_page=100'; + // Add WP JSON URL endpoints for request. + $json_endpoints = '/wp-json/wp/v2/pages?per_page=100'; + $json_url = $site_url.$json_endpoints; // Get URL contents. - $url_contents = get_url_contents($json_url, 'wordpress'); + $curled_site = run_curl($json_url, 'wordpress'); // Create JSON. 
- $wp_api_json = json_decode($url_contents, true); + $wp_api_json = json_decode($curled_site['contents'], true); if(empty($wp_api_json[0])) throw new Exception('The URL "'.$site_url.'" is not valid output'); @@ -61,7 +90,17 @@ function wordpress_site_adder($site_url){ foreach ($wp_api_json as $page): array_push($pages, array('url' => $page['link'])); endforeach; - return $pages; + + // Remove WP JSON endpoints. + $clean_curled_url = str_replace($json_endpoints, '', $curled_site['url']); + + // Reformat the curled contents to be an array we can + // work with. + return array( + 'url' => $clean_curled_url, + 'contents' => $pages + ); + } /** @@ -70,14 +109,15 @@ function wordpress_site_adder($site_url){ function xml_site_adder($site_url){ // Get URL contents. - $url_contents = get_url_contents($site_url, 'xml'); + $curled_site = run_curl($site_url, 'xml'); // Valid XML files are only allowed! - if(!str_starts_with($url_contents, ' $page['loc'])); endforeach; - return $pages; + // Reformat the curled contents to be an array we can + // work with. + return array( + 'url' => $curled_site['url'], + 'contents' => $pages + ); + } \ No newline at end of file diff --git a/models/db.php b/models/db.php index 77916cd1..511a2e27 100644 --- a/models/db.php +++ b/models/db.php @@ -315,19 +315,16 @@ function get_column_names(mysqli $db, $table){ /** - * Is Unique Page URL + * Is Unique Site */ -function is_unique_page_url(mysqli $db, $page_url){ +function is_unique_site(mysqli $db, $site_url){ + + // Require unique URL + $sql = 'SELECT * FROM `pages` WHERE `site` = "'.$site_url.'"'; // We don't consider a page with a '/' a unique url // so we will also search for them. 
- if( !str_ends_with($page_url, '/') ) - $page_url_backslashed = $page_url.'/'; - - // Require unique URL - $sql = 'SELECT * FROM `pages` WHERE `url` = "'.$page_url.'"'; - if(isset($page_url_backslashed)) - $sql.= ' OR `url` = "'.$page_url_backslashed.'"'; + $sql.= ' OR `site` = "'.$site_url.'/"'; $query = $db->query($sql); if(mysqli_num_rows($query) > 0){