Skip to content

Commit

Permalink
pkp#9911 Consider new URLs with language, and create canonical URLs i…
Browse files Browse the repository at this point in the history
…n ConvertApacheAccessLogFile CLI tool
  • Loading branch information
bozana committed Jul 19, 2024
1 parent 10423e0 commit 9907223
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 30 deletions.
99 changes: 78 additions & 21 deletions classes/cliTool/traits/ConvertLogFile.php
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
use APP\statistics\StatisticsHelper;
use DateTime;
use Exception;
use PKP\config\Config;
use PKP\core\Core;
use PKP\core\Registry;
use PKP\db\DAORegistry;
use PKP\facades\Locale;
use PKP\file\FileManager;
use PKP\submission\Genre;

Expand Down Expand Up @@ -169,7 +171,6 @@ public function convert(string $fileName): void
}

$newEntry['userAgent'] = $entryData['userAgent'];
$newEntry['canonicalUrl'] = $entryData['url'];

[
'workingAssocType' => $assocType,
Expand All @@ -188,13 +189,20 @@ public function convert(string $fileName): void
$context = $this->contextsByPath[$foundContextPath];
$newEntry['contextId'] = $context->getId();

$this->setAssoc($assocType, $op, $args, $newEntry);
$this->setAssoc($assocType, $page, $op, $args, $newEntry);
if (!array_key_exists('assocType', $newEntry)) {
if (!$this->isApacheAccessLogFile()) {
fwrite(STDERR, "The URL {$entryData['url']} in the line number {$lineNumber} was not considered." . PHP_EOL);
}
continue;
}

$canonicalUrl = $entryData['url']; // if this is not the apache log file i.e. it is the internal log file, the URLs are already canonical
if ($this->isApacheAccessLogFile()) {
$canonicalUrl = $this->getCanonicalUrl($foundContextPath, $newEntry['canonicalUrlPage'], $newEntry['canonicalUrlOp'], $newEntry['canonicalUrlArgs'] ?? null);
unset($newEntry['canonicalUrlPage'], $newEntry['canonicalUrlOp'], $newEntry['canonicalUrlArgs']);
}
$newEntry['canonicalUrl'] = $canonicalUrl;
} else {
continue;
}
Expand Down Expand Up @@ -404,7 +412,6 @@ protected function getExpectedPageAndOp(): array
Application::ASSOC_TYPE_ISSUE_GALLEY => [
'issue/download']
];
$pageAndOp[Application::getContextAssocType()][] = 'index';
break;
case 'omp':
// Before 3.4 OMP did not have chapter assoc type i.e. chapter landing page
Expand All @@ -426,7 +433,6 @@ protected function getExpectedPageAndOp(): array
Application::ASSOC_TYPE_SUBMISSION => [
'preprint/view']
];
$pageAndOp[Application::getContextAssocType()][] = 'index';
break;
default:
throw new Exception('Unrecognized application name.');
Expand Down Expand Up @@ -476,21 +482,21 @@ protected static function getContextPaths(string $urlInfo, bool $isPathInfo): ar
* the passed url information. It expects that urls
* were built using the system.
*/
protected static function getPage(string $urlInfo, bool $isPathInfo): string
protected static function getPage(string $urlInfo, bool $isPathInfo, array $userVars = []): string
{
$page = self::getUrlComponents($urlInfo, $isPathInfo, 0, 'page');
return Core::cleanFileVar(is_null($page) ? '' : $page);
$page = self::getUrlComponents($urlInfo, $isPathInfo, self::getOffset($urlInfo, $isPathInfo, 0), 'page', $userVars);
return Core::cleanFileVar($page ?? '');
}

/**
* Get the operation present in
* the passed URL information. It expects that URLs
* were built using the system.
*/
protected static function getOp(string $urlInfo, bool $isPathInfo): string
protected static function getOp(string $urlInfo, bool $isPathInfo, array $userVars = []): string
{
$operation = self::getUrlComponents($urlInfo, $isPathInfo, 1, 'op');
return Core::cleanFileVar(empty($operation) ? 'index' : $operation);
$operation = self::getUrlComponents($urlInfo, $isPathInfo, self::getOffset($urlInfo, $isPathInfo, 1), 'op', $userVars);
return Core::cleanFileVar($operation ?: 'index');
}

/**
Expand All @@ -499,16 +505,34 @@ protected static function getOp(string $urlInfo, bool $isPathInfo): string
* only arguments appended to the URL separated by "/").
* It expects that urls were built using the system.
*/
protected static function getArgs(string $urlInfo, bool $isPathInfo): array
protected static function getArgs(string $urlInfo, bool $isPathInfo, array $userVars = []): array
{
return self::getUrlComponents($urlInfo, $isPathInfo, 2, 'path');
return self::getUrlComponents($urlInfo, $isPathInfo, self::getOffset($urlInfo, $isPathInfo, 2), 'path', $userVars);
}

/**
 * Compute the effective offset of a URL component.
 *
 * The base offset is shifted by one position when getLocalization()
 * reports a locale segment in the URL; otherwise it is returned as-is.
 *
 * @param string $urlInfo URL information, passed through to getLocalization()
 * @param bool $isPathInfo Passed through to getLocalization()
 * @param int $varOffset Offset of the component when no locale segment is present
 *
 * @return int $varOffset, or $varOffset + 1 when a locale segment is detected
 */
private static function getOffset(string $urlInfo, bool $isPathInfo, int $varOffset): int
{
    $hasLocale = (bool) self::getLocalization($urlInfo, $isPathInfo);

    return $hasLocale ? $varOffset + 1 : $varOffset;
}

/**
 * Get the localization path present in the passed URL information.
 *
 * The component at offset 0 is read and returned only when
 * Locale::isLocaleValid() accepts it.
 *
 * @return string The locale found, or an empty string when the component is not a valid locale
 */
public static function getLocalization(string $urlInfo, bool $isPathInfo): string
{
    $candidate = self::getUrlComponents($urlInfo, $isPathInfo, 0);
    if (!Locale::isLocaleValid($candidate)) {
        return '';
    }

    return $candidate;
}

/**
* Get url components (page, operation and args)
* based on the passed offset.
*/
protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, int $offset, string $varName = ''): mixed
protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, int $offset, string $varName = '', array $userVars = []): array|string|null
{
$component = null;

Expand All @@ -517,7 +541,6 @@ protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, in
$isArrayComponent = true;
}
if ($isPathInfo) {
$application = Application::get();
$contextDepth = 1; // Was $application->getContextDepth();

$vars = explode('/', trim($urlInfo, '/'));
Expand All @@ -544,22 +567,56 @@ protected static function getUrlComponents(string $urlInfo, bool $isPathInfo, in
return $component;
}

/**
 * Construct the canonical URL from context path, page, op, and args.
 *
 * The URL is built by the application dispatcher as a page route with an
 * empty locale segment. When the request base URL differs from the configured
 * base_url and is not one of the configured base URL overrides, it is treated
 * as an alias and replaced by the context-specific base_url override (if any)
 * or by the general base_url.
 *
 * @param string $contextPath Path of the context the URL belongs to
 * @param string $canonicalUrlPage Page part of the canonical URL
 * @param string $canonicalUrlOp Operation part of the canonical URL
 * @param ?array $canonicalUrlArgs Additional path arguments, or null for none
 *
 * @return string The canonical URL
 */
protected function getCanonicalUrl(string $contextPath, string $canonicalUrlPage, string $canonicalUrlOp, ?array $canonicalUrlArgs = null): string
{
    $application = Application::get();
    $dispatcher = $application->getDispatcher();
    $request = $application->getRequest();

    $canonicalUrl = $dispatcher->url(
        $request,
        Application::ROUTE_PAGE,
        $contextPath,
        $canonicalUrlPage,
        $canonicalUrlOp,
        $canonicalUrlArgs,
        urlLocaleForPage: ''
    );

    // Make sure we log the server name and not aliases.
    $configBaseUrl = Config::getVar('general', 'base_url');
    $requestBaseUrl = $request->getBaseUrl();
    if ($requestBaseUrl === $configBaseUrl) {
        return $canonicalUrl;
    }

    // Make sure it's not an url override (no alias on that case).
    $isBaseUrlOverride = in_array($requestBaseUrl, Config::getContextBaseUrls(), true)
        || $requestBaseUrl === Config::getVar('general', 'base_url[index]');
    if ($isBaseUrlOverride) {
        return $canonicalUrl;
    }

    // Alias found, replace it by base_url from config file.
    // Make sure we use the correct base url override value for the context, if any.
    $baseUrlReplacement = Config::getVar('general', 'base_url[' . $contextPath . ']') ?: $configBaseUrl;

    return str_replace($requestBaseUrl, $baseUrlReplacement, $canonicalUrl);
}

/**
* Set assoc type and IDs from the passed page, operation and arguments.
*/
protected function setAssoc(int $assocType, string $op, array $args, array &$newEntry): void
protected function setAssoc(int $assocType, string $page, string $op, array $args, array &$newEntry): void
{
$application = Application::get();
$applicationName = $application->getName();
switch ($applicationName) {
case 'ojs2':
$this->setOJSAssoc($assocType, $args, $newEntry);
$this->setOJSAssoc($assocType, $page, $op, $args, $newEntry);
break;
case 'omp':
$this->setOMPAssoc($assocType, $args, $newEntry);
$this->setOMPAssoc($assocType, $page, $op, $args, $newEntry);
break;
case 'ops':
$this->setOPSAssoc($assocType, $args, $newEntry);
$this->setOPSAssoc($assocType, $page, $op, $args, $newEntry);
break;
default:
throw new Exception('Unrecognized application name!');
Expand All @@ -570,7 +627,7 @@ protected function setAssoc(int $assocType, string $op, array $args, array &$new
* Set assoc type and IDs from the passed page, operation and
* arguments specific to OJS.
*/
protected function setOJSAssoc(int $assocType, array $args, array &$newEntry): void
protected function setOJSAssoc(int $assocType, string $page, string $op, array $args, array &$newEntry): void
{
switch ($assocType) {
case Application::getContextAssocType():
Expand Down Expand Up @@ -813,7 +870,7 @@ protected function setOJSAssoc(int $assocType, array $args, array &$newEntry): v
* Set assoc type and IDs from the passed page, operation and
* arguments specific to OMP.
*/
protected function setOMPAssoc(int $assocType, array $args, array &$newEntry): void
protected function setOMPAssoc(int $assocType, string $page, string $op, array $args, array &$newEntry): void
{
switch ($assocType) {
case Application::getContextAssocType():
Expand Down Expand Up @@ -966,7 +1023,7 @@ protected function setOMPAssoc(int $assocType, array $args, array &$newEntry): v
* Set assoc type and IDs from the passed page, operation and
* arguments specific to OPS.
*/
protected function setOPSAssoc(int $assocType, array $args, array &$newEntry): void
protected function setOPSAssoc(int $assocType, string $page, string $op, array $args, array &$newEntry): void
{
switch ($assocType) {
case Application::getContextAssocType():
Expand Down
Loading

0 comments on commit 9907223

Please sign in to comment.