-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add a new layer of performance by allowing developers to cache OCR'ed…
… files
- Loading branch information
Showing
9 changed files
with
699 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
<?php | ||
|
||
/** | ||
* @file | ||
* Install, update and uninstall functions for the Entity to Text Tika sub-module. | ||
*/ | ||
|
||
/** | ||
* Implements hook_requirements(). | ||
*/ | ||
function entity_to_text_tika_requirements($phase = 'runtime') {
  $requirements = [];

  // Only the runtime phase (status report) is checked; every other phase
  // reports nothing.
  if ($phase !== 'runtime') {
    return $requirements;
  }

  $stream_wrapper_manager = \Drupal::service('stream_wrapper_manager');
  $file_system = \Drupal::service('file_system');

  /* ************************************************************************ */
  // Check private file directory.
  /* ************************************************************************ */

  // Start from the success state; the checks below override it on failure.
  $requirements['entity_to_text_tika_private'] = [
    'title' => t('Entity to Text (Tika): Private schema'),
    'value' => t('Private file system is set and writtable.'),
  ];

  // Check if the private file stream wrapper is ready to use.
  if (!$stream_wrapper_manager->isValidScheme('private')) {
    $requirements['entity_to_text_tika_private']['value'] = t('Private file system is not set.');
    $requirements['entity_to_text_tika_private']['description'] = t('Entity to Text Tika will store OCR of document in the private:// schema, there this one must be configured before installing.');
    $requirements['entity_to_text_tika_private']['severity'] = REQUIREMENT_ERROR;

    // Stop here: without a valid scheme the path cannot be resolved, and
    // running the writability check would replace this message with a
    // misleading "not writable" error for an empty directory.
    return $requirements;
  }

  // Check if the resolved private directory exists and can be written to.
  $private_path = $file_system->realpath('private://');
  if ($private_path === FALSE || !is_dir($private_path) || !is_writable($private_path)) {
    $requirements['entity_to_text_tika_private']['value'] = t('Private file system is not writtable.');
    $requirements['entity_to_text_tika_private']['description'] = t('The resolved private directory %directory% seems not writable.', ['%directory%' => (string) $private_path]);
    $requirements['entity_to_text_tika_private']['severity'] = REQUIREMENT_ERROR;
  }

  return $requirements;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
126 changes: 126 additions & 0 deletions
126
modules/entity_to_text_tika/src/Storage/PlaintextStorage.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
<?php | ||
|
||
namespace Drupal\entity_to_text_tika\Storage; | ||
|
||
use Drupal\Core\File\FileSystemInterface; | ||
use Drupal\Core\Logger\LoggerChannelFactoryInterface; | ||
use Drupal\Core\StreamWrapper\StreamWrapperManager; | ||
use Drupal\Core\StreamWrapper\StreamWrapperManagerInterface; | ||
use Drupal\file\Entity\File; | ||
|
||
/** | ||
* Provides the ability to store text content in a plain-text file. | ||
*/ | ||
class PlaintextStorage {

  /**
   * The directory URI under which the plain-text files are stored.
   */
  public const DESTINATION = 'private://entity-to-text/ocr';

  /**
   * The file system service.
   *
   * @var \Drupal\Core\File\FileSystemInterface
   */
  protected $fileSystem;

  /**
   * The logger service.
   *
   * @var \Drupal\Core\Logger\LoggerChannelInterface
   */
  protected $logger;

  /**
   * The stream wrapper manager.
   *
   * @var \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface
   */
  protected $streamWrapperManager;

  /**
   * Construct a new PlaintextStorage object.
   *
   * @param \Drupal\Core\File\FileSystemInterface $file_system
   *   The file system service.
   * @param \Drupal\Core\Logger\LoggerChannelFactoryInterface $logger_factory
   *   The logger factory, used to get the "entity_to_text_tika" channel.
   * @param \Drupal\Core\StreamWrapper\StreamWrapperManagerInterface $stream_wrapper_manager
   *   The stream wrapper manager.
   */
  public function __construct(FileSystemInterface $file_system, LoggerChannelFactoryInterface $logger_factory, StreamWrapperManagerInterface $stream_wrapper_manager) {
    $this->fileSystem = $file_system;
    $this->logger = $logger_factory->get('entity_to_text_tika');
    $this->streamWrapperManager = $stream_wrapper_manager;
  }

  /**
   * Load the stored plain text value of a file.
   *
   * @param \Drupal\file\Entity\File $file
   *   The document.
   * @param string $langcode
   *   The OCR langcode to be used.
   *
   * @return string|null
   *   The stored plain text, or NULL when no stored file exists or it could
   *   not be read.
   *
   * @throws \RuntimeException
   *   When the destination directory cannot be resolved.
   */
  public function loadTextFromFile(File $file, string $langcode = 'eng'): ?string {
    $fullpath = $this->getFullPath($file, $langcode);

    if (!is_file($fullpath)) {
      return NULL;
    }

    $content = file_get_contents($fullpath);
    // A failed read yields FALSE, which the return type would silently turn
    // into an empty string; report it as "nothing stored" instead.
    if ($content === FALSE) {
      $this->logger->warning('Unable to read the stored plain-text file "@path".', ['@path' => $fullpath]);
      return NULL;
    }

    return $content;
  }

  /**
   * Store a plain text value into a file.
   *
   * @param \Drupal\file\Entity\File $file
   *   The document to be saved.
   * @param string $content
   *   The plain-text document to be stored.
   * @param string $langcode
   *   The langcode.
   *
   * @return string
   *   The fullpath of the file the content is written to.
   *
   * @throws \RuntimeException
   *   When the destination directory cannot be resolved.
   */
  public function saveTextToFile(File $file, string $content, string $langcode = 'eng'): string {
    $fullpath = $this->getFullPath($file, $langcode);

    // Keep the write best-effort for callers, but leave a trace when it
    // fails rather than ignoring the result.
    if (file_put_contents($fullpath, $content) === FALSE) {
      $this->logger->error('Unable to write the plain-text file "@path".', ['@path' => $fullpath]);
    }

    return $fullpath;
  }

  /**
   * Get a normalized fullpath for a given file and langcode.
   *
   * The file name is built from the file ID, the file name and the langcode.
   *
   * @param \Drupal\file\Entity\File $file
   *   The document.
   * @param string $langcode
   *   The langcode.
   *
   * @return string
   *   The given file unique fullpath.
   *
   * @throws \RuntimeException
   *   When the destination scheme is not valid or the destination directory
   *   cannot be resolved to a real path.
   */
  private function getFullPath(File $file, string $langcode = 'eng'): string {
    $uri = self::DESTINATION;

    // Validate the scheme first so that no directory is prepared on a stream
    // wrapper that is not valid.
    $scheme = StreamWrapperManager::getScheme($uri);
    if (!$this->streamWrapperManager->isValidScheme($scheme)) {
      throw new \RuntimeException('The destination path is not a valid stream wrapper.');
    }

    $this->prepareDestination();

    $path = $this->fileSystem->realpath($uri);
    if (!$path) {
      throw new \RuntimeException(sprintf('The resolved realpath from uri "%s" is not a valid directory.', $uri));
    }

    $filename = $file->id() . '-' . $file->getFilename() . '.' . $langcode . '.ocr.txt';

    return $path . '/' . $filename;
  }

  /**
   * Ensure the destination directory is ready to use.
   */
  private function prepareDestination(): void {
    // A local copy is passed instead of the constant itself.
    // NOTE(review): presumably because prepareDirectory() takes its first
    // argument by reference - confirm.
    $dest = self::DESTINATION;
    $this->fileSystem->prepareDirectory($dest, FileSystemInterface::CREATE_DIRECTORY);
  }

}
64 changes: 64 additions & 0 deletions
64
modules/entity_to_text_tika/tests/src/Functional/InstallUninstallTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
<?php | ||
|
||
namespace Drupal\Tests\entity_to_text_tika\Functional; | ||
|
||
use Drupal\Tests\system\Functional\Module\ModuleTestBase; | ||
|
||
/** | ||
* Tests install / uninstall of module. | ||
* | ||
* @group entity_to_text | ||
* @group entity_to_text_tika | ||
* @group entity_to_text_tika_functional | ||
*/ | ||
class InstallUninstallTest extends ModuleTestBase {

  /**
   * {@inheritdoc}
   */
  protected $defaultTheme = 'starterkit_theme';

  /**
   * Ensure module can be installed.
   */
  public function testInstall(): void {
    // Makes sure the base module is installed.
    $this->container->get('module_installer')->install(['entity_to_text']);
    // Makes sure the sub-module is not already installed.
    $this->assertModules(['entity_to_text_tika'], FALSE);

    // Attempt to install the module through the modules admin form.
    $edit = [];
    $edit['modules[entity_to_text][enable]'] = 'entity_to_text';
    $edit['modules[entity_to_text_tika][enable]'] = 'entity_to_text_tika';
    $this->drupalGet('admin/modules');
    $this->submitForm($edit, 'Install');

    $this->assertSession()->pageTextContains('Module Entity to Text - Tika has been enabled.');

    // Makes sure the module has been installed.
    $this->assertModules(['entity_to_text_tika'], TRUE);
  }

  /**
   * Ensure module can be uninstalled.
   */
  public function testUninstall(): void {
    // Makes sure the base module is installed.
    $this->container->get('module_installer')->install(['entity_to_text']);
    // Makes sure the sub-module is installed.
    $this->container->get('module_installer')->install(['entity_to_text_tika']);

    // Attempt to uninstall the entity_to_text_tika module.
    $edit = [];
    $edit['uninstall[entity_to_text_tika]'] = TRUE;
    $this->drupalGet('admin/modules/uninstall');
    $this->submitForm($edit, 'Uninstall');
    // Confirm uninstall.
    $this->submitForm([], 'Uninstall');
    $this->assertSession()->responseContains('The selected modules have been uninstalled.');

    // Makes sure the module has been uninstalled.
    $this->assertModules(['entity_to_text_tika'], FALSE);
  }

}
57 changes: 57 additions & 0 deletions
57
modules/entity_to_text_tika/tests/src/Functional/RequirementsTest.php
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
<?php | ||
|
||
namespace Drupal\Tests\entity_to_text_tika\Functional; | ||
|
||
use Drupal\Tests\BrowserTestBase; | ||
|
||
/** | ||
* Tests module requirements. | ||
* | ||
* @group entity_to_text | ||
* @group entity_to_text_tika | ||
* @group entity_to_text_tika_functional | ||
*/ | ||
class RequirementsTest extends BrowserTestBase {

  /**
   * {@inheritdoc}
   */
  protected $defaultTheme = 'starterkit_theme';

  /**
   * {@inheritdoc}
   */
  protected static $modules = ['entity_to_text_tika'];

  /**
   * Admin user.
   *
   * @var \Drupal\user\UserInterface
   */
  protected $adminUser;

  /**
   * {@inheritdoc}
   */
  protected function setUp(): void {
    parent::setUp();

    // The status report page is requested with this permission.
    $this->adminUser = $this->drupalCreateUser([
      'administer site configuration',
    ]);
  }

  /**
   * Tests that the status report shows the private file system as ready.
   */
  public function testStatusPageGood(): void {
    $this->drupalLogin($this->adminUser);

    $this->drupalGet('admin/reports/status');
    $this->assertSession()->statusCodeEquals(200);

    // Both the requirement title and its success value must be displayed.
    $this->assertSession()->pageTextContains('Entity to Text (Tika): Private schema');
    $this->assertSession()->pageTextContains('Private file system is set and writtable.');
  }

}
Oops, something went wrong.