-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Doc/processor #141
base: main
Are you sure you want to change the base?
Doc/processor #141
Changes from 8 commits
29ed939
610aa18
9ff44e1
ac079d2
e4e7845
63c2ce5
5e74cfe
03d6372
d225842
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,12 +33,16 @@ | |
"symfony/mime": "^6.4 || ^7.0" | ||
}, | ||
"require-dev": { | ||
"ext-mbstring": "*", | ||
"async-aws/s3": "^2.6", | ||
"friendsofphp/php-cs-fixer": "^3.41", | ||
"league/flysystem": "^3.29", | ||
"league/flysystem-bundle": "^3.3", | ||
Comment on lines
+39
to
+40
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. might be worth to lower the requirement |
||
"phpstan/extension-installer": "^1.3", | ||
"phpstan/phpstan": "^1.10", | ||
"phpstan/phpstan-symfony": "^1.3", | ||
"phpunit/phpunit": "^10.4", | ||
"shipmonk/composer-dependency-analyser": "^1.7", | ||
"shipmonk/composer-dependency-analyser": "^1.8", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. needed to ignore |
||
"symfony/framework-bundle": "^6.4 || ^7.0", | ||
"symfony/http-client": "^6.4 || ^7.0", | ||
"symfony/monolog-bundle": "^3.10", | ||
|
@@ -56,7 +60,9 @@ | |
}, | ||
"suggest": { | ||
"symfony/monolog-bundle": "Enables logging throughout the generating process.", | ||
"symfony/twig-bundle": "Allows you to use Twig to render templates into PDF", | ||
"monolog/monolog": "Enables logging througout the generating process." | ||
"symfony/twig-bundle": "Allows you to use Twig to render templates into PDF.", | ||
"monolog/monolog": "Enables logging throughout the generating process.", | ||
"async-aws/s3": "Upload any file to aws s3 compatible endpoints supporting multi part upload without memory overhead.", | ||
"league/flysystem-bundle": "Upload any file using this filesystem abstraction package." | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
# Processing | ||
|
||
If you want to save the PDF or screenshot as a file, you will need to use a `Sensiolabs\GotenbergBundle\Processor\ProcessorInterface`. | ||
To avoid loading the whole file content in memory you can stream it to the browser. | ||
|
||
You can also hook on the stream and save the file chunk by chunk. To do so we leverage the [`->stream`](https://symfony.com/doc/current/http_client.html#streaming-responses) method from the HttpClientInterface and use a powerful feature from PHP Generators : [`->send`](https://www.php.net/manual/en/generator.send.php). | ||
|
||
## Using FileProcessor | ||
|
||
Useful if you want to store the file in the local filesystem. | ||
Example when generating a PDF : | ||
|
||
```php | ||
use Sensiolabs\GotenbergBundle\GotenbergPdfInterface; | ||
use Sensiolabs\GotenbergBundle\Processor\FileProcessor; | ||
use Symfony\Component\DependencyInjection\Attribute\Autowire; | ||
use Symfony\Component\Filesystem\Filesystem; | ||
use Symfony\Component\HttpFoundation\Response; | ||
|
||
#[Route(path: '/my-pdf', name: 'my_pdf')]
public function pdf(
    GotenbergPdfInterface $gotenbergPdf,
    Filesystem $filesystem,

    #[Autowire('%kernel.project_dir%/var/pdf')]
    string $pdfStorage,
): Response {
    // The processor receives the generated file and stores it in $pdfStorage.
    $fileProcessor = new FileProcessor($filesystem, $pdfStorage);

    $builder = $gotenbergPdf->html()
        //
        ->fileName('my_pdf')
        ->processor($fileProcessor)
    ;

    // Stream the generated PDF as the HTTP response.
    return $builder->generate()->stream();
}
``` | ||
|
||
This will save the file under `%kernel.project_dir%/var/pdf/my_pdf.pdf` once the file has been fully streamed to the browser. | ||
If you are not streaming to a browser, you can still process the file using the `process` method instead of `stream` : | ||
|
||
```php | ||
use Sensiolabs\GotenbergBundle\GotenbergPdfInterface; | ||
use Sensiolabs\GotenbergBundle\Processor\FileProcessor; | ||
use Symfony\Component\Filesystem\Filesystem; | ||
|
||
class SomeService
{
    /**
     * @param string $pdfStorage Directory the generated PDF is written to. It is injected
     *                           because a plain service has no `getParameter()` helper.
     */
    public function __construct(
        private readonly GotenbergPdfInterface $gotenbergPdf,

        #[\Symfony\Component\DependencyInjection\Attribute\Autowire('%kernel.project_dir%/var/pdf')]
        private readonly string $pdfStorage,
    ) {}

    /**
     * Generates the PDF and hands it to the FileProcessor without streaming it to a browser.
     */
    public function pdf(): \SplFileInfo
    {
        return $this->gotenbergPdf->html()
            //
            ->fileName('my_pdf')
            ->processor(new FileProcessor(
                new Filesystem(),
                $this->pdfStorage,
            ))
            ->generate()
            ->process()
        ;
    }
}
``` | ||
|
||
This will return a `SplFileInfo` of the generated file stored at `%kernel.project_dir%/var/pdf/my_pdf.pdf`. | ||
|
||
## Other processors | ||
|
||
* `Sensiolabs\GotenbergBundle\Processor\AsyncAwsProcessor` : Upload using the `async-aws/s3` package. Uploads using the [multipart upload](https://docs.aws.amazon.com/AmazonS3/latest/userguide/mpuoverview.html) feature of S3. Returns an `AsyncAws\S3\Result\CompleteMultipartUploadOutput` object. | ||
StevenRenaux marked this conversation as resolved.
Show resolved
Hide resolved
|
||
* `Sensiolabs\GotenbergBundle\Processor\FlysystemProcessor` : Upload using the `league/flysystem-bundle` package. Returns a `callable`. This callable will return the uploaded content. | ||
* `Sensiolabs\GotenbergBundle\Processor\ChainProcessor` : Apply multiple processors. Each chunk will be sent to each processor sequentially. Returns an array of values returned by chained processors. | ||
* `Sensiolabs\GotenbergBundle\Processor\NullProcessor` : Empty processor. Does nothing. Returns `null`. | ||
* `Sensiolabs\GotenbergBundle\Processor\TempfileProcessor` : Creates a temporary file and dumps all chunks into it. Returns a `resource` of said `tmpfile()`. | ||
|
||
## Custom processor | ||
|
||
A custom processor must implement `Sensiolabs\GotenbergBundle\Processor\ProcessorInterface`, which requires that your `__invoke` method returns a `\Generator`. To receive a chunk you must assign `yield` to a variable like so : `$chunk = yield`. | ||
|
||
The basic needed code is the following : | ||
|
||
```php | ||
do { | ||
$chunk = yield; | ||
// do something with it | ||
} while (!$chunk->isLast()); | ||
``` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should also explain that the returned value of the processor will be returned by the
StevenRenaux marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,118 @@ | ||
<?php | ||
|
||
namespace Sensiolabs\GotenbergBundle\Processor; | ||
|
||
use AsyncAws\S3\Result\CompleteMultipartUploadOutput; | ||
use AsyncAws\S3\S3Client; | ||
use Psr\Log\LoggerInterface; | ||
use Sensiolabs\GotenbergBundle\Exception\ProcessorException; | ||
|
||
/** | ||
* TODO : Might be worth adding "MultiPart" to the name as not all services support multi part upload. | ||
* | ||
* @implements ProcessorInterface<CompleteMultipartUploadOutput> | ||
*/ | ||
final class AsyncAwsProcessor implements ProcessorInterface
{
    /**
     * Number of bytes (5 MiB) the buffer must reach before it is sent as one part.
     */
    private const MIN_PART_SIZE = 5 * 1024 * 1024;

    /**
     * @param S3Client             $s3Client   client used for every multipart call
     * @param string               $bucketName bucket receiving the uploaded object
     * @param LoggerInterface|null $logger     optional logger receiving debug traces
     */
    public function __construct(
        private readonly S3Client $s3Client,
        private readonly string $bucketName,
        private readonly LoggerInterface|null $logger = null,
    ) {
    }

    /**
     * Receives chunks through `yield`, buffers them and uploads the buffer as a
     * part each time it reaches MIN_PART_SIZE. Whatever is left after the last
     * chunk is uploaded as a final part, then the multipart upload is completed.
     *
     * If anything prevents completion (an exception, or the generator being
     * destroyed before the last chunk arrived), the multipart upload is aborted.
     *
     * @param string|null $fileName object key; a unique `gotenberg_*` name is generated when null
     *
     * @return \Generator yields to receive chunks, returns the result of `completeMultipartUpload`
     *
     * @throws ProcessorException when no upload id is returned by `createMultipartUpload`
     */
    public function __invoke(string|null $fileName): \Generator
    {
        if (null === $fileName) {
            $fileName = uniqid('gotenberg_', true);

            $this->logger?->debug('{processor}: no filename given. Content will be dumped to "{file}".', ['processor' => self::class, 'file' => $fileName]);
        }

        $this->logger?->debug('{processor}: starting multi part upload of "{file}".', ['processor' => self::class, 'file' => $fileName]);

        $multipart = $this->s3Client->createMultipartUpload([
            'Bucket' => $this->bucketName,
            'Key' => $fileName,
        ]);

        $uploadId = $multipart->getUploadId();
        if (null === $uploadId) {
            throw new ProcessorException('Could not initiate a multi part upload');
        }

        $uploads = [];
        $partNumber = 0;
        $currentChunk = '';
        $completed = false;

        try {
            do {
                $chunk = yield;

                $currentChunk .= $chunk->getContent();

                // Byte length is what matters here; strlen() counts bytes and does not need ext-mbstring.
                if (\strlen($currentChunk) < self::MIN_PART_SIZE) {
                    continue;
                }

                ++$partNumber;

                $this->logger?->debug('{processor}: {min_size_required} reached. Uploading part {upload_part_number}', ['processor' => self::class, 'min_size_required' => self::MIN_PART_SIZE, 'upload_part_number' => $partNumber]);
                $uploads[] = $this->uploadPart($fileName, $uploadId, $partNumber, $currentChunk);

                $currentChunk = '';
            } while (!$chunk->isLast());

            if ('' !== $currentChunk) {
                ++$partNumber;

                $this->logger?->debug('{processor}: last chunk reached. Uploading leftover part {upload_part_number}', ['processor' => self::class, 'upload_part_number' => $partNumber]);
                $uploads[] = $this->uploadPart($fileName, $uploadId, $partNumber, $currentChunk);
            }

            // Release the buffer before waiting on the completion request.
            unset($currentChunk);

            $this->logger?->debug('{processor}: completing multi part upload of "{file}".', ['processor' => self::class, 'file' => $fileName]);

            $result = $this->s3Client->completeMultipartUpload([
                'UploadId' => $uploadId,
                'Bucket' => $this->bucketName,
                'Key' => $fileName,
                'MultipartUpload' => [
                    'Parts' => $uploads,
                ],
            ]);

            // Wait for the response here so that a failed completion is raised
            // inside this try block and triggers the abort below.
            $result->resolve();

            $completed = true;

            return $result;
        } finally {
            // A finally block (rather than catch) also runs when the generator is
            // destroyed while suspended on `yield`, so abandoned uploads are aborted too.
            if (!$completed) {
                $this->abortUpload($fileName, $uploadId);
            }
        }
    }

    /**
     * Uploads one part and returns the entry expected in the "Parts" list of the completion request.
     *
     * @return array{PartNumber: int, ETag: string|null}
     */
    private function uploadPart(string $fileName, string $uploadId, int $partNumber, string $body): array
    {
        $upload = $this->s3Client->uploadPart([
            'Bucket' => $this->bucketName,
            'Key' => $fileName,
            'Body' => $body,
            'PartNumber' => $partNumber,
            'UploadId' => $uploadId,
        ]);

        return [
            'PartNumber' => $partNumber,
            'ETag' => $upload->getEtag(),
        ];
    }

    /**
     * Best-effort abort: a failure here is logged and not rethrown, so it never
     * replaces the error that interrupted the upload in the first place.
     */
    private function abortUpload(string $fileName, string $uploadId): void
    {
        try {
            $this->s3Client->abortMultipartUpload([
                'UploadId' => $uploadId,
                'Bucket' => $this->bucketName,
                'Key' => $fileName,
            ]);
        } catch (\Throwable $abortError) {
            $this->logger?->error('{processor}: unable to abort multi part upload of "{file}": {message}', ['processor' => self::class, 'file' => $fileName, 'message' => $abortError->getMessage()]);
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
<?php | ||
|
||
namespace Sensiolabs\GotenbergBundle\Processor; | ||
|
||
use League\Flysystem\FilesystemOperator; | ||
use Psr\Log\LoggerInterface; | ||
use Sensiolabs\GotenbergBundle\Exception\ProcessorException; | ||
|
||
/** | ||
* @implements ProcessorInterface<(Closure(): string)> | ||
*/ | ||
final class FlysystemProcessor implements ProcessorInterface
{
    /**
     * @param FilesystemOperator   $filesystemOperator filesystem the file is written to
     * @param LoggerInterface|null $logger             optional logger receiving debug traces
     */
    public function __construct(
        private readonly FilesystemOperator $filesystemOperator,
        private readonly LoggerInterface|null $logger = null,
    ) {
    }

    /**
     * Buffers every received chunk into a temporary file (through TempfileProcessor),
     * then writes that file to the filesystem operator as a stream once the last
     * chunk has been received.
     *
     * @param string|null $fileName destination path; a unique `gotenberg_*` name is generated when null
     *
     * @return \Generator yields to receive chunks, returns a closure that reads the written file back
     *
     * @throws ProcessorException when writing to the filesystem operator fails
     */
    public function __invoke(string|null $fileName): \Generator
    {
        if (null === $fileName) {
            $fileName = uniqid('gotenberg_', true);

            $this->logger?->debug('{processor}: no filename given. Content will be dumped to "{file}".', ['processor' => self::class, 'file' => $fileName]);
        }

        $tmpfileProcessor = (new TempfileProcessor())($fileName);

        do {
            $chunk = yield;
            $tmpfileProcessor->send($chunk);
        } while (!$chunk->isLast());

        $tmpfile = $tmpfileProcessor->getReturn();

        try {
            $this->filesystemOperator->writeStream($fileName, $tmpfile);

            $this->logger?->debug('{processor}: content dumped to "{file}".', ['processor' => self::class, 'file' => $fileName]);

            // Nothing is read until the caller invokes the closure.
            return function () use ($fileName): string {
                return $this->filesystemOperator->read($fileName); // use readStream instead ?
            };
        } catch (\Throwable $t) {
            throw new ProcessorException(\sprintf('Unable to write to "%s".', $fileName), previous: $t);
        } finally {
            // The adapter may already have closed the stream; on PHP 8, fclose() on a
            // closed resource throws a TypeError that would replace the result above.
            if (\is_resource($tmpfile)) {
                fclose($tmpfile);
            }
        }
    }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
<?php | ||
|
||
namespace Sensiolabs\GotenbergBundle\Processor; | ||
|
||
use Sensiolabs\GotenbergBundle\Exception\ProcessorException; | ||
|
||
/** | ||
* @implements ProcessorInterface<resource> | ||
*/ | ||
final class TempfileProcessor implements ProcessorInterface
{
    /**
     * Writes every received chunk into a `tmpfile()` resource and returns that
     * resource, rewound to its beginning, once the last chunk has been written.
     *
     * @param string|null $fileName not used by this processor
     *
     * @throws ProcessorException when the temporary file cannot be created or written to
     */
    public function __invoke(string|null $fileName): \Generator
    {
        $handle = tmpfile();
        if (!$handle) {
            throw new ProcessorException('Unable to create a temporary file resource.');
        }

        while (true) {
            $part = yield;

            $written = fwrite($handle, $part->getContent());
            if (false === $written) {
                throw new ProcessorException('Unable to write to the temporary file resource.');
            }

            if ($part->isLast()) {
                break;
            }
        }

        // Put the pointer back at the start so the consumer can read the whole file.
        rewind($handle);

        return $handle;
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
might be worth to lower the requirement