Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhancement/cache and retry request function #7

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env.dist
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
GITHUB_TOKEN=
CACHE_PATH=/tmp/cache
7 changes: 7 additions & 0 deletions build.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,24 +29,31 @@
->addOrganization('laminas', 'laminas')
);

// A single CurlExec instance is shared by every crawler below.
// getenv() yields false when CACHE_PATH is unset (and may yield ''); `?: null`
// maps both to null, in which case CurlExec is constructed without a cache path.
$curlExec = new \App\CurlExec(
cachePath: \getenv('CACHE_PATH') ?: null,
);

// NOTE(review): $run is defined outside this excerpt; it is called with a
// human-readable label and a crawler object — confirm its exact contract there.

// Latest PHP version and its release date, destructured from the crawler's result.
[$latestPhpVersion, $latestPhpVersionReleaseDate] = $run(
'Última versão do PHP',
new App\Crawler\LatestPhpVersion(
'https://www.php.net/releases/active.php',
$curlExec,
)
);

// PHP usage percentage taken from the w3techs programming-language history page.
$phpUsagePercentage = $run(
'Uso do PHP no w3techs.com',
new App\Crawler\PhpUsagePercentage(
'https://w3techs.com/technologies/history_overview/programming_language',
$curlExec,
)
);

// Two WordPress usage figures (CMS share and total share, judging by the
// variable names) taken from the w3techs WordPress details page.
[$wordpressCmsUsagePercentage, $wordpressTotalUsagePercentage] = $run(
'Uso do WordPress no w3techs.com',
new App\Crawler\WordpressUsagePercentage(
'https://w3techs.com/technologies/details/cm-wordpress',
$curlExec,
)
);

Expand Down
5 changes: 4 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
],
"minimum-stability": "dev",
"require": {
"php": "^8.3"
"php": "^8.3",
"ext-curl": "*",
"ext-dom": "*",
"ext-libxml": "*"
},
"require-dev": {
"squizlabs/php_codesniffer": "4.0.x-dev",
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/LatestPhpVersion.php
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,17 @@
use App\CurlExec;
use DateTimeImmutable;

class LatestPhpVersion
readonly class LatestPhpVersion
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec,
) {
}

public function __invoke(): ?array
{
$json = (new CurlExec())->fetchAsString($this->url);
$json = $this->curlExec->fetchAsString($this->url);
$json = \json_decode($json, true);
$latestMajor = end($json);
$latestMinor = end($latestMajor);
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/PhpUsagePercentage.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@
use DOMDocument;
use DOMXPath;

class PhpUsagePercentage
readonly class PhpUsagePercentage
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec
) {
}

public function __invoke(): ?float
{
$xpath = (new CurlExec())->fetchAsXpath($this->url);
$xpath = $this->curlExec->fetchAsXpath($this->url);

$languages = $xpath->query('//table[@class="hist"]/tr');
foreach ($languages as $language) {
Expand Down
10 changes: 6 additions & 4 deletions src/Crawler/WordpressUsagePercentage.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,17 @@
use DOMDocument;
use DOMXPath;

class WordpressUsagePercentage
readonly class WordpressUsagePercentage
{
public function __construct(private readonly string $url)
{
public function __construct(
private string $url,
private CurlExec $curlExec
) {
}

public function __invoke(): ?array
{
$xpath = (new CurlExec())->fetchAsXpath($this->url);
$xpath = $this->curlExec->fetchAsXpath($this->url);

$languages = $xpath->query('//p[@class="surv"]');
foreach ($languages as $language) {
Expand Down
65 changes: 58 additions & 7 deletions src/CurlExec.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,58 @@
use DOMXPath;
use RuntimeException;

class CurlExec
readonly class CurlExec
{
/**
 * Stores the fetch settings and makes sure the cache directory exists.
 *
 * @param string|null $cachePath  Directory that holds cached responses; when empty or
 *                                null no directory is created.
 * @param int         $maxRetries Upper bound on request attempts made by fetchAsString().
 * @param int         $retryDelay Value handed to sleep() between timed-out attempts (seconds).
 */
public function __construct(
    private ?string $cachePath = null,
    private int $maxRetries = 3,
    private int $retryDelay = 5,
) {
    // Nothing to prepare when no path was given or the directory is already there.
    if (!$cachePath || \is_dir($cachePath)) {
        return;
    }

    // Recursive creation so nested cache paths work on first run.
    \mkdir($cachePath, 0755, true);
}

/**
 * Returns the response body for $url, served from the file cache when possible.
 *
 * When a cache path is configured and a readable cache file exists for the URL,
 * its contents are returned without any network request. Otherwise the request is
 * attempted up to $maxRetries times; an attempt that makeRequest() reports as
 * `false` (timeout) is retried after $retryDelay seconds. A successful response
 * is written to the cache file (best effort) before being returned.
 *
 * @param string $url Address to fetch.
 *
 * @return string Response body (cached or freshly fetched).
 *
 * @throws RuntimeException When no attempt produced a response; makeRequest()
 *                          may also throw it for non-timeout cURL errors.
 */
public function fetchAsString(string $url): string
{
    $cacheFile = $this->cachePath ? $this->getCacheFilePath($url) : null;

    if ($cacheFile !== null && \file_exists($cacheFile)) {
        $cached = \file_get_contents($cacheFile);
        // An unreadable cache entry is treated as a miss instead of leaking
        // `false` through a string-typed return.
        if ($cached !== false) {
            return $cached;
        }
    }

    for ($attempt = 1; $attempt <= $this->maxRetries; $attempt++) {
        $response = $this->makeRequest($url);

        // Timeout
        if ($response === false) {
            // Wait only when another attempt will follow; sleeping after the
            // last one would just delay the exception below.
            if ($attempt < $this->maxRetries) {
                \sleep($this->retryDelay);
            }
            continue;
        }

        // Real response: store it for later runs, then hand it back.
        if ($cacheFile !== null) {
            \file_put_contents($cacheFile, $response);
        }
        return $response;
    }

    throw new RuntimeException("Falha ao realizar a requisição para URL: {$url}");
}

/**
 * Fetches $url and returns an XPath handle over the parsed HTML document.
 *
 * The body is obtained through fetchAsString(), so caching and retry behaviour
 * are the same as for that method.
 *
 * @param string $url Address of the HTML page to fetch.
 *
 * @return DOMXPath XPath object bound to the parsed document.
 *
 * @throws RuntimeException Propagated from fetchAsString().
 */
public function fetchAsXpath(string $url): DOMXPath
{
    $htmlString = $this->fetchAsString($url);

    // Collect parse diagnostics internally instead of emitting PHP warnings
    // for the malformed markup real-world pages tend to contain.
    \libxml_use_internal_errors(true);

    $doc = new DOMDocument();
    $doc->loadHTML($htmlString);

    // The buffered diagnostics are never inspected; drop them so the libxml
    // error buffer does not grow with every page parsed.
    \libxml_clear_errors();

    return new DOMXPath($doc);
}

protected function makeRequest(string $url): string|false
{
$curlHandle = \curl_init();
\curl_setopt_array($curlHandle, [
Expand All @@ -20,19 +69,21 @@ public function fetchAsString(string $url): string
]);

$response = \curl_exec($curlHandle);

if ($response === false) {
$errorCode = \curl_errno($curlHandle);
if ($errorCode === CURLE_OPERATION_TIMEDOUT) {
return false;
}
throw new RuntimeException(\curl_error($curlHandle));
}

return $response;
}

public function fetchAsXpath(string $url): DOMXPath
private function getCacheFilePath(string $url): string
{
$htmlString = $this->fetchAsString($url);
\libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($htmlString);
return new DOMXPath($doc);
$fileName = \sha1($url) . '.html';
return $this->cachePath . DIRECTORY_SEPARATOR . $fileName;
}
}