Skip to content

Commit

Permalink
Added file size limit
Browse files Browse the repository at this point in the history
  • Loading branch information
PaVeL-Ekt authored and zhelyabuzhsky committed Dec 8, 2016
1 parent 08d0597 commit 4c00349
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 31 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,15 @@ $ composer require zhelyabuzhsky/yii2-sitemap
'maxUrlsCountInFile' => 10000,
'sitemapDirectory' => 'frontend/web',
'optionalAttributes' => ['changefreq', 'lastmod', 'priority'],
'maxFileSize' => '10M',
],
```

where
* maxUrlsCountInFile - max count of urls in one sitemap file;
* sitemapDirectory - directory to place sitemap files;
* optionalAttributes - list of used optional attributes.
* optionalAttributes - list of used optional attributes;
* maxFileSize - maximum size of a single sitemap file. Use zero to disable the limit. The value may be given in bytes or with one of the suffixes k (kilobytes) or m (megabytes), e.g. '500k' or '10M'. Defaults to 10m.

### Console action

Expand Down
122 changes: 92 additions & 30 deletions src/components/Sitemap.php
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,6 @@ class Sitemap extends Component
*/
protected $urlCount = 0;

/**
* Number of current sitemap file.
*
* @var int
*/
protected $filesCount = 0;

/**
* Array of data sources for sitemap generation.
*
Expand All @@ -72,6 +65,28 @@ class Sitemap extends Component
*/
protected $disallowUrls = [];

/**
 * Maximal size of a single sitemap file, in bytes.
 * A value that is not greater than zero disables the size check.
 * Default value: 10M
 *
 * @var int
 */
protected $maxFileSize = 10485760; // 10 * 1024 * 1024

/**
 * Sequence number of the sitemap file currently being written.
 * Incremented every time a new file is started and used to build
 * that file's name ("sitemap" . $fileIndex . ".xml").
 *
 * @var int
 */
protected $fileIndex = 0;

/**
 * Names of the sitemap files started so far (e.g. "sitemap1.xml"),
 * stored without the temporary "_" prefix and without the ".gz" suffix.
 * The index file is built from this list.
 *
 * @var string[]
 */
protected $generatedFiles = [];

/**
* Create index file sitemap.xml.
*/
Expand All @@ -81,8 +96,8 @@ protected function createIndexFile()
$this->handle = fopen($this->path, 'w');
fwrite(
$this->handle,
'<?xml version="1.0" encoding="UTF-8"?>' .
' <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
'<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL .
'<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'
);
$objDateTime = new \DateTime('NOW');
$lastmod = $objDateTime->format(\DateTime::W3C);
Expand All @@ -91,16 +106,17 @@ protected function createIndexFile()
if (isset(\Yii::$app->urlManager->baseUrl)) {
$baseUrl = \Yii::$app->urlManager->baseUrl;
}
for ($i = 1; $i <= $this->filesCount; $i++) {
foreach ($this->generatedFiles as $fileName) {
fwrite(
$this->handle,
'<sitemap>' .
" <loc>{$baseUrl}/sitemap{$i}.xml.gz</loc>" .
" <lastmod>{$lastmod}</lastmod>" .
PHP_EOL .
'<sitemap>' . PHP_EOL .
"\t" . '<loc>' . $baseUrl . '/' . $fileName . '.gz' . '</loc>' . PHP_EOL .
"\t" . '<lastmod>' . $lastmod . '</lastmod>' . PHP_EOL .
'</sitemap>'
);
}
fwrite($this->handle, '</sitemapindex>');
fwrite($this->handle, PHP_EOL . '</sitemapindex>');
fclose($this->handle);
$this->gzipFile();
}
Expand All @@ -116,7 +132,7 @@ protected function updateSitemaps()
}
// rename new files (without '_')
foreach (glob("{$this->sitemapDirectory}/_sitemap*.xml*") as $filePath) {
$newFilePath = dirname($filePath) . '/' . str_replace('_', '', basename($filePath));
$newFilePath = dirname($filePath) . '/' . substr(basename($filePath), 1);
rename($filePath, $newFilePath);
}
}
Expand All @@ -126,12 +142,17 @@ protected function updateSitemaps()
*/
protected function beginFile()
{
$this->filesCount++;
$this->path = "{$this->sitemapDirectory}/_sitemap{$this->filesCount}.xml";
++$this->fileIndex;
$this->urlCount = 0;

$fileName = 'sitemap' . $this->fileIndex . '.xml';
$this->path = $this->sitemapDirectory . '/_' . $fileName;
$this->generatedFiles[] = $fileName;

$this->handle = fopen($this->path, 'w');
fwrite(
$this->handle,
'<?xml version="1.0" encoding="UTF-8"?>' . "\n" .
'<?xml version="1.0" encoding="UTF-8"?>' . PHP_EOL .
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"' .
' xmlns:image="http://www.google.com/schemas/sitemap-image/1.1"' .
' xmlns:news="http://www.google.com/schemas/sitemap-news/0.9">'
Expand All @@ -143,7 +164,7 @@ protected function beginFile()
*/
protected function closeFile()
{
fwrite($this->handle, "\n" . '</urlset>');
fwrite($this->handle, PHP_EOL . '</urlset>');
fclose($this->handle);
}

Expand Down Expand Up @@ -204,31 +225,25 @@ public function addModel($model)
*/
public function create()
{
$this->fileIndex = 0;
$this->beginFile();

foreach ($this->dataSources as $dataSource) {
/** @var \yii\db\ActiveQuery $dataSource */
foreach ($dataSource->batch(100) as $entities) {
foreach ($entities as $entity) {
if ($this->isDisallowUrl($entity->getSitemapLoc())) {
continue;
}
if ($this->urlCount === $this->maxUrlsCountInFile) {
$this->urlCount = 0;
$this->closeFile();
$this->gzipFile();
$this->beginFile();
if (!$this->isDisallowUrl($entity->getSitemapLoc())) {
$this->writeEntity($entity);
}
$this->writeEntity($entity);
$this->urlCount++;
}
}
}

if ($this->urlCount >= 0) {
if (is_resource($this->handle)) {
$this->closeFile();
$this->gzipFile();
}

$this->createIndexFile();
$this->updateSitemaps();
}
Expand All @@ -245,6 +260,46 @@ public function setDisallowUrls($urls)
return $this;
}

/**
 * Set the maximal size of a single sitemap file.
 *
 * Accepted values:
 *  - an integer number of bytes, stored as is;
 *  - a string of digits, optionally followed by a case-insensitive suffix:
 *    "k" multiplies by 1024, "m" multiplies by 1024 * 1024
 *    (e.g. "2048", "500k", "10M"). Surrounding whitespace is ignored.
 *
 * Anything else (floats, strings in an unrecognised format, ...) is converted
 * with intval(), so an unparsable string ends up as its leading integer or zero.
 *
 * @param int|string $size Maximal file size. Zero to work without limits.
 *
 * @return $this
 */
public function setMaxFileSize($size)
{
    // The position of a suffix in this list, plus one, is the power of 1024 it stands for.
    $fileSizeAbbr = ['k', 'm'];

    if (!is_int($size)) {
        // At least one digit is required: a bare suffix such as "m" must not reach
        // the multiplication below with an empty (non-numeric) operand.
        $pattern = '/^(\d+)(' . implode('|', $fileSizeAbbr) . ')?$/i';

        if (is_string($size) && preg_match($pattern, trim($size), $matches) === 1) {
            // Always store an integer, also when no suffix was given.
            $bytes = (int) $matches[1];
            if (isset($matches[2])) {
                $power = array_search(strtolower($matches[2]), $fileSizeAbbr, true) + 1;
                $bytes *= pow(1024, $power);
            }
            $size = $bytes;
        } else {
            $size = intval($size);
        }
    }

    $this->maxFileSize = $size;

    return $this;
}

/**
 * Check whether writing a string of the given length to the current file
 * would break one of the configured limits.
 *
 * Two limits are evaluated; each one is skipped when its setting is not positive:
 *  - URL count: the current file already holds maxUrlsCountInFile URLs or more;
 *  - file size: the size reported by fstat() for the current handle plus $strLen
 *    is greater than maxFileSize.
 *
 * The size limit is never reported for a file that has no URLs yet: starting
 * another file could not make the string fit and would only leave an empty
 * sitemap behind.
 *
 * NOTE(review): only the string about to be written is counted; whatever is
 * appended when the file is closed is not reserved for here.
 *
 * @param int $strLen Size in bytes of the string about to be written
 * @return boolean
 */
public function isLimitExceeded($strLen)
{
    // ">=" instead of "===" so the check still fires if the counter overshoots
    // or the limit was configured as a numeric string.
    if ($this->maxUrlsCountInFile > 0 && $this->urlCount >= $this->maxUrlsCountInFile) {
        return true;
    }

    if ($this->maxFileSize <= 0 || $this->urlCount === 0) {
        return false;
    }

    // Without a usable handle there is nothing written yet; treat the size as zero.
    $fileStat = is_resource($this->handle) ? fstat($this->handle) : false;
    $currentSize = ($fileStat === false) ? 0 : $fileStat['size'];

    return $currentSize + $strLen > $this->maxFileSize;
}

/**
* Checking for validity.
*
Expand Down Expand Up @@ -281,6 +336,13 @@ protected function writeEntity($entity)

$str .= '</url>';

if ($this->isLimitExceeded(strlen($str))) {
$this->closeFile();
$this->gzipFile();
$this->beginFile();
}

fwrite($this->handle, $str);
++$this->urlCount;
}
}

0 comments on commit 4c00349

Please sign in to comment.