diff --git a/README.md b/README.md index 49d78e1..0441e3f 100644 --- a/README.md +++ b/README.md @@ -40,13 +40,15 @@ $ composer require zhelyabuzhsky/yii2-sitemap 'maxUrlsCountInFile' => 10000, 'sitemapDirectory' => 'frontend/web', 'optionalAttributes' => ['changefreq', 'lastmod', 'priority'], + 'maxFileSize' => '10M', ], ``` where * maxUrlsCountInFile - max count of urls in one sitemap file; * sitemapDirectory - directory to place sitemap files; -* optionalAttributes - list of used optional attributes. +* optionalAttributes - list of used optional attributes; +* maxFileSize - maximal file size. Zero to work without limits. So you can specify the following abbreviations k - kilobytes and m - megabytes. By default 10m. ### Console action diff --git a/src/components/Sitemap.php b/src/components/Sitemap.php index e5ead45..95ce95a 100644 --- a/src/components/Sitemap.php +++ b/src/components/Sitemap.php @@ -53,13 +53,6 @@ class Sitemap extends Component */ protected $urlCount = 0; - /** - * Number of current sitemap file. - * - * @var int - */ - protected $filesCount = 0; - /** * Array of data sources for sitemap generation. * @@ -72,6 +65,28 @@ class Sitemap extends Component */ protected $disallowUrls = []; + /** + * Maximal size of sitemap files. + * Default value: 10M + * + * @var int + */ + protected $maxFileSize = 10485760; // 10 * 1024 * 1024 + + /** + * Generated sitemap groups file count. + * + * @var int + */ + protected $fileIndex = 0; + + /** + * List of generated files. + * + * @var string[] + */ + protected $generatedFiles = []; + /** * Create index file sitemap.xml. */ @@ -81,8 +96,8 @@ protected function createIndexFile() $this->handle = fopen($this->path, 'w'); fwrite( $this->handle, - '' . - ' ' + '' . PHP_EOL . + '' ); $objDateTime = new \DateTime('NOW'); $lastmod = $objDateTime->format(\DateTime::W3C); @@ -91,16 +106,17 @@ protected function createIndexFile() if (isset(\Yii::$app->urlManager->baseUrl)) { $baseUrl = \Yii::$app->urlManager->baseUrl; } - for ($i = 1; $i <= $this->filesCount; $i++) { + foreach ($this->generatedFiles as $fileName) { fwrite( $this->handle, - '' . - " {$baseUrl}/sitemap{$i}.xml.gz" . - " {$lastmod}" . + PHP_EOL . + '' . PHP_EOL . + "\t" . '' . $baseUrl . '/' . $fileName . '.gz' . '' . PHP_EOL . + "\t" . '' . $lastmod . '' . PHP_EOL . '' ); } - fwrite($this->handle, ''); + fwrite($this->handle, PHP_EOL . ''); fclose($this->handle); $this->gzipFile(); } @@ -116,7 +132,7 @@ protected function updateSitemaps() } // rename new files (without '_') foreach (glob("{$this->sitemapDirectory}/_sitemap*.xml*") as $filePath) { - $newFilePath = dirname($filePath) . '/' . str_replace('_', '', basename($filePath)); + $newFilePath = dirname($filePath) . '/' . substr(basename($filePath), 1); rename($filePath, $newFilePath); } } @@ -126,12 +142,17 @@ protected function updateSitemaps() */ protected function beginFile() { - $this->filesCount++; - $this->path = "{$this->sitemapDirectory}/_sitemap{$this->filesCount}.xml"; + ++$this->fileIndex; + $this->urlCount = 0; + + $fileName = 'sitemap' . $this->fileIndex . '.xml'; + $this->path = $this->sitemapDirectory . '/_' . $fileName; + $this->generatedFiles[] = $fileName; + $this->handle = fopen($this->path, 'w'); fwrite( $this->handle, - '' . "\n" . + '' . PHP_EOL . '' @@ -143,7 +164,7 @@ protected function beginFile() */ protected function closeFile() { - fwrite($this->handle, "\n" . ''); + fwrite($this->handle, PHP_EOL . ''); fclose($this->handle); } @@ -204,31 +225,25 @@ public function addModel($model) */ public function create() { + $this->fileIndex = 0; $this->beginFile(); foreach ($this->dataSources as $dataSource) { /** @var \yii\db\ActiveQuery $dataSource */ foreach ($dataSource->batch(100) as $entities) { foreach ($entities as $entity) { - if ($this->isDisallowUrl($entity->getSitemapLoc())) { - continue; - } - if ($this->urlCount === $this->maxUrlsCountInFile) { - $this->urlCount = 0; - $this->closeFile(); - $this->gzipFile(); - $this->beginFile(); + if (!$this->isDisallowUrl($entity->getSitemapLoc())) { + $this->writeEntity($entity); } - $this->writeEntity($entity); - $this->urlCount++; } } } - if ($this->urlCount >= 0) { + if (is_resource($this->handle)) { $this->closeFile(); $this->gzipFile(); } + $this->createIndexFile(); $this->updateSitemaps(); } @@ -245,6 +260,46 @@ public function setDisallowUrls($urls) return $this; } + /** + * Set maximal size of sitemap files + * + * @param int|string $size Maximal file size. Zero to work without limits. + * So you can specify the following abbreviations k - kilobytes and m - megabytes. + */ + public function setMaxFileSize($size) + { + $fileSizeAbbr = ['k', 'm']; + if (!is_int($size)) { + if (is_string($size) && preg_match('/^([\d]*)(' . implode('|', $fileSizeAbbr) . ')?$/i', $size, $matches)) { + $size = $matches[1]; + if (isset($matches[2])) { + $size = $size * pow(1024, array_search(strtolower($matches[2]), $fileSizeAbbr) + 1); + } + } else { + $size = intval($size); + } + } + $this->maxFileSize = $size; + } + + /** + * Method checks limits for write in the current file. + * + * @param int $strLen Size of writable string + * @return boolean + */ + public function isLimitExceeded($strLen) + { + $isStrLenExceeded = function ($strLen) { + $fileStat = fstat($this->handle); + return $fileStat['size'] + $strLen > $this->maxFileSize; + }; + + return + ($this->maxUrlsCountInFile > 0 && $this->urlCount === $this->maxUrlsCountInFile) || + ($this->maxFileSize > 0 && $isStrLenExceeded($strLen)); + } + /** * Checking for validity. * @@ -281,6 +336,13 @@ protected function writeEntity($entity) $str .= ''; + if ($this->isLimitExceeded(strlen($str))) { + $this->closeFile(); + $this->gzipFile(); + $this->beginFile(); + } + fwrite($this->handle, $str); + ++$this->urlCount; } }