-
Notifications
You must be signed in to change notification settings - Fork 79
/
Feed.php
1034 lines (894 loc) · 35.8 KB
/
Feed.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<?php
namespace FeedWriter;
use \DateTime;
use \DateTimeInterface;
/*
* Copyright (C) 2008 Anis uddin Ahmad <anisniit@gmail.com>
* Copyright (C) 2010-2016, 2022 Michael Bemmerl <mail@mx-server.de>
*
* This file is part of the "Universal Feed Writer" project.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* Universal Feed Writer class
*
* Generate RSS 1.0, RSS2.0 and ATOM Feeds
*
* @package UniversalFeedWriter
* @author Anis uddin Ahmad <anisniit@gmail.com>
* @link http://www.ajaxray.com/projects/rss
*/
abstract class Feed
{
// Supported feed formats.
// RSS 0.90 is officially obsoleted by 1.0.
// RSS 0.91, 0.92, 0.93 and 0.94 are officially obsoleted by 2.0.
// So, only constants for RSS 1.0, RSS 2.0 and ATOM are defined.
const RSS1 = 'RSS 1.0';
const RSS2 = 'RSS 2.0';
const ATOM = 'ATOM';
// NOTE(review): presumably the version number of the FeedWriter library itself - confirm.
const VERSION = '1.1.0';
/**
* Collection of all channel elements.
*
* Keyed by element name. A value is either a single array with the keys
* 'content' and 'attributes', or - for elements added as "multiple" - a
* list of such arrays (see setChannelElement).
*
* @var array
*/
private $channels = array();
/**
* Collection of items as objects of the \FeedWriter\Item class.
*
* Filled by addItem().
*
* @var array
*/
private $items = array();
/**
* Collection of other version-specific data.
*
* Currently used to store the 'rdf:about' attribute (key 'ChannelAbout') and
* the image element (key 'Image') of the channel (both RSS1 only).
*
* @var array
*/
private $data = array();
/**
* The tag names whose content has to be encoded as CDATA.
*
* @var array
*/
private $CDATAEncoding = array();
/**
* Collection of XML namespaces in 'prefix' => 'namespace URI' format.
*
* @var array
*/
private $namespaces = array();
/**
* Contains the format of this feed (one of the RSS1 / RSS2 / ATOM constants).
* Set by the constructor.
*/
private $version = null;
/**
* Contains the encoding of this feed.
*
* Written into the XML prolog and into the Content-Type header sent by printFeed().
*
* @var string
*/
private $encoding = 'utf-8';
/**
 * Initialise a feed of the given format.
 *
 * Stores the format, sets a placeholder title ("<version> Feed"), registers
 * the namespace prefixes known out of the box and marks the 'description',
 * 'content:encoded' and 'summary' tags for CDATA encoding.
 *
 * If no version is given, a feed in RSS 2.0 format will be generated.
 *
 * @param string $version the version constant (RSS1/RSS2/ATOM).
 */
protected function __construct($version = Feed::RSS2)
{
    $this->version = $version;

    // A title is essential for every feed format, so start with a placeholder.
    $this->setTitle($version . ' Feed');

    // XML namespaces which are available without calling addNamespace().
    $defaultNamespaces = array(
        'content' => 'http://purl.org/rss/1.0/modules/content/',
        'wfw'     => 'http://wellformedweb.org/CommentAPI/',
        'atom'    => 'http://www.w3.org/2005/Atom',
        'rdf'     => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
        'rss1'    => 'http://purl.org/rss/1.0/',
        'dc'      => 'http://purl.org/dc/elements/1.1/',
        'sy'      => 'http://purl.org/rss/1.0/modules/syndication/',
    );

    foreach ($defaultNamespaces as $prefix => $uri) {
        $this->namespaces[$prefix] = $uri;
    }

    // Content of these tags is wrapped in CDATA sections.
    $this->addCDATAEncoding(array('description', 'content:encoded', 'summary'));
}
// Start # public functions ---------------------------------------------
/**
 * Set the URLs for feed pagination.
 *
 * Every non-empty URL is added as an atom:link element with the matching
 * relation ('next', 'previous', 'first', 'last'), in that order.
 * See RFC 5005, chapter 3. At least one page URL must be specified.
 *
 * @param   string  $nextURL        The URL to the next page of this feed. Optional.
 * @param   string  $previousURL    The URL to the previous page of this feed. Optional.
 * @param   string  $firstURL       The URL to the first page of this feed. Optional.
 * @param   string  $lastURL        The URL to the last page of this feed. Optional.
 * @link    http://tools.ietf.org/html/rfc5005#section-3
 * @return  self
 * @throws  \LogicException if none of the parameters are set.
 */
public function setPagination($nextURL = null, $previousURL = null, $firstURL = null, $lastURL = null)
{
    // Relation name => URL, listed in the order the links are emitted.
    $candidates = array(
        'next'     => $nextURL,
        'previous' => $previousURL,
        'first'    => $firstURL,
        'last'     => $lastURL,
    );

    // Keep only the pages the caller actually supplied.
    $requested = array();
    foreach ($candidates as $relation => $url) {
        if (!empty($url)) {
            $requested[$relation] = $url;
        }
    }

    if (empty($requested)) {
        throw new \LogicException('At least one URL must be specified for pagination to work.');
    }

    foreach ($requested as $relation => $url) {
        $this->setAtomLink($url, $relation);
    }

    return $this;
}
/**
 * Add a channel element indicating the program used to generate the feed.
 *
 * ATOM feeds get an 'atom:generator' element with an 'uri' attribute,
 * RSS2 feeds a plain 'generator' element.
 *
 * @return  self
 * @throws  InvalidOperationException if this method is called on an RSS1 feed.
 */
public function addGenerator()
{
    switch ($this->version) {
        case Feed::ATOM:
            $this->setChannelElement('atom:generator', 'FeedWriter', array('uri' => 'https://github.com/mibe/FeedWriter'));
            break;

        case Feed::RSS2:
            $this->setChannelElement('generator', 'FeedWriter');
            break;

        default:
            // Everything which is neither ATOM nor RSS2 is rejected.
            throw new InvalidOperationException('The generator element is not supported in RSS1 feeds.');
    }

    return $this;
}
/**
 * Add a XML namespace to the internal list of namespaces. After that,
 * custom channel elements can be used properly to generate a valid feed.
 *
 * An already registered prefix is overwritten with the new URI.
 *
 * @access  public
 * @param   string  $prefix     namespace prefix
 * @param   string  $uri        namespace name (URI)
 * @return  self
 * @link    http://www.w3.org/TR/REC-xml-names/
 * @throws  \InvalidArgumentException if the prefix or uri is empty or NULL.
 */
public function addNamespace($prefix, $uri)
{
    if (empty($prefix)) {
        // Message previously read "emtpy"; fixed the spelling.
        throw new \InvalidArgumentException('The prefix may not be empty or NULL.');
    }

    if (empty($uri)) {
        throw new \InvalidArgumentException('The uri may not be empty or NULL.');
    }

    $this->namespaces[$prefix] = $uri;

    return $this;
}
/**
 * Add a channel element to the feed.
 *
 * The element is stored as an array with the keys 'content' and 'attributes'.
 * With $multiple set to TRUE the element is appended to a list kept under
 * the element name; otherwise an existing element of that name is replaced.
 *
 * @access  public
 * @param   string  $elementName    name of the channel tag
 * @param   string  $content        content of the channel tag
 * @param   array   $attributes     array of element attributes with attribute name as array key
 * @param   bool    $multiple       TRUE if this element can appear multiple times
 * @return  self
 * @throws  \InvalidArgumentException if the element name is not a string, empty or NULL.
 */
public function setChannelElement($elementName, $content, array $attributes = null, $multiple = false)
{
    if (empty($elementName)) {
        throw new \InvalidArgumentException('The element name may not be empty or NULL.');
    }

    if (!is_string($elementName)) {
        throw new \InvalidArgumentException('The element name must be a string.');
    }

    $element = array(
        'content'    => $content,
        'attributes' => $attributes,
    );

    if ($multiple === true) {
        // Repeatable element: collect every occurrence.
        $this->channels[$elementName][] = $element;

        return $this;
    }

    $this->channels[$elementName] = $element;

    return $this;
}
/**
 * Set multiple channel elements from an array.
 *
 * The array has to be in 'channelName' => 'channelContent' format. Every
 * entry is passed to setChannelElement() without attributes.
 *
 * @access  public
 * @param   array   $elementArray   array of channels
 * @return  self
 */
public function setChannelElementsFromArray(array $elementArray)
{
    foreach (array_keys($elementArray) as $name) {
        $this->setChannelElement($name, $elementArray[$name]);
    }

    return $this;
}
/**
 * Get the appropriate MIME type string for the current feed.
 *
 * Falls back to "text/xml" when the feed version is none of the known formats.
 *
 * @access  public
 * @return  string  The MIME type string.
 */
public function getMIMEType()
{
    if ($this->version == Feed::RSS2) {
        return "application/rss+xml";
    }

    if ($this->version == Feed::RSS1) {
        return "application/rdf+xml";
    }

    if ($this->version == Feed::ATOM) {
        return "application/atom+xml";
    }

    return "text/xml";
}
/**
 * Print the actual RSS/ATOM file
 *
 * Sets a Content-Type header and echoes the contents of the feed.
 * Should only be used in situations where direct output is desired;
 * if you need to pass a string around, use generateFeed() instead.
 *
 * @access  public
 * @param   bool    $useGenericContentType  TRUE to send the generic "text/xml" media type,
 *                                          FALSE if the specific feed media type should be sent.
 * @return  void
 * @throws  \InvalidArgumentException if the useGenericContentType parameter is not boolean.
 */
public function printFeed($useGenericContentType = false)
{
    if (!is_bool($useGenericContentType)) {
        throw new \InvalidArgumentException('The useGenericContentType parameter must be boolean.');
    }

    $mediaType = $useGenericContentType ? "text/xml" : $this->getMIMEType();

    // Build the document first: if generating fails, the exception surfaces
    // before any header has been sent.
    $document = $this->generateFeed();

    header("Content-Type: " . $mediaType . "; charset=" . $this->encoding);
    echo $document;
}
/**
 * Generate the feed.
 *
 * The document is assembled from the header, the channel elements,
 * the items and the footer, in this order.
 *
 * @access   public
 * @return   string  The complete feed XML.
 * @throws   InvalidOperationException if the link element of the feed is not set.
 */
public function generateFeed()
{
    // Only ATOM feeds may omit the 'link' channel element.
    $linkRequired = $this->version != Feed::ATOM;

    if ($linkRequired && !array_key_exists('link', $this->channels)) {
        throw new InvalidOperationException('RSS1 & RSS2 feeds need a link element. Call the setLink method before this method.');
    }

    $xml  = $this->makeHeader();
    $xml .= $this->makeChannels();
    $xml .= $this->makeItems();
    $xml .= $this->makeFooter();

    return $xml;
}
/**
 * Create a new Item.
 *
 * The item is created for the same feed version as this feed. It is not
 * added to the feed; use addItem() for that.
 *
 * @access   public
 * @return   Item  instance of Item class
 */
public function createNewItem()
{
    return new Item($this->version);
}
/**
 * Add one or more tags to the list of CDATA encoded tags
 *
 * @access   public
 * @param    array   $tags An array of tag names that are merged into the list of tags which should be encoded as CDATA
 * @return   self
 */
public function addCDATAEncoding(array $tags)
{
    $combined = array_merge($this->CDATAEncoding, $tags);
    $this->CDATAEncoding = $combined;

    return $this;
}
/**
 * Get the list of tags whose content is encoded as CDATA.
 *
 * @access   public
 * @return   array   The tag names that are to be encoded as CDATA
 */
public function getCDATAEncoding()
{
    $tags = $this->CDATAEncoding;

    return $tags;
}
/**
 * Remove tags from the list of CDATA encoded tags
 *
 * Returns the feed instance, like the other mutators of this class, so the
 * call can be chained. Callers that ignore the return value are unaffected.
 *
 * @access   public
 * @param    array   $tags An array of tag names that should be removed.
 * @return   self
 */
public function removeCDATAEncoding(array $tags)
{
    // array_diff keeps the original keys; array_values re-indexes the list.
    $this->CDATAEncoding = array_values(array_diff($this->CDATAEncoding, $tags));

    return $this;
}
/**
 * Add a FeedItem to the main class
 *
 * The item must have been created for the same feed version as this feed.
 *
 * @access  public
 * @param   Item    $feedItem   instance of Item class
 * @return  self
 * @throws  \InvalidArgumentException if the given item version mismatches.
 */
public function addItem(Item $feedItem)
{
    if ($feedItem->getVersion() == $this->version) {
        $this->items[] = $feedItem;

        return $this;
    }

    throw new \InvalidArgumentException(sprintf(
        'Feed type mismatch: This instance can handle %s feeds only, but item for %s feeds given.',
        $this->version,
        $feedItem->getVersion()
    ));
}
// Wrapper functions -------------------------------------------------------------------
/**
 * Set the 'encoding' attribute in the XML prolog.
 *
 * The same value is used as charset in the Content-Type header sent by printFeed().
 *
 * @access  public
 * @param   string  $encoding   value of 'encoding' attribute
 * @return  self
 * @throws  \InvalidArgumentException if the encoding is not a string, empty or NULL.
 */
public function setEncoding($encoding)
{
    if (empty($encoding)) {
        throw new \InvalidArgumentException('The encoding may not be empty or NULL.');
    }

    if (!is_string($encoding)) {
        throw new \InvalidArgumentException('The encoding must be a string.');
    }

    $this->encoding = $encoding;

    return $this;
}
/**
 * Set the 'title' channel element
 *
 * @access  public
 * @param   string  $title  value of 'title' channel tag
 * @return  self
 * @throws  \InvalidArgumentException if the title is empty or NULL.
 */
public function setTitle($title)
{
    if (empty($title)) {
        throw new \InvalidArgumentException('The title may not be empty or NULL.');
    }

    $this->setChannelElement('title', $title);

    return $this;
}
/**
 * Set the date when the feed was lastly updated.
 *
 * This adds the 'updated' element (ATOM) or the 'lastBuildDate' element (RSS2)
 * to the feed. The value of the date parameter can be either a class implementing
 * DateTimeInterface, an integer containing a UNIX timestamp or a string which is
 * parseable by PHP's 'strtotime' function. Non-negative numeric values are
 * always treated as UNIX timestamps.
 *
 * Not supported in RSS1 feeds.
 *
 * @access  public
 * @param   DateTimeInterface|int|string  $date  Date which should be used.
 * @return  self
 * @throws  \InvalidArgumentException if the given date is not an implementation of DateTimeInterface, a UNIX timestamp or a date string.
 * @throws  InvalidOperationException if this method is called on an RSS1 feed.
 */
public function setDate($date)
{
    if ($this->version == Feed::RSS1) {
        throw new InvalidOperationException('The publication date is not supported in RSS1 feeds.');
    }

    // ATOM and RSS use different date formats and element names.
    $isAtom = $this->version == Feed::ATOM;
    $format = $isAtom ? \DATE_ATOM : \DATE_RSS;

    if ($date instanceof DateTimeInterface || $date instanceof DateTime) {
        $formatted = $date->format($format);
    } elseif (is_numeric($date) && $date >= 0) {
        $formatted = date($format, $date);
    } elseif (is_string($date)) {
        $timestamp = strtotime($date);

        if ($timestamp === false) {
            throw new \InvalidArgumentException('The given date was not parseable.');
        }

        $formatted = date($format, $timestamp);
    } else {
        throw new \InvalidArgumentException('The given date is not an implementation of DateTimeInterface, a UNIX timestamp or a date string.');
    }

    $this->setChannelElement($isAtom ? 'updated' : 'lastBuildDate', $formatted);

    return $this;
}
/**
 * Set a phrase or sentence describing the feed.
 *
 * Stored as 'subtitle' element in ATOM feeds and as 'description' element otherwise.
 *
 * @access  public
 * @param   string  $description    Description of the feed.
 * @return  self
 * @throws  \InvalidArgumentException if the description is empty or NULL.
 */
public function setDescription($description)
{
    if (empty($description)) {
        throw new \InvalidArgumentException('The description may not be empty or NULL.');
    }

    $elementName = ($this->version != Feed::ATOM) ? 'description' : 'subtitle';
    $this->setChannelElement($elementName, $description);

    return $this;
}
/**
 * Set the 'link' channel element
 *
 * In ATOM feeds the link is added through setAtomLink() (without a relation),
 * in all other feeds it is stored as plain 'link' channel element.
 *
 * @access  public
 * @param   string  $link   value of 'link' channel tag
 * @return  self
 * @throws  \InvalidArgumentException if the link is empty or NULL.
 */
public function setLink($link)
{
    if (empty($link)) {
        throw new \InvalidArgumentException('The link may not be empty or NULL.');
    }

    if ($this->version == Feed::ATOM) {
        $this->setAtomLink($link);

        return $this;
    }

    $this->setChannelElement('link', $link);

    return $this;
}
/**
 * Set custom 'link' channel elements.
 *
 * The link is stored as a repeatable 'atom:link' channel element whose
 * attributes are built from the given parameters. Parameters which are
 * NULL (or otherwise loosely equal to NULL) are left out.
 *
 * In ATOM feeds, only one link with the relation "alternate" and the same
 * combination of type and hreflang values is allowed. This is checked when
 * the link being added is an alternate link, i.e. its relation is
 * "alternate" or not given at all. Links with any other relation
 * (e.g. "self", "next") are never rejected by this check.
 *
 * @access  public
 * @param   string  $href       URI of this link
 * @param   string  $rel        relation type of the resource
 * @param   string  $type       MIME type of the target resource
 * @param   string  $hreflang   language of the resource
 * @param   string  $title      human-readable information about the resource
 * @param   int     $length     length of the resource in bytes
 * @link    https://www.iana.org/assignments/link-relations/link-relations.xml
 * @link    https://tools.ietf.org/html/rfc4287#section-4.2.7
 * @return  self
 * @throws  \InvalidArgumentException on multiple occasions.
 * @throws  InvalidOperationException if an alternate link with the same type and hreflang was already added to the feed.
 */
public function setAtomLink($href, $rel = null, $type = null, $hreflang = null, $title = null, $length = null)
{
    $data = array('href' => $href);

    if ($rel != null) {
        if (!is_string($rel) || empty($rel))
            throw new \InvalidArgumentException('rel parameter must be a string and a valid relation identifier.');

        $data['rel'] = $rel;
    }

    if ($type != null) {
        // Regex used from RFC 4287, page 41
        if (!is_string($type) || preg_match('/.+\/.+/', $type) != 1)
            throw new \InvalidArgumentException('type parameter must be a string and a MIME type.');

        $data['type'] = $type;
    }

    if ($hreflang != null) {
        // Regex used from RFC 4287, page 41. The pattern there has to match
        // the whole value, so it is anchored here; without the anchors every
        // string containing at least one letter was accepted.
        if (!is_string($hreflang) || preg_match('/\A[A-Za-z]{1,8}(-[A-Za-z0-9]{1,8})*\z/', $hreflang) != 1)
            throw new \InvalidArgumentException('hreflang parameter must be a string and a valid language code.');

        $data['hreflang'] = $hreflang;
    }

    if ($title != null) {
        if (!is_string($title) || empty($title))
            throw new \InvalidArgumentException('title parameter must be a string and not empty.');

        $data['title'] = $title;
    }

    if ($length != null) {
        if (!is_int($length) || $length < 0)
            throw new \InvalidArgumentException('length parameter must be a positive integer.');

        $data['length'] = (string) $length;
    }

    // ATOM spec. has some restrictions on atom:link usage
    // See RFC 4287, page 12 (4.1.1)
    //
    // The restriction concerns alternate links only. The relation of the new
    // link is therefore checked first; previously a link with any relation was
    // rejected as soon as an alternate link with equal type/hreflang existed.
    $newLinkIsAlternate = ($rel == null || $rel == 'alternate');

    if ($this->version == Feed::ATOM && $newLinkIsAlternate && isset($this->channels['atom:link'])) {
        // The channel element is a list of links, so check every entry.
        foreach ($this->channels['atom:link'] as $linkItem) {
            $attrib = $linkItem['attributes'];

            $existingRel      = isset($attrib['rel']) ? $attrib['rel'] : null;
            $existingHreflang = isset($attrib['hreflang']) ? $attrib['hreflang'] : null;
            $existingType     = isset($attrib['type']) ? $attrib['type'] : null;

            // Only one link with relation alternate and same hreflang & type is allowed.
            if ($existingRel == 'alternate' && $existingHreflang == $hreflang && $existingType == $type)
                throw new InvalidOperationException('The feed must not contain more than one link element with a'
                . ' relation of "alternate" that has the same combination of type and hreflang attribute values.');
        }
    }

    return $this->setChannelElement('atom:link', '', $data, true);
}
/**
 * Set an 'atom:link' channel element with relation=self attribute.
 * Needs the full URL to this feed.
 *
 * The MIME type of the current feed format is used as type of the link.
 *
 * @link    http://www.rssboard.org/rss-profile#namespace-elements-atom-link
 * @access  public
 * @param   string  $url    URL to this feed
 * @return  self
 */
public function setSelfLink($url)
{
    $mimeType = $this->getMIMEType();

    return $this->setAtomLink($url, 'self', $mimeType);
}
/**
 * Set the 'image' channel element
 *
 * ATOM feeds get a 'logo' element containing just the URL. RSS feeds get an
 * 'image' element with title, link and url. In RSS1 feeds the channel only
 * references the image by an 'rdf:resource' attribute; the image data itself
 * is kept separately for the stand-alone image element.
 *
 * @access  public
 * @param   string  $url    URL of the image
 * @param   string  $title  Title of the image. RSS only.
 * @param   string  $link   Link target URL of the image. RSS only.
 * @return  self
 * @throws  \InvalidArgumentException if the url is invalid.
 * @throws  \InvalidArgumentException if the title and link parameter are not a string or empty.
 */
public function setImage($url, $title = null, $link = null)
{
    if (empty($url) || !is_string($url)) {
        throw new \InvalidArgumentException('url parameter must be a string and may not be empty or NULL.');
    }

    if ($this->version == Feed::ATOM) {
        $name = 'logo';
        $data = $url;
    } else {
        // RSS feeds have support for a title & link element.
        if (empty($title) || !is_string($title)) {
            throw new \InvalidArgumentException('title parameter must be a string and may not be empty or NULL.');
        }

        if (empty($link) || !is_string($link)) {
            throw new \InvalidArgumentException('link parameter must be a string and may not be empty or NULL.');
        }

        $name = 'image';
        $data = array('title' => $title, 'link' => $link, 'url' => $url);
    }

    if ($this->version != Feed::RSS1) {
        return $this->setChannelElement($name, $data);
    }

    // Special handling for RSS1: remember the image data and let the
    // channel element point to the image URL only.
    $this->data['Image'] = $data;

    return $this->setChannelElement($name, '', array('rdf:resource' => $url), false);
}
/**
 * Set the channel 'rdf:about' attribute, which is used in RSS1 feeds only.
 *
 * @access  public
 * @param   string  $url    value of 'rdf:about' attribute of the channel element
 * @return  self
 * @throws  InvalidOperationException if this method is called and the feed is not of type RSS1.
 * @throws  \InvalidArgumentException if the given URL is invalid.
 */
public function setChannelAbout($url)
{
    $isRss1 = $this->version == Feed::RSS1;

    if (!$isRss1) {
        throw new InvalidOperationException("This method is only supported in RSS1 feeds.");
    }

    if (empty($url)) {
        throw new \InvalidArgumentException('The about URL may not be empty or NULL.');
    }

    if (!is_string($url)) {
        throw new \InvalidArgumentException('The about URL must be a string.');
    }

    $this->data['ChannelAbout'] = $url;

    return $this;
}
/**
 * Generate an UUID.
 *
 * The UUID is the MD5 hash of the key, split into groups of 8-4-4-4-12
 * hexadecimal characters. If no key is given, a unique ID is generated and
 * used as the input for the MD5 hash, so the result differs between calls.
 *
 * @author  Anis uddin Ahmad <admin@ajaxray.com>
 * @access  public
 * @param   string  $key    optional key on which the UUID is generated
 * @param   string  $prefix an optional prefix
 * @return  string  the formatted UUID
 */
public static function uuid($key = null, $prefix = '')
{
    if ($key == null) {
        $key = uniqid(rand());
    }

    $hash = md5($key);

    return $prefix
        . substr($hash, 0, 8) . '-'
        . substr($hash, 8, 4) . '-'
        . substr($hash, 12, 4) . '-'
        . substr($hash, 16, 4) . '-'
        . substr($hash, 20, 12);
}
/**
 * Replace invalid XML characters.
 *
 * Every run of characters which are not allowed in XML 1.0 documents is
 * replaced by the replacement string. If the input is not valid UTF-8, a
 * byte-wise filter is used instead. If filtering fails completely, the
 * unfiltered string is returned.
 *
 * @link http://www.phpwact.org/php/i18n/charsets#xml See utf8_for_xml() function
 * @link http://www.w3.org/TR/REC-xml/#charsets
 * @link https://github.com/mibe/FeedWriter/issues/30
 *
 * @access  public
 * @param   string  $string         string which should be filtered
 * @param   string  $replacement    replace invalid characters with this string
 * @return  string  the filtered string
 */
public static function filterInvalidXMLChars($string, $replacement = '_') // default to '\x{FFFD}' ???
{
    $result = preg_replace('/[^\x{0009}\x{000a}\x{000d}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+/u', $replacement, $string);

    // preg_replace() signals failure with NULL. The checks are strict on
    // purpose: an empty string is a valid result (e.g. an empty replacement
    // and an input consisting of invalid characters only) and must not be
    // mistaken for a failure, which would return the unfiltered input.

    // Did the PCRE replace fail because of bad UTF-8 data?
    // If yes, try a non-multibyte regex and without the UTF-8 mode enabled.
    if ($result === null && preg_last_error() == PREG_BAD_UTF8_ERROR) {
        $result = preg_replace('/[^\x09\x0a\x0d\x20-\xFF]+/', $replacement, $string);
    }

    // In case the regex replacing failed completely, return the whole unfiltered string.
    if ($result === null) {
        $result = $string;
    }

    return $result;
}
// End # public functions ----------------------------------------------
// Start # private functions ----------------------------------------------
/**
 * Returns all used XML namespace prefixes in this instance.
 *
 * The tag names of all channel elements and of all feed items are inspected.
 * A tag name counts as prefixed when it consists of exactly two parts
 * separated by a colon. Unfortunately some namespace prefixes are not
 * included, because they are hardcoded, e.g. rdf.
 *
 * @access   private
 * @return   array   Array with namespace prefix as value.
 */
private function getNamespacePrefixes()
{
    // Start with the channel tag names ...
    $tagNames = array_keys($this->channels);

    // ... and append every item tag name which is not in the list yet.
    foreach ($this->items as $feedItem) {
        $itemTagNames = array_keys($feedItem->getElements());

        foreach ($itemTagNames as $itemTagName) {
            if (in_array($itemTagName, $tagNames)) {
                continue;
            }

            $tagNames[] = $itemTagName;
        }
    }

    // Extract the prefix of every 'prefix:name' tag.
    $found = array();

    foreach ($tagNames as $tagName) {
        $parts = explode(':', $tagName);

        if (count($parts) == 2) {
            $found[] = $parts[0];
        }
    }

    return array_unique($found);
}
/**
 * Returns the XML header and root element, depending on the feed type.
 *
 * The root element is left open; it is closed by makeFooter(). All used
 * namespace prefixes are declared on the root element. RSS1 and ATOM feeds
 * additionally get a default namespace.
 *
 * @access   private
 * @return   string  The XML header of the feed.
 * @throws   InvalidOperationException if an unknown XML namespace prefix is encountered.
 */
private function makeHeader()
{
    $prolog = '<?xml version="1.0" encoding="'.$this->encoding.'" ?>' . PHP_EOL;

    $prefixes         = $this->getNamespacePrefixes();
    $rootAttributes   = array();
    $rootTag          = '';
    $defaultNamespace = '';

    switch ($this->version) {
        case Feed::RSS2:
            $rootTag = 'rss';
            $rootAttributes['version'] = '2.0';
            break;

        case Feed::RSS1:
            $rootTag = 'rdf:RDF';
            // The rdf prefix is hardcoded in the generated markup, so declare it always.
            $prefixes[] = 'rdf';
            $defaultNamespace = $this->namespaces['rss1'];
            break;

        case Feed::ATOM:
            $rootTag = 'feed';
            $defaultNamespace = $this->namespaces['atom'];

            // The ATOM namespace is the default namespace here,
            // so the 'atom' prefix must not be declared.
            $withoutAtom = array();
            foreach ($prefixes as $index => $prefix) {
                if ($prefix !== 'atom') {
                    $withoutAtom[$index] = $prefix;
                }
            }
            $prefixes = $withoutAtom;
            break;
    }

    // Declare every used namespace prefix on the root element.
    foreach ($prefixes as $prefix) {
        if (isset($this->namespaces[$prefix])) {
            $rootAttributes['xmlns:' . $prefix] = $this->namespaces[$prefix];
            continue;
        }

        throw new InvalidOperationException('Unknown XML namespace prefix: \'' . $prefix . '\'.'
            . ' Use the addNamespace method to add support for this prefix.');
    }

    // Include default namespace, if required
    if (!empty($defaultNamespace)) {
        $rootAttributes['xmlns'] = $defaultNamespace;
    }

    return $prolog . $this->makeNode($rootTag, '', $rootAttributes, true);
}
/**
 * Closes the open tags at the end of file
 *
 * For a feed version which is none of RSS2, RSS1 or ATOM nothing is returned (NULL).
 *
 * @access   private
 * @return   string  The XML footer of the feed.
 */
private function makeFooter()
{
    switch ($this->version) {
        case Feed::RSS2:
            return '</channel>' . PHP_EOL . '</rss>';

        case Feed::RSS1:
            return '</rdf:RDF>';

        case Feed::ATOM:
            return '</feed>';
    }
}
/**
 * Creates a single node in XML format
 *
 * Attribute values and text content are filtered for invalid XML characters
 * and escaped. The content of tags on the CDATA list is wrapped in a CDATA
 * section instead of being escaped; in ATOM feeds such tags also get a
 * type="html" attribute. Array content is rendered as nested nodes.
 *
 * @access   private
 * @param    string  $tagName     name of the tag
 * @param    mixed   $tagContent  tag value as string or array of nested tags in 'tagName' => 'tagValue' format
 * @param    array   $attributes  Attributes (if any) in 'attrName' => 'attrValue' format
 * @param    bool    $omitEndTag  True if the end tag should be omitted. Defaults to false.
 * @return   string  formatted xml tag
 * @throws   \InvalidArgumentException if a nested tag value is neither an array nor a string.
 */
private function makeNode($tagName, $tagContent, array $attributes = null, $omitEndTag = false)
{
    $isCDATATag = in_array($tagName, $this->CDATAEncoding);

    // Build the attribute list of the start tag.
    $attrText = '';

    if ($attributes != null) {
        foreach ($attributes as $attrName => $attrValue) {
            $escaped = htmlspecialchars(self::filterInvalidXMLChars($attrValue));
            $attrText .= ' ' . $attrName . '="' . $escaped . '"';
        }
    }

    if ($isCDATATag && $this->version == Feed::ATOM) {
        $attrText .= ' type="html"';
    }

    $nodeText = '<' . $tagName . $attrText . '>';

    if ($isCDATATag) {
        $nodeText .= '<![CDATA[';
    }

    if (!is_array($tagContent)) {
        // Plain text content.
        $text = self::filterInvalidXMLChars($tagContent);
        $nodeText .= $isCDATATag ? $this->sanitizeCDATA($text) : htmlspecialchars($text);
    } else {
        // Nested tags: an array value means the child tag occurs several times.
        foreach ($tagContent as $childName => $childValue) {
            if (is_array($childValue)) {
                $nodeText .= PHP_EOL;

                foreach ($childValue as $occurrence) {
                    $nodeText .= $this->makeNode($childName, $occurrence);
                }
            } elseif (is_string($childValue)) {
                $nodeText .= $this->makeNode($childName, $childValue);
            } else {
                throw new \InvalidArgumentException("Unknown node-value type for $childName");
            }
        }
    }

    if ($isCDATATag) {
        $nodeText .= ']]>';
    }

    if (!$omitEndTag) {
        $nodeText .= '</' . $tagName . '>';
    }

    return $nodeText . PHP_EOL;
}
/**
 * Make the channels.
 *
 * Renders the opening channel tag (RSS only), every channel element and the
 * format specific parts: the item list and the stand-alone image element
 * for RSS1 feeds, the feed ID for ATOM feeds.
 *
 * Values written into the 'rdf:about' and 'resource' attributes are filtered
 * and escaped in the same way makeNode() treats attribute values, so URLs
 * containing characters like '&' do not break the document.
 *
 * @access   private
 * @return   string  The feed header as XML containing all the feed metadata.
 */
private function makeChannels()
{
    $out = '';

    //Start channel tag
    switch ($this->version) {
        case Feed::RSS2:
            $out .= '<channel>' . PHP_EOL;
            break;
        case Feed::RSS1:
            // Use the explicitly set about URL; fall back to the channel link.
            $about = isset($this->data['ChannelAbout'])
                ? $this->data['ChannelAbout']
                : $this->channels['link']['content'];

            $out .= '<channel rdf:about="' . htmlspecialchars(self::filterInvalidXMLChars($about)) . '">';
            break;
    }

    //Print Items of channel
    foreach ($this->channels as $key => $value) {
        // In ATOM feeds, strip the ATOM namespace prefix from the tag name. It is not needed here,
        // because the ATOM namespace name is set as default namespace. Only the exact prefix
        // 'atom:' is stripped; names which merely start with 'atom' are left alone.
        if ($this->version == Feed::ATOM && strncmp($key, 'atom:', 5) === 0) {
            $key = substr($key, 5);
        }

        // The channel element can occur multiple times, when the key 'content' is not in the array.
        if (!array_key_exists('content', $value)) {
            // If this is the case, iterate through the array with the multiple elements.
            foreach ($value as $singleElement) {
                $out .= $this->makeNode($key, $singleElement['content'], $singleElement['attributes']);
            }
        } else {
            $out .= $this->makeNode($key, $value['content'], $value['attributes']);
        }
    }

    if ($this->version == Feed::RSS1) {
        //RSS 1.0 have special tag <rdf:Seq> with channel
        $out .= "<items>" . PHP_EOL . "<rdf:Seq>" . PHP_EOL;

        foreach ($this->items as $item) {
            $thisItems = $item->getElements();
            $resource = htmlspecialchars(self::filterInvalidXMLChars($thisItems['link']['content']));
            $out .= '<rdf:li resource="' . $resource . '"/>' . PHP_EOL;
        }

        $out .= "</rdf:Seq>" . PHP_EOL . "</items>" . PHP_EOL . "</channel>" . PHP_EOL;

        // An image has its own element after the channel elements.
        if (array_key_exists('Image', $this->data))
            $out .= $this->makeNode('image', $this->data['Image'], array('rdf:about' => $this->data['Image']['url']));
    } else if ($this->version == Feed::ATOM) {
        // ATOM feeds have a unique feed ID. Use the title channel element as key.
        $out .= $this->makeNode('id', Feed::uuid($this->channels['title']['content'], 'urn:uuid:'));
    }

    return $out;
}
/**
 * Prints formatted feed items
 *
 * Every item is rendered as start tag, one node per item element and end tag.
 *
 * @access   private
 * @return   string  The XML of every feed item.
 */
private function makeItems()
{
    $out = '';

    foreach ($this->items as $item) {
        $thisItems = $item->getElements();

        // The argument is printed as rdf:about attribute of item in RSS 1.0
        // We're using the link set in the item (which is mandatory) as the about attribute.
        if ($this->version == Feed::RSS1)
            $out .= $this->startItem($thisItems['link']['content']);
        else
            $out .= $this->startItem();

        foreach ($thisItems as $feedItem) {
            $name = $feedItem['name'];

            // Strip the ATOM namespace prefix from tags when feed is an ATOM feed.
            // Not needed here, because the ATOM namespace name is used as default namespace.
            // Only the exact prefix 'atom:' is stripped; a name which merely starts
            // with 'atom' (e.g. 'atomic:foo') was previously mangled and is now kept.
            if ($this->version == Feed::ATOM && strncmp($name, 'atom:', 5) === 0)
                $name = substr($name, 5);

            $out .= $this->makeNode($name, $feedItem['content'], $feedItem['attributes']);
        }

        $out .= $this->endItem();
    }

    return $out;
}
/**
 * Make the starting tag of a feed item.
 *
 * RSS2 feeds use an 'item' tag, ATOM feeds an 'entry' tag. RSS1 feeds use an
 * 'item' tag with an 'rdf:about' attribute. The attribute value is filtered
 * and escaped in the same way makeNode() treats attribute values, so URLs
 * containing characters like '&' do not break the document.
 *
 * @access   private
 * @param    string  $about  The value of about attribute which is used for RSS 1.0 only.
 * @return   string  The starting XML tag of an feed item.
 * @throws   InvalidOperationException if this object misses the data for the about attribute.
 */
private function startItem($about = false)
{
    $out = '';

    if ($this->version == Feed::RSS2) {
        $out .= '<item>' . PHP_EOL;
    } elseif ($this->version == Feed::RSS1) {
        if ($about) {
            $escapedAbout = htmlspecialchars(self::filterInvalidXMLChars($about));
            $out .= '<item rdf:about="' . $escapedAbout . '">' . PHP_EOL;
        } else {
            throw new InvalidOperationException("Missing data for about attribute. Call setChannelAbout method.");
        }
    } elseif ($this->version == Feed::ATOM) {
        $out .= "<entry>" . PHP_EOL;
    }

    return $out;
}
/**
* Closes feed item tag