Skip to content

Commit

Permalink
Fix the situation when a feed is invalid and the custom parser raises…
Browse files Browse the repository at this point in the history
… an exception; in this case the error is now logged and the plugin continues to operate.

Also introduced some simple test cases for when the RSS feed is well-formed, when
it is valid but empty, and when it is invalid.

Fixes #18
  • Loading branch information
Pere Urbon-Bayes committed Jun 14, 2016
1 parent e143c16 commit 56675d6
Show file tree
Hide file tree
Showing 6 changed files with 195 additions and 34 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Gemfile.lock
79 changes: 46 additions & 33 deletions lib/logstash/inputs/rss.rb
Original file line number Diff line number Diff line change
Expand Up @@ -61,46 +61,59 @@ def run(queue)

def handle_response(response, queue)
body = response.body
# @logger.debug("Body", :body => body)
# Parse the RSS feed
feed = RSS::Parser.parse(body)
feed.items.each do |item|
# Put each item into an event
@logger.debug("Item", :item => item.author)
case feed.feed_type
begin
feed = RSS::Parser.parse(body)
feed.items.each do |item|
# Put each item into an event
@logger.debug("Item", :item => item.author)
case feed.feed_type
when 'rss'
@codec.decode(item.description) do |event|
event["Feed"] = @url
event["published"] = item.pubDate
event["title"] = item.title
event["link"] = item.link
event["author"] = item.author
decorate(event)
queue << event
end
handle_rss_response(queue, item)
when 'atom'
if ! item.content.nil?
content = item.content.content
else
content = item.summary.content
end
@codec.decode(content) do |event|
event["Feed"] = @url
event["updated"] = item.updated.content
event["title"] = item.title.content
event["link"] = item.link.href
event["author"] = item.author.name.content
unless item.published.nil?
event["published"] = item.published.content
end
decorate(event)
queue << event
end
handle_atom_response(queue, item)
end
end
rescue RSS::MissingTagError => e
@logger.error("Invalid RSS feed", :exception => e)
rescue => e
@logger.error("Uknown error while parsing the feed", :url => url, :exception => e)
end
end

# Stops the input: when a run thread has been recorded in @run_thread,
# hands it to Stud.stop!; otherwise does nothing and returns nil.
def stop
  return unless @run_thread

  Stud.stop!(@run_thread)
end

private

# Converts a single Atom entry into events and pushes them onto the queue.
#
# The entry's content is decoded when present, otherwise its summary. Each
# event yielded by the codec is stamped with the feed URL and the entry's
# updated, title, link, author and (when present) published values, then
# decorated and appended to the queue.
#
# Link and author are read defensively: when the entry does not carry them
# the field is stored as nil (handle_rss_response likewise stores whatever
# the item returns) instead of raising NoMethodError on nil. An entry with
# neither content nor summary has nothing to decode, so it is logged and
# skipped rather than raising.
#
# @param queue [#<<] destination for the generated events
# @param item  Atom entry — presumably an RSS::Atom::Feed::Entry; confirm against caller
def handle_atom_response(queue, item)
  # Prefer the full content element; fall back to the summary.
  payload = item.content.nil? ? item.summary : item.content
  if payload.nil?
    @logger.warn("Atom entry has neither content nor summary, skipping it", :url => @url)
    return
  end

  # Both of these may be absent on an individual entry.
  link = item.link
  author = item.author
  author_name = author && author.name

  @codec.decode(payload.content) do |event|
    event["Feed"] = @url
    event["updated"] = item.updated.content
    event["title"] = item.title.content
    event["link"] = link && link.href
    event["author"] = author_name && author_name.content
    event["published"] = item.published.content unless item.published.nil?
    decorate(event)
    queue << event
  end
end
# Converts a single RSS item into events and pushes them onto the queue.
#
# The item's description is run through the codec; every event the codec
# yields is stamped with the feed URL and the item's pubDate, title, link
# and author, then decorated and appended to the queue.
#
# @param queue [#<<] destination for the generated events
# @param item  RSS item exposing description, pubDate, title, link and author
def handle_rss_response(queue, item)
  @codec.decode(item.description) do |decoded|
    metadata = {
      "Feed"      => @url,
      "published" => item.pubDate,
      "title"     => item.title,
      "link"      => item.link,
      "author"    => item.author
    }
    metadata.each_pair { |field, value| decoded[field] = value }
    decorate(decoded)
    queue << decoded
  end
end
end # class LogStash::Inputs::Rss
5 changes: 5 additions & 0 deletions spec/fixtures/rss-invalid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="windows-1252"?>
<rss version="2.0">
<channel>
</channel>
</rss>
8 changes: 8 additions & 0 deletions spec/fixtures/rss-zero-feeditems.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="windows-1252"?>
<rss version="2.0">
<channel>
<title>Sample Feed - Favorite RSS Related Software &amp; Resources</title>
<description>Take a look at some of FeedForAll&apos;s favorite software and resources for learning more about RSS.</description>
<link>http://www.feedforall.com</link>
</channel>
</rss>
52 changes: 52 additions & 0 deletions spec/fixtures/sample-feed.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="windows-1252"?>
<rss version="2.0">
<channel>
<title>Sample Feed - Favorite RSS Related Software &amp; Resources</title>
<description>Take a look at some of FeedForAll&apos;s favorite software and resources for learning more about RSS.</description>
<link>http://www.feedforall.com</link>
<category domain="www.dmoz.com">Computers/Software/Internet/Site Management/Content Management</category>
<copyright>Copyright 2004 NotePage, Inc.</copyright>
<docs>http://blogs.law.harvard.edu/tech/rss</docs>
<language>en-us</language>
<lastBuildDate>Mon, 1 Nov 2004 13:17:17 -0500</lastBuildDate>
<managingEditor>marketing@feedforall.com</managingEditor>
<pubDate>Tue, 26 Oct 2004 14:06:44 -0500</pubDate>
<webMaster>webmaster@feedforall.com</webMaster>
<generator>FeedForAll Beta1 (0.0.1.8)</generator>
<image>
<url>http://www.feedforall.com/feedforall-temp.gif</url>
<title>FeedForAll Sample Feed</title>
<link>http://www.feedforall.com/industry-solutions.htm</link>
<description>FeedForAll Sample Feed</description>
<width>144</width>
<height>117</height>
</image>
<item>
<title>RSS Resources</title>
<description>Be sure to take a look at some of our favorite RSS Resources&lt;br&gt;
&lt;a href=&quot;http://www.rss-specifications.com&quot;&gt;RSS Specifications&lt;/a&gt;&lt;br&gt;
&lt;a href=&quot;http://www.blog-connection.com&quot;&gt;Blog Connection&lt;/a&gt;&lt;br&gt;
&lt;br&gt;</description>
<link>http://www.feedforall.com</link>
<pubDate>Tue, 26 Oct 2004 14:01:01 -0500</pubDate>
</item>
<item>
<title>Recommended Desktop Feed Reader Software</title>
<description>&lt;b&gt;FeedDemon&lt;/b&gt; enables you to quickly read and gather information from hundreds of web sites - without having to visit them. Don&apos;t waste any more time checking your favorite web sites for updates. Instead, use FeedDemon and make them come to you. &lt;br&gt;
More &lt;a href=&quot;http://store.esellerate.net/a.asp?c=1_SKU5139890208_AFL403073819&quot;&gt;FeedDemon Information&lt;/a&gt;</description>
<link>http://www.feedforall.com/feedforall-partners.htm</link>
<pubDate>Tue, 26 Oct 2004 14:03:25 -0500</pubDate>
</item>
<item>
<title>Recommended Web Based Feed Reader Software</title>
<description>&lt;b&gt;FeedScout&lt;/b&gt; enables you to view RSS/ATOM/RDF feeds from different sites directly in Internet Explorer. You can even set your Home Page to show favorite feeds. Feed Scout is a plug-in for Internet Explorer, so you won&apos;t have to learn anything except for how to press 2 new buttons on Internet Explorer toolbar. &lt;br&gt;
More &lt;a href=&quot;http://www.bytescout.com/feedscout.html&quot;&gt;Information on FeedScout&lt;/a&gt;&lt;br&gt;
&lt;br&gt;
&lt;br&gt;
&lt;b&gt;SurfPack&lt;/b&gt; can feature search tools, horoscopes, current weather conditions, LiveJournal diaries, humor, web modules and other dynamically updated content. &lt;br&gt;
More &lt;a href=&quot;http://www.surfpack.com/&quot;&gt;Information on SurfPack&lt;/a&gt;&lt;br&gt;</description>
<link>http://www.feedforall.com/feedforall-partners.htm</link>
<pubDate>Tue, 26 Oct 2004 14:06:44 -0500</pubDate>
</item>
</channel>
</rss>
84 changes: 83 additions & 1 deletion spec/inputs/rss_spec.rb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# encoding: utf-8
require "logstash/devutils/rspec/spec_helper"
require "logstash/inputs/rss"
require 'ostruct'

describe LogStash::Inputs::Rss do
describe "stopping" do
Expand All @@ -11,4 +12,85 @@
end
it_behaves_like "an interruptible input plugin"
end
end

describe "fetching input" do

  # Wraps a file from spec/fixtures in an object exposing #body; this is
  # what the stubbed Faraday.get below hands back to the plugin.
  def fixture_response(file_name)
    path = File.join(File.dirname(__FILE__), "..", "fixtures", file_name)
    OpenStruct.new(:body => File.read(path))
  end

  # Pops every event currently sitting in the queue and returns them in
  # the order they were popped.
  def drain(queue)
    Array.new(queue.size) { queue.pop }
  end

  let(:config) do
    {
      "url" => "http://www.example.com/foo.rss",
      "interval" => 10
    }
  end

  # Default feed body; contexts override :sample to swap the fixture.
  let(:sample) { fixture_response("sample-feed.xml") }

  before(:each) do
    # No real HTTP traffic: the configured URL always returns the fixture.
    allow(Faraday).to receive(:get).with(config["url"]).and_return(sample)
  end

  let(:data) do
    plugin = described_class.new(config)
    plugin_input(plugin) do |queue|
      # Block until the plugin has queued at least one event.
      sleep 0.1 while queue.empty?
      drain(queue)
    end
  end

  it "fetches all items" do
    expect(data.count).to be > 0
  end

  # Shared by every context whose feed must produce nothing and must not
  # raise out of the plugin.
  shared_examples "a feed producing no events" do
    let(:plugin) { described_class.new(config) }

    it "fetches no items and causes no errors" do
      events = []
      expect {
        plugin_input(plugin) do |queue|
          # Presumably long enough for the plugin to attempt one fetch.
          sleep 1
          events = drain(queue)
        end
      }.not_to raise_error
      expect(events.count).to be == 0
    end
  end

  context "when the feed is invalid" do
    let(:sample) { fixture_response("rss-invalid.xml") }

    include_examples "a feed producing no events"
  end

  context "when the feed is valid, but has zero items" do
    let(:sample) { fixture_response("rss-zero-feeditems.xml") }

    include_examples "a feed producing no events"
  end

end
end

0 comments on commit 56675d6

Please sign in to comment.