From 069310cb413371d3989cf2469b1a0e46bdf68a30 Mon Sep 17 00:00:00 2001 From: David Precious Date: Thu, 14 Sep 2023 21:56:43 +0100 Subject: [PATCH] Support scrubbing $c->req->data from C::Action::REST If Catalyst::Action::REST / Catalyst::Controller::REST is in use, the request object will have a `data()` method for deserialised data as added by the Catalyst::TraitFor::Request::REST role which ought to be scrubbed too. To support this, (a) the scrubbing needs to happen later in the request flow - now `hooking dispatch()` instead of `prepare_parameters()` (b) to avoid the data not being read if the request body had already been read by `$c->req->body_data`, the fix in this PR is needed: perl-catalyst/catalyst-runtime/pull/186 Until such time, dirtily monkey-patch the `seek()` in. --- lib/Catalyst/Plugin/HTML/Scrubber.pm | 38 +++++++++++++++-- t/05_rest.t | 61 ++++++++++++++++++++++++++++ t/lib/MyApp05.pm | 29 +++++++++++++ t/lib/MyApp05/Controller/Root.pm | 34 ++++++++++++++++ 4 files changed, 159 insertions(+), 3 deletions(-) create mode 100644 t/05_rest.t create mode 100644 t/lib/MyApp05.pm create mode 100644 t/lib/MyApp05/Controller/Root.pm diff --git a/lib/Catalyst/Plugin/HTML/Scrubber.pm b/lib/Catalyst/Plugin/HTML/Scrubber.pm index 137914d..c3223f2 100644 --- a/lib/Catalyst/Plugin/HTML/Scrubber.pm +++ b/lib/Catalyst/Plugin/HTML/Scrubber.pm @@ -26,7 +26,7 @@ sub setup { return $c->maybe::next::method(@_); } -sub prepare_parameters { +sub dispatch { my $c = shift; $c->maybe::next::method(@_); @@ -52,6 +52,15 @@ sub html_scrub { $c->_scrub_recurse($conf, $c->request->body_data); } + # And if Catalyst::Controller::REST is in use so we have $req->data, + # then scrub that too + if ($c->request->can('data')) { + my $data = $c->request->data; + if ($data) { + $c->_scrub_recurse($conf, $c->request->data); + } + } + # Normal query/POST body parameters: $c->_scrub_recurse($conf, $c->request->parameters); @@ -122,6 +131,27 @@ sub _should_scrub_param { return 1; } + +# Incredibly nasty monkey-patch to rewind filehandle before parsing - see +# https://github.com/perl-catalyst/catalyst-runtime/pull/186 +# First, get the default handlers hashref: +my $default_data_handlers = Catalyst->default_data_handlers(); + +# Wrap the coderef for application/json in one that rewinds the filehandle +# first: +my $orig_json_handler = $default_data_handlers->{'application/json'}; +$default_data_handlers->{'application/json'} = sub { + $_[0]->seek(0,0); # rewind $fh arg + $orig_json_handler->(@_); +}; + +# and now replace the original default_data_handlers() with a version that +# returns our modified handlers +*Catalyst::default_data_handlers = sub { + return $default_data_handlers; +}; + + __PACKAGE__->meta->make_immutable; 1; @@ -170,9 +200,11 @@ See SYNOPSIS for how to configure the plugin, both with its own configuration passing on any options from L to control exactly what scrubbing happens. -=item prepare_parameters +=item dispatch -Sanitize HTML tags in all parameters (unless `ignore_params` exempts them). +Sanitize HTML tags in all parameters (unless `ignore_params` exempts them) - +this includes normal POST params, and serialised data (e.g. a POSTed JSON body) +accessed via `$c->req->body_data` or `$c->req->data`. =back diff --git a/t/05_rest.t b/t/05_rest.t new file mode 100644 index 0000000..d45c136 --- /dev/null +++ b/t/05_rest.t @@ -0,0 +1,61 @@ +use strict; +use warnings; + +use FindBin qw($Bin); +use lib "$Bin/lib"; + +use Test::More; + + +eval 'use Catalyst::Controller::REST'; +plan skip_all => 'Catalyst::Controller::REST not available, skip REST tests' if $@; + +use Catalyst::Test 'MyApp05'; +use HTTP::Request::Common; +use HTTP::Status; + +{ + # Test that data in a JSON body POSTed gets scrubbed too + my $json_body = <", + "baz":{ + "one":"Second-level " + }, + "arr": [ + "one test ", + "two " + ], + "some_html": "Leave this alone: " +} +JSON + my $req = POST('/foo', + Content_Type => 'application/json', Content => $json_body + ); + diag("REQUEST: " . $req->as_string); + my ($res, $c) = ctx_request($req); + is($res->code, RC_OK, 'response ok'); + is( + $c->req->data->{foo}, + 'Top-level ', # note trailing space where img was removed + 'Top level body param scrubbed', + ); + is( + $c->req->data->{baz}{one}, + 'Second-level ', + 'Second level body param scrubbed', + ); + is( + $c->req->data->{arr}[0], + 'one test ', + 'Second level array contents scrubbbed', + ); + is( + $c->req->data->{some_html}, + 'Leave this alone: ', + 'Body data param matching ignore_params left alone', + ); +} + +done_testing(); + diff --git a/t/lib/MyApp05.pm b/t/lib/MyApp05.pm new file mode 100644 index 0000000..3131524 --- /dev/null +++ b/t/lib/MyApp05.pm @@ -0,0 +1,29 @@ +package MyApp05; + +use Moose; +use namespace::autoclean; + +use Catalyst qw/HTML::Scrubber/; + +extends 'Catalyst'; + +__PACKAGE__->config( + name => 'MyApp03', + scrubber => { + + auto => 1, + + ignore_params => [ qr/_html$/, 'ignored_param' ], + + # params for HTML::Scrubber + params => [ + allow => [qw/br hr b/], + ], + } +); + + + +__PACKAGE__->setup(); +1; + diff --git a/t/lib/MyApp05/Controller/Root.pm b/t/lib/MyApp05/Controller/Root.pm new file mode 100644 index 0000000..e5ed70f --- /dev/null +++ b/t/lib/MyApp05/Controller/Root.pm @@ -0,0 +1,34 @@ +package MyApp05::Controller::Root; + +use Moose; +use namespace::autoclean; + +BEGIN { extends 'Catalyst::Controller::REST' } + +__PACKAGE__->config( + namespace => '', +); + +# default to avoid "No default action defined" +sub foo : Path : ActionClass('REST') { } + +sub foo_GET { + my ($self, $c) = @_; + + $c->res->body('index'); +} + +sub foo_POST { + my ($self, $c) = @_; + $c->log->debug("index_POST running for a " . $c->req->method . " request"); + $c->res->body('POST received'); +} + +sub index { + my ($self, $c) = @_; + $c->log->debug("Default index route hit by " . $c->req->method . " request"); + $c->res->body("DEFAULT"); +} + +1; +