Commit 3fe51b6
Some more minor updates to resolve issues with cleaning of response results
Tom Aitken committed Apr 1, 2021
1 parent 78a31e3 commit 3fe51b6
Showing 6 changed files with 53 additions and 32 deletions.
9 changes: 8 additions & 1 deletion scrapy_testmaster/cli.py
@@ -1,6 +1,7 @@
import re
import os
import sys
import copy
import json
import scrapy
import argparse
@@ -23,6 +24,7 @@
get_project_dirs,
parse_callback_result,
prepare_callback_replay,
process_result,
erase_special_metakeys
)
from scrapy_testmaster.utils_novel import (
@@ -222,7 +224,12 @@ def update(self):
data["result"], _ = parse_callback_result(
request.callback(response), spider, cb_settings
)
validate_results(fixture_dir, spider.settings, data['result'], data['request']['url'])
_result = copy.deepcopy(data['result'])
items_out, requests_out = process_result(
_result, spider.settings, cb_settings)
validate_results(fixture_dir, spider.settings, items_out,
requests_out, data['request']['url'])

add_sample(fixture_index, fixture_dir, filename, data)

print("Fixture '{}' successfully updated.".format(
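
In plain terms, the update path in cli.py no longer hands the raw mixed result list to validate_results: it first deep-copies it and splits it with the new process_result helper, presumably so the data later written by add_sample is left untouched by the cleaning. A before/after sketch of just that call, using the variable names from the diff:

    # Before this commit: the mixed result list went straight to validation.
    # validate_results(fixture_dir, spider.settings, data['result'], data['request']['url'])

    # After this commit: clean a copy, split it, and validate items and requests separately.
    _result = copy.deepcopy(data['result'])
    items_out, requests_out = process_result(_result, spider.settings, cb_settings)
    validate_results(fixture_dir, spider.settings, items_out,
                     requests_out, data['request']['url'])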
7 changes: 6 additions & 1 deletion scrapy_testmaster/middleware.py
@@ -21,6 +21,7 @@
get_middlewares,
create_dir,
parse_callback_result,
process_result
)
from .utils_novel import (
get_cb_settings,
@@ -180,7 +181,11 @@ def process_spider_output(self, response, result, spider):
_request = copy.deepcopy(data['request'])
_request = clean_request(_request, spider.settings, cb_settings)

validate_results(test_dir, spider.settings, data['result'], request['url'])
_result = copy.deepcopy(data['result'])
items_out, requests_out = process_result(
_result, spider.settings, cb_settings)
validate_results(test_dir, spider.settings, items_out, requests_out,
request['url'])

if callback_counter < max_fixtures or '_update' in response.meta:
index = callback_counter + 1
17 changes: 15 additions & 2 deletions scrapy_testmaster/utils.py
@@ -325,6 +325,19 @@ def _clean(data, field_list):
data.pop(field, None)


def process_result(result, spider_settings, cb_settings):
items = [x["data"] for x in filter(
lambda res: res["type"] == "item", result)]
requests = [x["data"] for x in filter(
lambda res: res["type"] == "request", result)]
for i in range(len(items)):
clean_item(items[i], spider_settings, cb_settings)

requests = [clean_request(req, spider_settings, cb_settings) for
req in requests]
return items, requests


def erase_special_metakeys(request):
new_meta = {}
for k, v in request.meta.items():
Expand Down Expand Up @@ -537,7 +550,7 @@ def test(self):
cb_obj = clean_request(cb_obj, settings, cb_settings)
result_to_validate = {'type': 'request', 'data': cb_obj}
try:
validate_results(test_dir, settings, [result_to_validate], request.url)
validate_results(test_dir, settings, [], [result_to_validate], request.url)
except _InvalidOutput as e:
six.raise_from(
_InvalidOutput(
@@ -549,7 +562,7 @@
clean_item(cb_obj, settings, cb_settings)
result_to_validate = {'type': 'item', 'data': cb_obj}
try:
validate_results(fixture_path, settings, [result_to_validate], request.url)
validate_results(fixture_path, settings, [result_to_validate], [], request.url)
except _InvalidOutput as e:
six.raise_from(
_InvalidOutput(
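
For orientation, the splitting that the new process_result helper performs can be illustrated standalone. The values below are hypothetical; in the real helper each item dict is additionally passed through clean_item and each request dict through clean_request, both defined elsewhere in utils.py and not shown in this diff:

    # A parsed callback result as stored by parse_callback_result: typed entries wrapping "data".
    result = [
        {"type": "item", "data": {"name": "Widget", "price": "9.99"}},
        {"type": "request", "data": {"url": "https://example.com/page/2"}},
    ]

    # process_result separates the two kinds (and cleans them) before validation...
    items = [entry["data"] for entry in result if entry["type"] == "item"]
    requests = [entry["data"] for entry in result if entry["type"] == "request"]

    # ...so validate_results can receive them as distinct arguments:
    # validate_results(test_dir, spider_settings, items, requests, request_url)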
32 changes: 14 additions & 18 deletions scrapy_testmaster/utils_novel.py
@@ -103,7 +103,7 @@ def basic_items_check(items, obligate_fields, primary_fields, request_url):
"Empty field: %s" % (request_url, field))


def check_options(spider_settings, config, result, request_url):
def check_options(spider_settings, config, items, request_url):
obligate_local = set()
primary_local = set()
obligate_global = set(spider_settings.getlist('TESTMASTER_OBLIGATE_ITEM_FIELDS', []))
@@ -119,13 +119,10 @@ def check_options(spider_settings, config, result, request_url):
pass
obligate_fields = obligate_local if obligate_local else obligate_global
primary_fields = primary_local if primary_local else primary_global
items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
basic_items_check(items, obligate_fields, primary_fields, request_url)
items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
basic_items_check(items, obligate_fields, primary_fields, request_url)


def check_global_rules(spider_settings, result, request_url):
def check_global_rules(spider_settings, items, requests, request_url):
path_to_rules = spider_settings.get('TESTMASTER_PATH_TO_RULES_FILE', None)
if path_to_rules:
try:
@@ -136,41 +133,41 @@ def check_global_rules(spider_settings, result, request_url):
"settings does not exist.")
if hasattr(module, "ItemRules"):
itemclass = module.ItemRules()
check_item_rules(itemclass, result, request_url)
check_item_rules(itemclass, items, request_url)
if hasattr(module, "RequestRules"):
reqclass = module.RequestRules()
check_req_rules(reqclass, result, request_url)
check_req_rules(reqclass, requests, request_url)


def check_local_rules(config, result, request_url):
def check_local_rules(config, items, requests, request_url):
try:
itemclass = config.ItemRules()
check_item_rules(itemclass, result, request_url)
check_item_rules(itemclass, items, request_url)
except AttributeError:
pass
try:
reqclass = config.RequestRules()
check_req_rules(reqclass, result, request_url)
check_req_rules(reqclass, requests, request_url)
except AttributeError:
pass


def validate_results(test_dir, spider_settings, result, request_url):
def validate_results(test_dir, spider_settings, items, requests, request_url):
config_path = os.path.join(test_dir, 'config.py')
if not os.path.exists(config_path):
config = None
else:
config = get_cb_settings(test_dir)
check_options(spider_settings, config, result, request_url)
check_local_rules(config, result, request_url)
check_global_rules(spider_settings, result, request_url)

check_options(spider_settings, config, items, request_url)
check_local_rules(config, items, requests, request_url)
check_global_rules(spider_settings, items, requests, request_url)


def check_item_rules(itemclass, result, request_url):
def check_item_rules(itemclass, items, request_url):
itemclass_attrs = [(name, getattr(itemclass, name)) for name in dir(itemclass)
if not name.startswith('__')]
item_rules = list(filter(lambda entry: callable(entry[1]), itemclass_attrs))
items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
for item in items:
for rule_func in item_rules:
try:
@@ -180,11 +177,10 @@ def check_item_rules(itemclass, result, request_url):
"failed the rule %s" % (request_url, rule_func[0]))


def check_req_rules(reqclass, result, request_url):
def check_req_rules(reqclass, requests, request_url):
reqclass_attrs = [(name, getattr(reqclass, name)) for name in dir(reqclass)
if not name.startswith('__')]
req_rules = list(filter(lambda entry: callable(entry[1]), reqclass_attrs))
requests = map(lambda x: x["data"], filter(lambda res: res["type"] == "request", result))
for req in requests:
for rule_func in req_rules:
try:
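
The corresponding change on the validation side: validate_results and the rule-checking helpers no longer filter a mixed result list themselves, but receive items and requests that have already been split. A minimal sketch of the new entry point with placeholder values (the old signature was validate_results(test_dir, spider_settings, result, request_url)):

    items = [{"name": "Widget"}]                      # placeholder item dicts
    requests = [{"url": "https://example.com/next"}]  # placeholder request dicts

    # items feed check_options and any ItemRules checks; requests feed any RequestRules checks.
    validate_results(test_dir, spider_settings, items, requests, request_url)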
4 changes: 2 additions & 2 deletions setup.py
@@ -5,7 +5,7 @@

setuptools.setup(
name='scrapy-testmaster',
version='1.0',
version='1.1',
author='Thomas Aitken',
author_email='tclaitken@gmail.com',
description='Automated testing and debugging tool for Scrapy.',
@@ -26,7 +26,7 @@
'datadiff==2.0.0',
'requests'
],
entry_points = {
entry_points={
'console_scripts': [
'testmaster=scrapy_testmaster.cli:main',
],
16 changes: 8 additions & 8 deletions tests/test_validation.py
@@ -27,37 +27,37 @@ class Settings1(Settings):

class Settings2(Settings):
TESTMASTER_PRIMARY_ITEM_FIELDS = ["uncool"]
TESTMASTER_INCLUDED_SETTINGS = []


result1 = [{"type": "item", "data": {"name": ""}}, {"type": "request", "data": {}}]
items1 = [{"name": ""}]
requests = [{}]
spider_settings1 = Settings1()

spider_settings2 = Settings2()
result2 = [{"type": "item", "data": {"uncool": "1"}}, {"type": "request", "data": {}}]
result3 = [{"type": "item", "data": {"cool": "1"}}, {"type": "request", "data": {}}]
items2 = [{"uncool": "1"}]
items3 = [{"cool": "1"}]


class TestValidation(unittest.TestCase):
def test_conflict(self):
write_config(config_1)
with self.assertRaises(_InvalidOutput):
validate_results('', spider_settings1, result1, '')
validate_results('', spider_settings1, items1, requests, '')
del_config()

def test_item_rule(self):
write_config(config_2)
with self.assertRaises(_InvalidOutput):
validate_results('', spider_settings1, result1, '')
validate_results('', spider_settings1, items1, requests, '')
del_config()

def test_override1(self):
write_config(config_3)
with self.assertRaises(_InvalidOutput):
validate_results('', spider_settings2, result2, '')
validate_results('', spider_settings2, items2, requests, '')
del_config()

def test_override2(self):
write_config(config_3)
validate_results('', spider_settings2, result3, '')
validate_results('', spider_settings2, items3, requests, '')
del_config()
