From 3fe51b6e8da894a9ed2719c1455025e9e59b471e Mon Sep 17 00:00:00 2001
From: Tom Aitken
Date: Fri, 2 Apr 2021 00:12:13 +1100
Subject: [PATCH] Some more minor updates to resolve issues with cleaning of
 response results

---
 scrapy_testmaster/cli.py         |  9 ++++++++-
 scrapy_testmaster/middleware.py  |  7 ++++++-
 scrapy_testmaster/utils.py       | 17 +++++++++++++++--
 scrapy_testmaster/utils_novel.py | 32 ++++++++++++++------------------
 setup.py                         |  4 ++--
 tests/test_validation.py         | 16 ++++++++--------
 6 files changed, 53 insertions(+), 32 deletions(-)

diff --git a/scrapy_testmaster/cli.py b/scrapy_testmaster/cli.py
index cf01a8b..476176b 100644
--- a/scrapy_testmaster/cli.py
+++ b/scrapy_testmaster/cli.py
@@ -1,6 +1,7 @@
 import re
 import os
 import sys
+import copy
 import json
 import scrapy
 import argparse
@@ -23,6 +24,7 @@
     get_project_dirs,
     parse_callback_result,
     prepare_callback_replay,
+    process_result,
     erase_special_metakeys
 )
 from scrapy_testmaster.utils_novel import (
@@ -222,7 +224,12 @@ def update(self):
                 data["result"], _ = parse_callback_result(
                     request.callback(response), spider, cb_settings
                 )
-                validate_results(fixture_dir, spider.settings, data['result'], data['request']['url'])
+                _result = copy.deepcopy(data['result'])
+                items_out, requests_out = process_result(
+                    _result, spider.settings, cb_settings)
+                validate_results(fixture_dir, spider.settings, items_out,
+                                 requests_out, data['request']['url'])
+
                 add_sample(fixture_index, fixture_dir, filename, data)
 
                 print("Fixture '{}' successfully updated.".format(
diff --git a/scrapy_testmaster/middleware.py b/scrapy_testmaster/middleware.py
index abbb49f..c333b74 100644
--- a/scrapy_testmaster/middleware.py
+++ b/scrapy_testmaster/middleware.py
@@ -21,6 +21,7 @@
     get_middlewares,
     create_dir,
     parse_callback_result,
+    process_result
 )
 from .utils_novel import (
     get_cb_settings,
@@ -180,7 +181,11 @@ def process_spider_output(self, response, result, spider):
             _request = copy.deepcopy(data['request'])
             _request = clean_request(_request, spider.settings, cb_settings)
 
-            validate_results(test_dir, spider.settings, data['result'], request['url'])
+            _result = copy.deepcopy(data['result'])
+            items_out, requests_out = process_result(
+                _result, spider.settings, cb_settings)
+            validate_results(test_dir, spider.settings, items_out, requests_out,
+                             request['url'])
 
             if callback_counter < max_fixtures or '_update' in response.meta:
                 index = callback_counter + 1
diff --git a/scrapy_testmaster/utils.py b/scrapy_testmaster/utils.py
index 4946f79..96818d8 100644
--- a/scrapy_testmaster/utils.py
+++ b/scrapy_testmaster/utils.py
@@ -325,6 +325,19 @@ def _clean(data, field_list):
             data.pop(field, None)
 
 
+def process_result(result, spider_settings, cb_settings):
+    items = [x["data"] for x in filter(
+        lambda res: res["type"] == "item", result)]
+    requests = [x["data"] for x in filter(
+        lambda res: res["type"] == "request", result)]
+    for i in range(len(items)):
+        clean_item(items[i], spider_settings, cb_settings)
+
+    requests = [clean_request(req, spider_settings, cb_settings) for
+                req in requests]
+    return items, requests
+
+
 def erase_special_metakeys(request):
     new_meta = {}
     for k, v in request.meta.items():
@@ -537,7 +550,7 @@ def test(self):
                 cb_obj = clean_request(cb_obj, settings, cb_settings)
                 result_to_validate = {'type': 'request', 'data': cb_obj}
                 try:
-                    validate_results(test_dir, settings, [result_to_validate], request.url)
+                    validate_results(test_dir, settings, [], [result_to_validate], request.url)
                 except _InvalidOutput as e:
                     six.raise_from(
                         _InvalidOutput(
@@ -549,7 +562,7 @@ def test(self):
                 clean_item(cb_obj, settings, cb_settings)
                 result_to_validate = {'type': 'item', 'data': cb_obj}
                 try:
-                    validate_results(fixture_path, settings, [result_to_validate], request.url)
+                    validate_results(fixture_path, settings, [result_to_validate], [], request.url)
                 except _InvalidOutput as e:
                     six.raise_from(
                         _InvalidOutput(
diff --git a/scrapy_testmaster/utils_novel.py b/scrapy_testmaster/utils_novel.py
index cf14631..2fd4833 100644
--- a/scrapy_testmaster/utils_novel.py
+++ b/scrapy_testmaster/utils_novel.py
@@ -103,7 +103,7 @@ def basic_items_check(items, obligate_fields, primary_fields, request_url):
                 "Empty field: %s" % (request_url, field))
 
 
-def check_options(spider_settings, config, result, request_url):
+def check_options(spider_settings, config, items, request_url):
     obligate_local = set()
     primary_local = set()
     obligate_global = set(spider_settings.getlist('TESTMASTER_OBLIGATE_ITEM_FIELDS', []))
@@ -119,13 +119,10 @@ def check_options(spider_settings, config, result, request_url):
         pass
     obligate_fields = obligate_local if obligate_local else obligate_global
     primary_fields = primary_local if primary_local else primary_global
-    items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
-    basic_items_check(items, obligate_fields, primary_fields, request_url)
-    items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
     basic_items_check(items, obligate_fields, primary_fields, request_url)
 
 
-def check_global_rules(spider_settings, result, request_url):
+def check_global_rules(spider_settings, items, requests, request_url):
     path_to_rules = spider_settings.get('TESTMASTER_PATH_TO_RULES_FILE', None)
     if path_to_rules:
         try:
@@ -136,41 +133,41 @@
                 "settings does not exist.")
         if hasattr(module, "ItemRules"):
             itemclass = module.ItemRules()
-            check_item_rules(itemclass, result, request_url)
+            check_item_rules(itemclass, items, request_url)
         if hasattr(module, "RequestRules"):
             reqclass = module.RequestRules()
-            check_req_rules(reqclass, result, request_url)
+            check_req_rules(reqclass, requests, request_url)
 
 
-def check_local_rules(config, result, request_url):
+def check_local_rules(config, items, requests, request_url):
     try:
         itemclass = config.ItemRules()
-        check_item_rules(itemclass, result, request_url)
+        check_item_rules(itemclass, items, request_url)
     except AttributeError:
         pass
     try:
         reqclass = config.RequestRules()
-        check_req_rules(reqclass, result, request_url)
+        check_req_rules(reqclass, requests, request_url)
     except AttributeError:
         pass
 
 
-def validate_results(test_dir, spider_settings, result, request_url):
+def validate_results(test_dir, spider_settings, items, requests, request_url):
     config_path = os.path.join(test_dir, 'config.py')
     if not os.path.exists(config_path):
         config = None
     else:
         config = get_cb_settings(test_dir)
-    check_options(spider_settings, config, result, request_url)
-    check_local_rules(config, result, request_url)
-    check_global_rules(spider_settings, result, request_url)
+
+    check_options(spider_settings, config, items, request_url)
+    check_local_rules(config, items, requests, request_url)
+    check_global_rules(spider_settings, items, requests, request_url)
 
 
-def check_item_rules(itemclass, result, request_url):
+def check_item_rules(itemclass, items, request_url):
     itemclass_attrs = [(name, getattr(itemclass, name)) for name in dir(itemclass)
                        if not name.startswith('__')]
     item_rules = list(filter(lambda entry: callable(entry[1]), itemclass_attrs))
-    items = map(lambda x: x["data"], filter(lambda res: res["type"] == "item", result))
     for item in items:
         for rule_func in item_rules:
             try:
@@ -180,11 +177,10 @@
                 "failed the rule %s" % (request_url, rule_func[0]))
 
 
-def check_req_rules(reqclass, result, request_url):
+def check_req_rules(reqclass, requests, request_url):
     reqclass_attrs = [(name, getattr(reqclass, name)) for name in dir(reqclass)
                       if not name.startswith('__')]
     req_rules = list(filter(lambda entry: callable(entry[1]), reqclass_attrs))
-    requests = map(lambda x: x["data"], filter(lambda res: res["type"] == "request", result))
     for req in requests:
         for rule_func in req_rules:
             try:
diff --git a/setup.py b/setup.py
index 10d314a..aafdaa5 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name='scrapy-testmaster',
-    version='1.0',
+    version='1.1',
     author='Thomas Aitken',
     author_email='tclaitken@gmail.com',
     description='Automated testing and debugging tool for Scrapy.',
@@ -26,7 +26,7 @@
         'datadiff==2.0.0',
         'requests'
     ],
-    entry_points = {
+    entry_points={
         'console_scripts': [
             'testmaster=scrapy_testmaster.cli:main',
         ],
diff --git a/tests/test_validation.py b/tests/test_validation.py
index 7d0ac04..80a7c81 100644
--- a/tests/test_validation.py
+++ b/tests/test_validation.py
@@ -27,37 +27,37 @@ class Settings1(Settings):
 
 class Settings2(Settings):
     TESTMASTER_PRIMARY_ITEM_FIELDS = ["uncool"]
-    TESTMASTER_INCLUDED_SETTINGS = []
 
 
-result1 = [{"type": "item", "data": {"name": ""}}, {"type": "request", "data": {}}]
+items1 = [{"name": ""}]
+requests = [{}]
 spider_settings1 = Settings1()
 spider_settings2 = Settings2()
-result2 = [{"type": "item", "data": {"uncool": "1"}}, {"type": "request", "data": {}}]
-result3 = [{"type": "item", "data": {"cool": "1"}}, {"type": "request", "data": {}}]
+items2 = [{"uncool": "1"}]
+items3 = [{"cool": "1"}]
 
 
 class TestValidation(unittest.TestCase):
     def test_conflict(self):
         write_config(config_1)
         with self.assertRaises(_InvalidOutput):
-            validate_results('', spider_settings1, result1, '')
+            validate_results('', spider_settings1, items1, requests, '')
         del_config()
 
     def test_item_rule(self):
         write_config(config_2)
         with self.assertRaises(_InvalidOutput):
-            validate_results('', spider_settings1, result1, '')
+            validate_results('', spider_settings1, items1, requests, '')
         del_config()
 
     def test_override1(self):
         write_config(config_3)
         with self.assertRaises(_InvalidOutput):
-            validate_results('', spider_settings2, result2, '')
+            validate_results('', spider_settings2, items2, requests, '')
         del_config()
 
     def test_override2(self):
         write_config(config_3)
-        validate_results('', spider_settings2, result3, '')
+        validate_results('', spider_settings2, items3, requests, '')
         del_config()
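Note: a minimal usage sketch of the reworked validation flow above, mirroring the new code in cli.py. The names data, spider, cb_settings and fixture_dir are placeholders for whatever the caller already has in scope; only process_result and the five-argument validate_results signature come from this patch.

    import copy

    from scrapy_testmaster.utils import process_result
    from scrapy_testmaster.utils_novel import validate_results

    # data['result'] holds entries shaped like {"type": "item" | "request", "data": ...},
    # as produced by parse_callback_result. process_result splits them into cleaned
    # items and requests, which validate_results now takes as separate arguments.
    items_out, requests_out = process_result(
        copy.deepcopy(data['result']), spider.settings, cb_settings)
    validate_results(fixture_dir, spider.settings, items_out, requests_out,
                     data['request']['url'])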