From dd83956c04654cbf487367842e73fd4ae4267ed7 Mon Sep 17 00:00:00 2001 From: Tom Aitken Date: Wed, 7 Apr 2021 23:21:56 +1000 Subject: [PATCH] Clarified difference between parse_ and clean_request, fixed old path bug, cleaned-up copying --- scrapy_testmaster/cli.py | 6 ++---- scrapy_testmaster/middleware.py | 6 +++--- scrapy_testmaster/utils.py | 9 ++++++--- setup.py | 2 +- 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/scrapy_testmaster/cli.py b/scrapy_testmaster/cli.py index 476176b..3cd7eff 100644 --- a/scrapy_testmaster/cli.py +++ b/scrapy_testmaster/cli.py @@ -1,7 +1,6 @@ import re import os import sys -import copy import json import scrapy import argparse @@ -221,12 +220,11 @@ def update(self): request=request, **data['response']) cb_settings = get_cb_settings(fixture_dir) - data["result"], _ = parse_callback_result( + data['result'], _ = parse_callback_result( request.callback(response), spider, cb_settings ) - _result = copy.deepcopy(data['result']) items_out, requests_out = process_result( - _result, spider.settings, cb_settings) + data['result'], spider.settings, cb_settings) validate_results(fixture_dir, spider.settings, items_out, requests_out, data['request']['url']) diff --git a/scrapy_testmaster/middleware.py b/scrapy_testmaster/middleware.py index c333b74..dfbb433 100644 --- a/scrapy_testmaster/middleware.py +++ b/scrapy_testmaster/middleware.py @@ -113,7 +113,8 @@ def process_spider_input(self, response, spider): cb_name = 'parse' else: cb_name = _request['callback'] - test_dir = os.path.join(self.base_path, sanitize_module_name(spider.name), cb_name) + test_dir = os.path.join( + self.base_path, 'tests', sanitize_module_name(spider.name), cb_name) cb_settings = get_cb_settings(test_dir) filter_args = {'crawler', 'settings', 'start_urls'} if isinstance(spider, CrawlSpider): @@ -181,9 +182,8 @@ def process_spider_output(self, response, result, spider): _request = copy.deepcopy(data['request']) _request = clean_request(_request, spider.settings, cb_settings) - _result = copy.deepcopy(data['result']) items_out, requests_out = process_result( - _result, spider.settings, cb_settings) + data['result'], spider.settings, cb_settings) validate_results(test_dir, spider.settings, items_out, requests_out, request['url']) diff --git a/scrapy_testmaster/utils.py b/scrapy_testmaster/utils.py index c414925..6b1a001 100644 --- a/scrapy_testmaster/utils.py +++ b/scrapy_testmaster/utils.py @@ -193,8 +193,9 @@ def parse_object(_object, spider, cb_settings): return _object +# processes request for recording, handling auth settings def parse_request(request, spider, cb_settings): - _request = request_to_dict(request, spider=spider) + _request = copy.deepcopy(request_to_dict(request, spider=spider)) if not _request['callback']: _request['callback'] = 'parse' @@ -204,6 +205,7 @@ def parse_request(request, spider, cb_settings): for key, value in _request.get('meta').items(): if key != '_testmaster': _meta[key] = parse_object(value, spider, cb_settings) + _clean_splash(_meta, spider.settings, cb_settings) _request['meta'] = _meta return _request @@ -293,6 +295,7 @@ def _clean_headers(headers, spider_settings, cb_settings, mode=""): return headers +# processes request into JSON format for inscribing in view.json and for validation def clean_request(request, spider_settings, cb_settings): skipped_global = spider_settings.get('TESTMASTER_REQUEST_SKIPPED_FIELDS', default=[]) try: @@ -326,9 +329,9 @@ def _clean(data, field_list): def process_result(result, spider_settings, cb_settings): - items = [x["data"] for x in filter( + items = [copy.deepcopy(x["data"]) for x in filter( lambda res: res["type"] == "item", result)] - requests = [x["data"] for x in filter( + requests = [copy.deepcopy(x["data"]) for x in filter( lambda res: res["type"] == "request", result)] for i in range(len(items)): clean_item(items[i], spider_settings, cb_settings) diff --git a/setup.py b/setup.py index 0385860..72d5c5e 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name='scrapy-testmaster', - version='1.2', + version='1.3', author='Thomas Aitken', author_email='tclaitken@gmail.com', description='Automated testing and debugging tool for Scrapy.',