Skip to content

Commit

Permalink
Rewrite generic_validation to sanity_check
Browse files Browse the repository at this point in the history
sanity_check operates on a file and figures out the desired
mode from the file suffix (.in, .ans, .out). Whitespace strictness
is an explicit parameter (rather than inferred from the validation mode).
  • Loading branch information
thorehusfeldt committed Jan 29, 2024
1 parent 6a653a1 commit ccd7da2
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 43 deletions.
2 changes: 1 addition & 1 deletion bin/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,7 +267,7 @@ def process_run(run, p):
result = run.run()

if result.verdict == 'ACCEPTED':
validate.generic_validation(validate.OutputValidator, run.out_path, bar=localbar)
validate.sanity_check(run.out_path, localbar, strict_whitespace=False)

new_verdict = (
config.PRIORITY[result.verdict],
Expand Down
4 changes: 2 additions & 2 deletions bin/testcase.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,9 +200,9 @@ def validate_format(

if success and not bad_testcase:
if cls == validate.InputValidator:
validate.generic_validation(cls, self.in_path, bar=bar)
validate.sanity_check(self.in_path, bar)

if cls == validate.AnswerValidator:
validate.generic_validation(cls, self.ans_path, bar=bar)
validate.sanity_check(self.ans_path, bar)

return success
85 changes: 45 additions & 40 deletions bin/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
from enum import Enum
from typing import Type


class Mode(Enum):
""" There are three validation modes """
"""There are three validation modes"""

INPUT = 1
ANSWER = 2
OUTPUT = 3 # not implemented


OUTPUT = 3 # not implemented


def _merge_constraints(constraints_path, constraints):
Expand Down Expand Up @@ -74,7 +74,7 @@ def __init__(self, problem, path, skip_double_build_warning=False, check_constra
self.check_constraints = check_constraints

def _run_helper(self, testcase, constraints, args):
""" Helper method for the run method in subclasses.
"""Helper method for the run method in subclasses.
Return:
cwd: a current working directory for this testcase
constraints_path: None or a path to the constraints file
Expand Down Expand Up @@ -221,9 +221,8 @@ class OutputValidator(Validator):
"""

subdir = 'output_validators'

source_dirs = ['output_validator', 'output_validators']


def run(self, testcase, run=None, constraints=None, args=None):
"""Return:
Expand All @@ -238,9 +237,11 @@ def run(self, testcase, run=None, constraints=None, args=None):
# If run is None, we're validating an .ans file,
# else we're validating submission output
feedbackdir = run.feedbackdir if run is not None else cwd
invocation = self.run_command + [
testcase.in_path.resolve(), testcase.ans_path.resolve(), feedbackdir
] + self.problem.settings.validator_flags
invocation = (
self.run_command
+ [testcase.in_path.resolve(), testcase.ans_path.resolve(), feedbackdir]
+ self.problem.settings.validator_flags
)

path = run.out_path if run is not None else testcase.ans_path
with path.open() as file:
Expand All @@ -257,6 +258,7 @@ def run(self, testcase, run=None, constraints=None, args=None):

return ret


# Checks if byte is printable or whitespace
def _in_invalid_byte(byte, *, other_whitespaces=False):
if other_whitespaces:
Expand Down Expand Up @@ -293,43 +295,46 @@ def _has_consecutive_whitespaces(bytes):
return False


# Does some generic checks on input/output:
# - no unreadable characters
# - no weird consecutive whitespaces (' ', '\n ', ' \n')
# - no whitespace at start of file
# - ensures newline at end of file
# - not too large
# if any of this is violated a warning is printed
# use --no-testcase-sanity-checks to skip this
def sanity_check(path, bar, strict_whitespace=True):
"""
Does some generic checks on input, answer, or output files of a testcase, including
- no unreadable characters
- not too large
if any of this is violated a warning is printed.
use --no-testcase-sanity-checks to skip this
args:
strict_whitespace: Also check
- no weird consecutive whitespaces (' ', '\n ', ' \n')
- no other_whitespaces (like '\t')
- no whitespace at start of file
- ensures newline at end of file
# TODO: This should not select on class, but on validate.Mode
def generic_validation(cls: Type[Validator], file, *, bar):
"""
if config.args.no_testcase_sanity_checks:
return

# Todo we could check for more stuff that is likely an error like `.*-0.*`
if cls == InputValidator:
name = 'Testcase'
strict = True
elif cls == AnswerValidator:
name = 'Default answer'
strict = True
elif cls == OutputValidator:
name = 'Output'
strict = False

if file.exists():
bytes = file.read_bytes()
if _has_invalid_byte(bytes, other_whitespaces=not strict):
if not path.exists():
fatal("{path} not found during sanity check")
with open(path, 'rb') as file:
name = {
'.in': "Input",
'.ans': "Answer",
'.out': "Output",
}[path.suffix]
file_bytes = file.read()
if _has_invalid_byte(file_bytes, other_whitespaces=not strict_whitespace):
bar.warn(f'{name} contains unexpected characters but was accepted!')
elif len(bytes) == 0:
elif len(file_bytes) == 0:
bar.warn(f'{name} is empty but was accepted!')
elif len(bytes) > 20_000_000:
elif len(file_bytes) > 20_000_000:
bar.warn(f'{name} is larger than 20Mb!')
elif strict:
if bytes[0] == ord(' ') or bytes[0] == ord('\n'):
elif strict_whitespace:
if file_bytes[0] in [ord(' '), ord('\n')]:
bar.warn(f'{name} starts with whitespace but was accepted!')
elif bytes[-1] != ord('\n'):
elif file_bytes[-1] != ord('\n'):
bar.warn(f'{name} does not end with a newline but was accepted!')
elif _has_consecutive_whitespaces(bytes):
elif _has_consecutive_whitespaces(file_bytes):
bar.warn(f'{name} contains consecutive whitespace characters but was accepted!')

0 comments on commit ccd7da2

Please sign in to comment.