Skip to content

Commit

Permalink
feat: Detect changes using filesystem (#32)
Browse files Browse the repository at this point in the history
* feat: detect changes using read files

* fix: git fetch by YZ

* fix: add more debug output

* fix: add more debug output

* fix: debug output

* fix: add git fetch --unshallow

* fix: add git fetch --unshallow

* fix: use current_repo_gitlab_path to clone student repo

* fix: add gitlab_job_token

* fix: try to copy current repo

* fix: fix gitlab_token name

* fix: remove debug outputs, set real folders to detect changes

* fix: print changed_files to log

* fix: hotfix partially_scored

* fix: hotfix partially_scored

* style: apply isort, mypy, ruff

* fix: add annotations future for 3.8

* fix: remove test_flat_folders_skip_binary_files test
  • Loading branch information
k4black authored Oct 5, 2023
1 parent b69ec80 commit e563d59
Show file tree
Hide file tree
Showing 14 changed files with 492 additions and 24 deletions.
138 changes: 123 additions & 15 deletions checker/actions/grade.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@
import os
import subprocess
import sys
import tempfile
from datetime import datetime
from pathlib import Path

from ..course import CourseConfig, CourseDriver, CourseSchedule, Group, Task
from ..exceptions import RunFailedError
from ..testers import Tester
from ..utils import get_folders_diff_except_public, get_tracked_files_list
from ..utils.manytask import PushFailedError, push_report
from ..utils.print import print_info, print_task_info

Expand Down Expand Up @@ -42,8 +45,6 @@ def _get_git_changes(
print_info(f'CI_COMMIT_SHA: {current_commit_sha}, CI_COMMIT_BEFORE_SHA: {prev_commit_sha}!')
git_changes_type = 'diff_last'

print_info('Loading changes...', color='orange')

changes = []
if git_changes_type.startswith('diff'):
if git_changes_type == 'diff_between':
Expand Down Expand Up @@ -112,6 +113,7 @@ def _get_git_changes(

result = subprocess.run(
f'cd {solution_root} && '
f'git fetch --unshallow &&'
f'(git remote rm upstream | true) &&'
f'git remote add upstream {public_repo_url}.git &&'
f'git fetch upstream',
Expand Down Expand Up @@ -250,6 +252,91 @@ def grade_tasks(
return success


def _get_changes_using_real_folders(
    course_config: CourseConfig,
    current_folder: str,
    old_hash: str,
    current_repo_gitlab_path: str,
    gitlab_token: str,
) -> list[str]:
    """
    Detect changed files by comparing real folder contents instead of git history.

    Clones the public repo and the current repo (checked out at ``old_hash``) into
    temporary directories, diffs them against ``current_folder`` and keeps only
    files that are tracked by git in ``current_folder``.

    @param course_config: Course config; gitlab_url, public_repo and default_branch are read
    @param current_folder: Folder with the current state of the solution repo
    @param old_hash: Commit hash (or ref) of the previous state to compare with
    @param current_repo_gitlab_path: Path of the current repo on gitlab
    @param gitlab_token: Token used to clone the current repo
    @return: list of changed files as strings
    @raise RuntimeError: if any git step fails; comparing against a partially
        prepared folder would report wrong changes
    """
    gitlab_url_with_token = course_config.gitlab_url.replace('://', f'://gitlab-ci-token:{gitlab_token}@')

    def run_git(step: str, args: list[str], cwd: str | None = None) -> None:
        # Argument list (no shell) so config/env values are never interpreted by a shell.
        result = subprocess.run(
            ['git', *args],
            encoding='utf-8',
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            cwd=cwd,
        )
        if result.returncode != 0:
            # The command line is deliberately left out of the message: it may contain the token.
            raise RuntimeError(f'{step} failed with exit code {result.returncode}')

    with tempfile.TemporaryDirectory() as public_dir, tempfile.TemporaryDirectory() as old_dir:
        # download public repo, minimal
        print_info(f'Cloning {course_config.public_repo} of {course_config.default_branch}...', color='white')
        run_git(
            'Cloning public repo',
            [
                'clone',
                '--depth=1',
                f'--branch={course_config.default_branch}',
                f'{course_config.gitlab_url}/{course_config.public_repo}.git',
                public_dir,
            ],
        )

        # download old repo by hash, minimal
        print_info(f'Cloning {current_repo_gitlab_path} to get {old_hash}...', color='white')
        run_git(
            'Cloning current repo',
            [
                'clone',
                '--depth=1',
                f'--branch={course_config.default_branch}',
                f'{gitlab_url_with_token}/{current_repo_gitlab_path}.git',
                old_dir,
            ],
        )
        # the shallow clone only has the branch head, so the old commit is fetched explicitly
        run_git(f'Fetching {old_hash}', ['fetch', 'origin', old_hash], cwd=old_dir)
        run_git(f'Checking out {old_hash}', ['checkout', 'FETCH_HEAD'], cwd=old_dir)

        # get diff
        print_info('Detected changes (filtering by public repo and git tracked files)', color='white')
        print_info('and filtering by git tracked files', color='white')
        changes = get_folders_diff_except_public(
            Path(public_dir),
            Path(old_dir),
            Path(current_folder),
            exclude_patterns=['.git'],
        )
        # filter by tracked by git; a set keeps the membership test cheap
        git_tracked_files = set(get_tracked_files_list(Path(current_folder)))
        changes = [f for f in changes if f in git_tracked_files]

    print_info('\nchanged_files:', color='white')
    for change in changes:
        print_info(f'    ->> {change}', color='white')

    return changes


def grade_on_ci(
course_config: CourseConfig,
course_schedule: CourseSchedule,
Expand Down Expand Up @@ -280,23 +367,44 @@ def grade_on_ci(
print_info(f'-> job_start_time {job_start_time}', color='grey')
print_info(f'= using send_time {send_time}', color='grey')

# Get changed files via git
author_name = os.environ.get('CI_COMMIT_AUTHOR', None)
current_commit_sha = os.environ.get('CI_COMMIT_SHA', None)
prev_commit_sha = os.environ.get('CI_COMMIT_BEFORE_SHA', None)
print_info(f'CI_COMMIT_AUTHOR {author_name}', color='grey')
print_info(f'CI_COMMIT_SHA {current_commit_sha}', color='grey')
print_info(f'CI_COMMIT_BEFORE_SHA {prev_commit_sha}', color='grey')

gitlab_job_token = os.environ.get('CI_JOB_TOKEN') or ''

print_info('Loading changes...', color='orange')
# Get changes using real files difference
try:
author_name = os.environ.get('CI_COMMIT_AUTHOR', None)
current_commit_sha = os.environ.get('CI_COMMIT_SHA', None)
prev_commit_sha = os.environ.get('CI_COMMIT_BEFORE_SHA', None)

changes = _get_git_changes(
solution_root,
course_config.gitlab_url + '/' + course_config.public_repo,
author_name=author_name,
current_commit_sha=current_commit_sha,
prev_commit_sha=prev_commit_sha,
current_repo_gitlab_path = os.environ['CI_PROJECT_PATH']
changes = _get_changes_using_real_folders(
course_config,
current_folder=solution_root,
old_hash=prev_commit_sha or course_config.default_branch,
current_repo_gitlab_path=current_repo_gitlab_path,
gitlab_token=gitlab_job_token,
)
except GitException as e:
except Exception as e:
print_info('Ooops... Loading changes failed', color='red')
print_info(e)
sys.exit(1)

print_info('Trying with git diff instead\n')
# Get changed files via git
try:
changes = _get_git_changes(
solution_root,
course_config.gitlab_url + '/' + course_config.public_repo,
author_name=author_name,
current_commit_sha=current_commit_sha,
prev_commit_sha=prev_commit_sha,
)
except GitException as e:
print_info('Ooops... Loading changes failed', color='red')
print_info(e)
sys.exit(1)

# Process Changed files to Changed tasks
tasks: list[Task] = []
Expand Down
2 changes: 1 addition & 1 deletion checker/testers/cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _gen_build( # type: ignore[override]

try:
print_info('Running clang tidy...', color='orange')
files = [str(file) for file in task_dir.rglob('*.cpp')]
files = [str(file) for file in task_dir.rglob('*.cpp')] # type: ignore
self._executor(
['clang-tidy', '-p', '.', *files],
cwd=build_dir,
Expand Down
15 changes: 7 additions & 8 deletions checker/testers/python.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,7 @@ def _run_tests( # type: ignore[override]

# Check tests
tests_err = None
tests_output = ''
try:
print_info('Running tests...', color='orange')
output = self._executor(
Expand All @@ -356,13 +357,11 @@ def _run_tests( # type: ignore[override]
print_info(output, end='')
print_info('OK', color='green')
except ExecutionFailedError as e:
if not test_config.partially_scored:
# Reraise only if all tests should pass
tests_err = e
output = e.output
tests_err = e
tests_output = e.output or ''

if normalize_output or test_config.partially_scored:
print_info(output, end='')
print_info(e.output, end='')
e.output = ''
output = ''

Expand All @@ -374,7 +373,7 @@ def _run_tests( # type: ignore[override]
if import_err is not None:
raise RunFailedError('Import error', output=import_err.output) from import_err

if tests_err is not None:
if tests_err is not None and not test_config.partially_scored: # Reraise only if all tests should pass
raise TestsFailedError('Public or private tests error', output=tests_err.output) from tests_err

if styles_err is not None:
Expand All @@ -384,7 +383,7 @@ def _run_tests( # type: ignore[override]
raise StylecheckFailedError('Typing error', output=typing_err.output) from typing_err

if test_config.partially_scored:
output = output or '' # for mypy only
return self._parse_summary_score(output)
tests_output = tests_output or '' # for mypy only
return self._parse_summary_score(tests_output)
else:
return 1.
105 changes: 105 additions & 0 deletions checker/utils/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import re
import shutil
import subprocess
from pathlib import Path

from .print import print_info
Expand Down Expand Up @@ -140,3 +141,107 @@ def check_files_contains_regexp(
raise AssertionError(f'File <{source_path}> contains one of <{regexps}>')
return True
return False


def get_folders_diff(
    old_folder: Path,
    new_folder: Path,
    skip_binary: bool = True,
    exclude_patterns: list[str] | None = None,
) -> list[str]:
    """
    Return diff files between 2 folders
    @param old_folder: Old folder
    @param new_folder: New folder with some changes files, based on old folder
    @param skip_binary: Skip binary files
    @param exclude_patterns: Exclude files that match pattern
    @return: list of changed files as strings, relative to new_folder
    """
    # diff docs https://www.gnu.org/software/diffutils/manual/html_node/diff-Options.html
    # -N/--new-file - If one file is missing, treat it as present but empty
    # -w/--ignore-all-space - ignore all spaces and tabs e.g. if ( a == b) is equal to if(a==b)
    # -r/--recursive - recursively compare any subdirectories found
    # -q/--brief - report only when files differ
    # --strip-trailing-cr - strip trailing carriage return on input
    # -x/--exclude [pattern] - exclude files that match pattern

    # diff is given absolute paths, so its output has to be parsed against the
    # absolute roots too (a relative new_folder would not match otherwise)
    old_root = old_folder.absolute()
    new_root = new_folder.absolute()

    def with_trailing_slash(folder: Path) -> str:
        text = str(folder)
        return text if text.endswith('/') else text + '/'

    old_prefix = with_trailing_slash(old_root)
    separator = ' and ' + with_trailing_slash(new_root)

    def changed_path(pair: str) -> str:
        # pair is expected to look like '<old_root>/<rel> and <new_root>/<rel>'.
        # Both sides carry the same relative path, so its length follows from the
        # total length; this keeps file names with whitespace intact.
        rel_len = (len(pair) - len(old_prefix) - len(separator)) // 2
        rel = pair[len(pair) - rel_len:] if rel_len > 0 else ''
        if rel and pair == f'{old_prefix}{rel}{separator}{rel}':
            return rel
        # unexpected layout: take the second path and let relative_to validate it
        _, _, second = pair.rpartition(' and ')
        return str(Path(second).relative_to(new_root))

    exclude_args = [f'--exclude={pattern}' for pattern in exclude_patterns] if exclude_patterns else []
    result = subprocess.run(
        [
            'diff',
            '--brief',
            '--recursive',
            '--ignore-all-space',
            '--new-file',
            '--strip-trailing-cr',
            *exclude_args,
            str(old_root),
            str(new_root),
        ],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    output = result.stdout.decode()

    files_prefix, binary_prefix, differ_suffix = 'Files ', 'Binary files ', ' differ'

    changed: list[str] = []
    for line in output.splitlines():
        if line.startswith('Only in'):
            # explicit raise instead of `assert False`, which is stripped under `python -O`
            raise AssertionError('Will be treated as change due to --new-file option')
        if not line.endswith(differ_suffix):
            continue
        if line.startswith(files_prefix):
            changed.append(changed_path(line[len(files_prefix):-len(differ_suffix)]))
        elif line.startswith(binary_prefix):
            # NOTE(review): with --brief, GNU diff seems to report binary files with the
            # plain 'Files ... differ' line, so this branch may never trigger - confirm
            if skip_binary:
                continue
            changed.append(changed_path(line[len(binary_prefix):-len(differ_suffix)]))

    return changed


def get_folders_diff_except_public(
    public_folder: Path,
    old_folder: Path,
    new_folder: Path,
    skip_binary: bool = True,
    exclude_patterns: list[str] | None = None,
) -> list[str]:
    """
    Return diff files between 2 folders except files that are equal to public folder files
    @param public_folder: Public folder
    @param old_folder: Old folder
    @param new_folder: New folder with some changes files, based on old folder
    @param skip_binary: Skip binary files
    @param exclude_patterns: Exclude files that match pattern
    @return: list of changed files as strings, in the order of the old/new diff
    """

    changed_files_old_new = get_folders_diff(
        old_folder,
        new_folder,
        skip_binary=skip_binary,
        exclude_patterns=exclude_patterns,
    )
    changed_files_public_new = get_folders_diff(
        public_folder,
        new_folder,
        skip_binary=skip_binary,
        exclude_patterns=exclude_patterns,
    )

    # TODO: Remove logging
    print_info('\nchanged_files_old_new:', color='grey')
    for i in changed_files_old_new:
        print_info(f'  {i}', color='grey')
    print_info('\nchanged_files_public_new:', color='grey')
    for i in changed_files_public_new:
        print_info(f'  {i}', color='grey')

    # A file counts as changed only if it differs from both the old and the public version.
    # Filtering the old/new list (instead of iterating a set intersection) keeps the
    # result order stable between runs and free of duplicates.
    differs_from_public = set(changed_files_public_new)
    already_listed: set[str] = set()
    changed_files: list[str] = []
    for name in changed_files_old_new:
        if name in differs_from_public and name not in already_listed:
            already_listed.add(name)
            changed_files.append(name)
    return changed_files
17 changes: 17 additions & 0 deletions checker/utils/git.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import shutil
import subprocess
from pathlib import Path
Expand All @@ -8,6 +10,21 @@
DEFAULT_BRANCH = 'main'


def get_tracked_files_list(
    repo_dir: Path,
) -> list[str]:
    """
    Return files tracked by git in the given repo
    @param repo_dir: Folder inside a git work tree; `git ls-files` is run there
    @return: list of tracked file paths as strings, as printed by `git ls-files`
    @raise subprocess.CalledProcessError: if git exits with a non-zero code
    """
    r = subprocess.run(
        # -z: NUL-terminated verbatim paths; without it git C-quotes unusual names
        # (non-ASCII, tabs, quotes), which then never match real file names
        ['git', 'ls-files', '-z'],
        encoding='utf-8',
        stdout=subprocess.PIPE,
        # kept apart from stdout so git warnings can not end up in the file list
        stderr=subprocess.PIPE,
        check=True,
        cwd=repo_dir,
    )
    return [name for name in r.stdout.split('\0') if name]


def setup_repo_in_dir(
repo_dir: Path,
remote_repo_url: str,
Expand Down
2 changes: 2 additions & 0 deletions tests/course/test_config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from pathlib import Path

import pytest
Expand Down
2 changes: 2 additions & 0 deletions tests/examples/test_course_configs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

from pathlib import Path

from checker.course.config import CourseConfig
Expand Down
2 changes: 2 additions & 0 deletions tests/executors/test_sandbox.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import os
import sys
from pathlib import Path
Expand Down
Loading

0 comments on commit e563d59

Please sign in to comment.