Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revive and clean up repocleaner tests #2468

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 18 additions & 10 deletions Framework/script/RepoCleaner/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,26 @@ There can be any number of these rules. The order is important as we use the fir

The configuration for ccdb-test is described [here](../../../doc/DevelopersTips.md).

## Setup virtual environment for development and test (venv)

1. cd Framework/script/RepoCleaner
2. python3 -m venv env
3. source env/bin/activate
4. python -m pip install -r requirements.txt
5. python3 -m pip install .
6. You can now run and develop the scripts. Next time, just run `source env/bin/activate` and you are good to go.

## Unit Tests
`cd QualityControl/Framework/script/RepoCleaner ; python3 -m unittest discover`

and to test only one of them: `python3 -m unittest tests/test_NewProduction.py -k test_2_runs`
```
cd Framework/script/RepoCleaner
source env/bin/activate

# Run a test:
python -m unittest tests.test_Ccdb.TestCcdb.test_getObjectsList
```

`cd QualityControl/Framework/script/RepoCleaner ; python3 -m unittest discover`

In particular, there is a fairly extensive test for the `production` rule. Note that it hits the CCDB, and it needs the following path to be truncated:
`
Expand Down Expand Up @@ -75,11 +91,3 @@ Create new version
2. `python3 setup.py sdist bdist_wheel`
3. `python3 -m twine upload --repository pypi dist/*`

## Use venv

1. cd Framework/script/RepoCleaner
2. python3 -m venv env
3. source env/bin/activate
4. python -m pip install -r requirements.txt
5. python3 -m pip install .
6. You can execute and work. Next time just do "activate" and then you are good to go
4 changes: 3 additions & 1 deletion Framework/script/RepoCleaner/qcrepocleaner/Ccdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, path: str, validFrom, validTo, createdAt, uuid=None, metadata
:param uuid: unique id of the object
:param validFrom: validity range smaller limit (in ms)
:param validTo: validity range bigger limit (in ms)
:param createdAt: creation timestamp of the object
'''
self.path = path
self.uuid = uuid
Expand Down Expand Up @@ -72,7 +73,8 @@ def getObjectsList(self, added_since: int = 0, path: str = "", no_wildcard: bool
:return A list of strings, each containing a path to an object in the CCDB.
'''
url_for_all_obj = self.url + '/latest/' + path
url_for_all_obj += '/' if no_wildcard else '/.*'
url_for_all_obj += '/' if path else ''
url_for_all_obj += '' if no_wildcard else '.*'
logger.debug(f"Ccdb::getObjectsList -> {url_for_all_obj}")
headers = {'Accept': 'application/json', 'If-Not-Before':str(added_since)}
r = requests.get(url_for_all_obj, headers=headers)
Expand Down
12 changes: 12 additions & 0 deletions Framework/script/RepoCleaner/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
certifi==2024.2.2
chardet==5.2.0
charset-normalizer==3.3.2
dryable==1.2.0
idna==3.7
psutil==6.1.0
python-consul==1.1.0
PyYAML==6.0.1
requests==2.31.0
responses==0.25.0
six==1.16.0
urllib3==2.2.1
86 changes: 25 additions & 61 deletions Framework/script/RepoCleaner/tests/test_1_per_hour.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,12 @@
import logging
import time
import unittest
from datetime import timedelta, date, datetime

from Ccdb import Ccdb, ObjectVersion
from rules import last_only
import os
import sys
import importlib

def import_path(path): # needed because o2-qc-repo-cleaner has no suffix
module_name = os.path.basename(path).replace('-', '_')
spec = importlib.util.spec_from_loader(
module_name,
importlib.machinery.SourceFileLoader(module_name, path)
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
sys.modules[module_name] = module
return module

one_per_hour = import_path("../qcrepocleaner/rules/1_per_hour.py")
from importlib import import_module
from qcrepocleaner.Ccdb import Ccdb
from tests import test_utils
from tests.test_utils import CCDB_TEST_URL

one_per_hour = import_module(".1_per_hour", "qcrepocleaner.rules") # file names should not start with a number...

class Test1PerHour(unittest.TestCase):
"""
Expand All @@ -35,86 +21,64 @@ class Test1PerHour(unittest.TestCase):
one_minute = 60000

def setUp(self):
self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080')
self.ccdb = Ccdb(CCDB_TEST_URL) # ccdb-test but please use IP to avoid DNS alerts
self.path = "qc/TST/MO/repo/test"
self.run = 124321
self.extra = {}


def test_1_per_hour(self):
"""
60 versions, 2 minutes apart
120 versions
grace period of 15 minutes
First version is preserved (always). 7 are preserved during the grace period at the end.
One more is preserved after 1 hour. --> 9 preserved
First version is preserved (always). 14 are preserved during the grace period at the end.
One more is preserved after 1 hour. --> 16 preserved
"""
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%d-%b-%y %H:%M:%S')
logging.getLogger().setLevel(int(10))

# Prepare data
test_path = self.path + "/test_1_per_hour"
self.prepare_data(test_path, 60, 2)
test_utils.clean_data(self.ccdb, test_path)
test_utils.prepare_data(self.ccdb, test_path, [120], [0], 123)

stats = one_per_hour.process(self.ccdb, test_path, 15, 1, self.in_ten_years, self.extra)
self.assertEqual(stats["deleted"], 51)
self.assertEqual(stats["preserved"], 9)
logging.info(stats)
self.assertEqual(stats["deleted"], 104)
self.assertEqual(stats["preserved"], 16)

objects_versions = self.ccdb.getVersionsList(test_path)
self.assertEqual(len(objects_versions), 9)
self.assertEqual(len(objects_versions), 16)


def test_1_per_hour_period(self):
"""
60 versions, 2 minutes apart
120 versions
no grace period
period of acceptance: 1 hour in the middle
We have therefore 30 versions in the acceptance period.
We have therefore 60 versions in the acceptance period.
Only 1 of them, the one 1 hour after the first version in the set, will be preserved, the others are deleted.
Thus we have 29 deletion. Everything outside the acceptance period is kept.
Thus we have 59 deletions. Everything outside the acceptance period is kept.
"""
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%d-%b-%y %H:%M:%S')
logging.getLogger().setLevel(int(10))

# Prepare data
test_path = self.path + "/test_1_per_hour_period"
self.prepare_data(test_path, 60, 2)
test_utils.clean_data(self.ccdb, test_path)
test_utils.prepare_data(self.ccdb, test_path, [120], [0], 123)
current_timestamp = int(time.time() * 1000)
logging.debug(f"{current_timestamp} - {datetime.today()}")

objects_versions = self.ccdb.getVersionsList(test_path)
created = len(objects_versions)

stats = one_per_hour.process(self.ccdb, test_path, 15, current_timestamp-90*60*1000,
current_timestamp-30*60*1000, self.extra)
self.assertEqual(stats["deleted"], 29)
self.assertEqual(stats["preserved"], 31)
logging.info(stats)
self.assertEqual(stats["deleted"], 59)
self.assertEqual(stats["preserved"], 61)

objects_versions = self.ccdb.getVersionsList(test_path)
self.assertEqual(len(objects_versions), 31)


def prepare_data(self, path, number_versions, minutes_between):
"""
Prepare a data set starting `since_minutes` in the past.
1 version per minute
"""

current_timestamp = int(time.time() * 1000)
data = {'part': 'part'}
run = 1234
counter = 0

for x in range(number_versions+1):
counter = counter + 1
from_ts = current_timestamp - minutes_between * x * 60 * 1000
to_ts = current_timestamp
metadata = {'RunNumber': str(run)}
version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata)
self.ccdb.putVersion(version=version_info, data=data)

logging.debug(f"counter : {counter}")
self.assertEqual(len(objects_versions), 61)


if __name__ == '__main__':
Expand Down
64 changes: 12 additions & 52 deletions Framework/script/RepoCleaner/tests/test_1_per_run.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,13 @@
import logging
import time
import unittest
from datetime import timedelta, date, datetime
from importlib import import_module

from Ccdb import Ccdb, ObjectVersion
from rules import last_only
import os
import sys
import importlib


def import_path(path): # needed because o2-qc-repo-cleaner has no suffix
module_name = os.path.basename(path).replace('-', '_')
spec = importlib.util.spec_from_loader(
module_name,
importlib.machinery.SourceFileLoader(module_name, path)
)
module = importlib.util.module_from_spec(spec)
spec.loader.exec_module(module)
sys.modules[module_name] = module
return module


one_per_run = import_path("../qcrepocleaner/rules/1_per_run.py")
from qcrepocleaner.Ccdb import Ccdb
from tests import test_utils
from tests.test_utils import CCDB_TEST_URL

one_per_run = import_module(".1_per_run", "qcrepocleaner.rules") # file names should not start with a number...

class Test1PerRun(unittest.TestCase):
"""
Expand All @@ -38,15 +22,14 @@ class Test1PerRun(unittest.TestCase):
one_minute = 60000

def setUp(self):
self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080')
self.ccdb = Ccdb(CCDB_TEST_URL)
self.path = "qc/TST/MO/repo/test"
self.run = 124321
self.extra = {}

def test_1_per_run(self):
"""
60 versions, 1 minute apart
6 runs
6 runs of 10 versions, versions 1 minute apart
grace period of 15 minutes
Preserved: 14 at the end (grace period), 6 for the runs, but 2 are in both sets --> 14+6-2=18 preserved
"""
Expand All @@ -56,7 +39,8 @@ def test_1_per_run(self):

# Prepare data
test_path = self.path + "/test_1_per_run"
self.prepare_data(test_path, 60)
test_utils.clean_data(self.ccdb, test_path)
test_utils.prepare_data(self.ccdb, test_path, [10, 10, 10, 10, 10, 10], [0, 0, 0, 0, 0, 0], 123)

objects_versions = self.ccdb.getVersionsList(test_path)
created = len(objects_versions)
Expand All @@ -71,8 +55,7 @@ def test_1_per_run(self):

def test_1_per_run_period(self):
"""
60 versions 1 minute apart
6 runs
6 runs of 10 versions each, versions 1 minute apart
no grace period
acceptance period is only the 38 minutes in the middle
preserved: 6 runs + 11 first and 11 last, with an overlap of 2 --> 26
Expand All @@ -83,7 +66,8 @@ def test_1_per_run_period(self):

# Prepare data
test_path = self.path + "/test_1_per_run_period"
self.prepare_data(test_path, 60)
test_utils.clean_data(self.ccdb, test_path)
test_utils.prepare_data(self.ccdb, test_path, [10, 10, 10, 10, 10, 10], [0, 0, 0, 0, 0, 0], 123)
current_timestamp = int(time.time() * 1000)

stats = one_per_run.process(self.ccdb, test_path, 0, current_timestamp - 49 * 60 * 1000,
Expand All @@ -94,29 +78,5 @@ def test_1_per_run_period(self):
objects_versions = self.ccdb.getVersionsList(test_path)
self.assertEqual(len(objects_versions), 26)

def prepare_data(self, path, since_minutes):
"""
Prepare a data set starting `since_minutes` in the past.
1 version per minute, 1 run every 10 versions
"""

current_timestamp = int(time.time() * 1000)
data = {'part': 'part'}
run = 1234
counter = 0

for x in range(since_minutes + 1):
counter = counter + 1
from_ts = current_timestamp - x * 60 * 1000
to_ts = current_timestamp
metadata = {'RunNumber': str(run)}
version_info = ObjectVersion(path=path, validFrom=from_ts, validTo=to_ts, metadata=metadata)
self.ccdb.putVersion(version=version_info, data=data)
if x % 10 == 0:
run = run + 1

logging.debug(f"counter : {counter}")


if __name__ == '__main__':
unittest.main()
41 changes: 22 additions & 19 deletions Framework/script/RepoCleaner/tests/test_Ccdb.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,48 @@
import logging
import unittest
import requests
from typing import List

import responses

from Ccdb import Ccdb, ObjectVersion
from rules import production
from qcrepocleaner.Ccdb import Ccdb, ObjectVersion
from tests.test_utils import CCDB_TEST_URL


class TestCcdb(unittest.TestCase):

def setUp(self):
with open('../qcrepocleaner/objectsList.json') as f: # will close() when we leave this block
with open('objectsList.json') as f: # will close() when we leave this block
self.content_objectslist = f.read()
with open('../versionsList.json') as f: # will close() when we leave this block
with open('versionsList.json') as f: # will close() when we leave this block
self.content_versionslist = f.read()
self.ccdb = Ccdb('http://ccdb-test.cern.ch:8080')
self.ccdb = Ccdb(CCDB_TEST_URL)
logging.getLogger().setLevel(logging.DEBUG)

@responses.activate
def test_getObjectsList(self):
# Prepare mock response
responses.add(responses.GET, 'http://ccdb-test.cern.ch:8080/latest/.*',
responses.add(responses.GET, CCDB_TEST_URL + '/latest/.*',
self.content_objectslist, status=200)
# get list of objects
objectsList = self.ccdb.getObjectsList()
print(f"{objectsList}")
self.assertEqual(len(objectsList), 3)
self.assertEqual(objectsList[0], 'Test')
self.assertEqual(objectsList[1], 'ITSQcTask/ChipStaveCheck')
objects_list = self.ccdb.getObjectsList()
print(f"{objects_list}")
self.assertEqual(len(objects_list), 3)
self.assertEqual(objects_list[0], 'Test')
self.assertEqual(objects_list[1], 'ITSQcTask/ChipStaveCheck')

@responses.activate
def test_getVersionsList(self):
# Prepare mock response
object_path='asdfasdf/example'
responses.add(responses.GET, 'http://ccdb-test.cern.ch:8080/browse/'+object_path,
responses.add(responses.GET, CCDB_TEST_URL + '/browse/'+object_path,
self.content_versionslist, status=200)
# get versions for object
versionsList: List[ObjectVersion] = self.ccdb.getVersionsList(object_path)
print(f"{versionsList}")
self.assertEqual(len(versionsList), 2)
self.assertEqual(versionsList[0].path, object_path)
self.assertEqual(versionsList[1].path, object_path)
self.assertEqual(versionsList[1].metadata["custom"], "34")
versions_list: List[ObjectVersion] = self.ccdb.getVersionsList(object_path)
print(f"{versions_list}")
self.assertEqual(len(versions_list), 2)
self.assertEqual(versions_list[0].path, object_path)
self.assertEqual(versions_list[1].path, object_path)
self.assertEqual(versions_list[1].metadata["custom"], "34")

if __name__ == '__main__':
unittest.main()
Loading
Loading