From 2e36ead09d24566f5e103230654efe0bc515a6aa Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:31:18 +0200 Subject: [PATCH 01/25] Initial code import. --- asab/library/providers/git.py | 2 - asab/library/providers/libsreg.py | 143 ++++++++++++++++++++++++++++++ asab/library/service.py | 6 ++ 3 files changed, 149 insertions(+), 2 deletions(-) create mode 100644 asab/library/providers/libsreg.py diff --git a/asab/library/providers/git.py b/asab/library/providers/git.py index 415395874..67532171d 100644 --- a/asab/library/providers/git.py +++ b/asab/library/providers/git.py @@ -217,6 +217,4 @@ def _do_pull(self): return to_publish async def subscribe(self, path): - if not os.path.isdir(self.BasePath + path): - return self.SubscribedPaths.add(path) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py new file mode 100644 index 000000000..ef00a6028 --- /dev/null +++ b/asab/library/providers/libsreg.py @@ -0,0 +1,143 @@ +import os +import logging +import hashlib +import shutil +import random +import tarfile +import tempfile +import urllib.parse + +import aiohttp + +from .filesystem import FileSystemLibraryProvider + +# + +L = logging.getLogger(__name__) + +# + + +class LibsRegLibraryProvider(FileSystemLibraryProvider): + """ + Read-only provider to read from remote "library repository". + + It provides an option to specify more servers for more reliable content delivery. + + Example of the configuration: + + ```ini + [library] + providers= + ... + libsreg+https://libsreg.z6.web.core.windows.net,libsreg-secondary.z6.web.core.windows.net/lmio-common-library + ... + ``` + + """ + + def __init__(self, library, path, layer): + + url = urllib.parse.urlparse(path) + assert url.scheme.startswith('libsreg+') + + version = url.fragment + if version == '': + version = 'master' + + archname = url.path[1:] + + self.URLs = ["{scheme}://{netloc}/{archname}/{archname}-{version}.tar.xz".format( + scheme=url.scheme[8:], + netloc=netloc, + archname=archname, + version=version, + ) for netloc in url.netloc.split(',')] + assert len(self.URLs) > 0 + + tempdir = tempfile.gettempdir() + self.RepoPath = os.path.join( + tempdir, + "asab.library.libsreg", + hashlib.sha256(self.URLs[0].encode('utf-8')).hexdigest() + ) + + os.makedirs(os.path.join(self.RepoPath, "cur"), exist_ok=True) + + super().__init__(library, self.RepoPath, layer, set_ready=False) + + self.PullLock = False + + # TODO: Subscribption to changes in the library + self.SubscribedPaths = set() + + self.App.TaskService.schedule(self._periodic_pull(None)) + self.App.PubSub.subscribe("Application.tick/60!", self._periodic_pull) + + + async def _periodic_pull(self, event_name): + """ + Changes in remote repository are being pulled every minute. `PullLock` flag ensures that only if previous "pull" has finished, new one can start. + """ + if self.PullLock: + return + + self.PullLock = True + + try: + headers = {} + + # Check for existing E-Tag + etag_fname = os.path.join(self.RepoPath, "etag") + try: + with open(etag_fname, 'r') as f: + etag = f.read().strip() + headers['If-None-Match'] = etag + except FileNotFoundError: + pass + + url = random.choice(self.URLs) + + async with aiohttp.ClientSession() as session: + async with session.get(url, headers=headers) as response: + + if response.status == 200: # The request indicates a new version that we don't have yet + + etag_incoming = response.headers.get('ETag') + + fname = os.path.join(self.RepoPath, "new.tar.xz") + with open(fname, 'wb') as ftmp: + while True: + chunk = await response.content.read(16 * 1024) + if not chunk: + break + ftmp.write(chunk) + + with tarfile.open(fname, mode='r:xz') as tar: + tar.extractall(os.path.join(self.RepoPath, "new")) + + os.unlink(fname) + + # This will likely not work nicely with inotify detection of the change + # TODO: Replace this by some kind of rsync mechanism + if os.path.exists(os.path.join(self.RepoPath, "cur")): + shutil.rmtree(os.path.join(self.RepoPath, "cur")) + os.rename(os.path.join(self.RepoPath, "new"), os.path.join(self.RepoPath, "cur")) + + if etag_incoming is not None: + with open(etag_fname, 'w') as f: + f.write(etag_incoming) + + elif response.status == 304: + # The repository has not changed ... + pass + + else: + L.exception("Failed to download the library.", struct_data={"url": url, 'status': response.status}) + + finally: + self.PullLock = False + + + async def subscribe(self, path): + self.SubscribedPaths.add(path) diff --git a/asab/library/service.py b/asab/library/service.py index c2b214e95..323f035f7 100644 --- a/asab/library/service.py +++ b/asab/library/service.py @@ -91,8 +91,10 @@ def __init__(self, app: Application, service_name: str, paths: typing.Union[str, for layer, path in enumerate(paths): # Create library for each layer of paths self._create_library(path, layer) + app.PubSub.subscribe("Application.tick/60!", self._on_tick60) + async def finalize(self, app): while len(self.Libraries) > 0: lib = self.Libraries.pop(-1) @@ -119,6 +121,10 @@ def _create_library(self, path, layer): from .providers.git import GitLibraryProvider library_provider = GitLibraryProvider(self, path, layer) + elif path.startswith('libsreg+'): + from .providers.libsreg import LibsRegLibraryProvider + library_provider = LibsRegLibraryProvider(self, path, layer) + elif path == '' or path.startswith("#") or path.startswith(";"): # This is empty or commented line return From 5bf0ddee58ab29f322f3dfb22bbbbf6a5291a9b1 Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:33:37 +0200 Subject: [PATCH 02/25] Flake8 --- asab/library/providers/libsreg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index ef00a6028..1fdd4bff1 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -30,7 +30,7 @@ class LibsRegLibraryProvider(FileSystemLibraryProvider): [library] providers= ... - libsreg+https://libsreg.z6.web.core.windows.net,libsreg-secondary.z6.web.core.windows.net/lmio-common-library + libsreg+https://libsreg.z6.web.core.windows.net,libsreg-secondary.z6.web.core.windows.net/lmio-common-library ... ``` From 050fea8234a45e6537d765430930be8c361c983a Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:44:13 +0200 Subject: [PATCH 03/25] Add a rsync-like mechanism for working with the library changes. --- asab/library/dirsync.py | 59 +++++++++++++++++++++++++++++++ asab/library/providers/libsreg.py | 9 +++-- 2 files changed, 63 insertions(+), 5 deletions(-) create mode 100644 asab/library/dirsync.py diff --git a/asab/library/dirsync.py b/asab/library/dirsync.py new file mode 100644 index 000000000..858ba4dbe --- /dev/null +++ b/asab/library/dirsync.py @@ -0,0 +1,59 @@ +import os +import shutil +import filecmp + + +def synchronize_dirs(target, source): + ''' + Synchronizes 'source' directory into 'target' directory. + The 'source' directory remains unchanged. + + 1. Recursively walk through the "source" directory and compare files with the "target". + 2. If a file exists in "source" but not in "target", copy it. + 3. If a file exists in both but has been modified in "source", copy it to overwrite the one in "target". + 4. If a directory or file exists in "target" but not in "source", remove it. + ''' + + # Ensure target directory exists + if not os.path.exists(target): + os.makedirs(target) + + # Step 1: Recursively copy files from source to target + for dirpath, dirnames, filenames in os.walk(source): + # Compute relative path to the source base + relpath = os.path.relpath(dirpath, source) + target_dir = os.path.join(target, relpath) + + # Create directories in target if they don't exist + if not os.path.exists(target_dir): + os.makedirs(target_dir) + + # Check files and synchronize + for filename in filenames: + source_file = os.path.join(dirpath, filename) + target_file = os.path.join(target_dir, filename) + + # Copy if the file doesn't exist in target or if it's modified + if not os.path.exists(target_file) or not filecmp.cmp(source_file, target_file, shallow=False): + shutil.copy2(source_file, target_file) + + # Step 2: Recursively delete files/folders in target that don't exist in source + for dirpath, dirnames, filenames in os.walk(target, topdown=False): # topdown=False for depth-first + relpath = os.path.relpath(dirpath, target) + source_dir = os.path.join(source, relpath) + + # Check and remove files not in source + for filename in filenames: + target_file = os.path.join(dirpath, filename) + source_file = os.path.join(source_dir, filename) + + if not os.path.exists(source_file): + os.remove(target_file) + + # Check and remove directories not in source + for dirname in dirnames: + target_subdir = os.path.join(dirpath, dirname) + source_subdir = os.path.join(source_dir, dirname) + + if not os.path.exists(source_subdir): + shutil.rmtree(target_subdir) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 1fdd4bff1..628642db5 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -10,6 +10,7 @@ import aiohttp from .filesystem import FileSystemLibraryProvider +from ..dirsync import synchronize_dirs # @@ -118,11 +119,9 @@ async def _periodic_pull(self, event_name): os.unlink(fname) - # This will likely not work nicely with inotify detection of the change - # TODO: Replace this by some kind of rsync mechanism - if os.path.exists(os.path.join(self.RepoPath, "cur")): - shutil.rmtree(os.path.join(self.RepoPath, "cur")) - os.rename(os.path.join(self.RepoPath, "new"), os.path.join(self.RepoPath, "cur")) + # Move the new content in place + synchronize_dirs(os.path.join(self.RepoPath, "cur"), os.path.join(self.RepoPath, "new")) + shutil.rmtree(os.path.join(self.RepoPath, "new")) if etag_incoming is not None: with open(etag_fname, 'w') as f: From 7b0c7d3529ed21bb58ebdc7afa45b4c45bf145cc Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:48:14 +0200 Subject: [PATCH 04/25] Add a TODO --- asab/library/providers/libsreg.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 628642db5..2e580e65c 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -114,6 +114,9 @@ async def _periodic_pull(self, event_name): break ftmp.write(chunk) + # TODO: Following code is potentionally blocking and should be done in a proactor + # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE + with tarfile.open(fname, mode='r:xz') as tar: tar.extractall(os.path.join(self.RepoPath, "new")) @@ -127,6 +130,8 @@ async def _periodic_pull(self, event_name): with open(etag_fname, 'w') as f: f.write(etag_incoming) + # ⬆️⬆️⬆️ --------- END OF THE BLOCKING CODE + elif response.status == 304: # The repository has not changed ... pass From 267339b26fa0eed0cd883a066562afe7bc0f89b1 Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:58:11 +0200 Subject: [PATCH 05/25] Removal of the Python 3.7 from CI/CD due to aiohttp --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3c785f660..ce015e26b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 From 67af71553ea2ff0fa897f9d26e0af779a2a56cbe Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 13:59:21 +0200 Subject: [PATCH 06/25] Update ci.yaml --- .github/workflows/ci.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ce015e26b..db7661b2b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v3 From a6bae90f211a29307a9fc7af07c320c0dd2272c4 Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 14:00:45 +0200 Subject: [PATCH 07/25] Bump of version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f9c19fbe2..7666fbae9 100644 --- a/setup.py +++ b/setup.py @@ -62,11 +62,11 @@ def run(self): platforms='any', classifiers=[ 'Development Status :: 5 - Production/Stable', - 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', ], keywords='asyncio', packages=find_packages(exclude=['module_sample']), From 8d87dce27e1dad194317eebb8daa6932b93d48ed Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Sun, 8 Oct 2023 14:02:14 +0200 Subject: [PATCH 08/25] Finetune of Python versions. --- .github/workflows/ci.yaml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index db7661b2b..ce015e26b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -8,7 +8,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11"] steps: - uses: actions/checkout@v3 diff --git a/setup.py b/setup.py index 7666fbae9..164256471 100644 --- a/setup.py +++ b/setup.py @@ -66,7 +66,6 @@ def run(self): 'Programming Language :: Python :: 3.9', 'Programming Language :: Python :: 3.10', 'Programming Language :: Python :: 3.11', - 'Programming Language :: Python :: 3.12', ], keywords='asyncio', packages=find_packages(exclude=['module_sample']), From e2ab5d470d21f3c4026732257fcb398b7f8f11d5 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 09:37:25 +0200 Subject: [PATCH 09/25] Create repor subpath --- asab/library/providers/libsreg.py | 22 ++++++++++++++++--- examples/library/.disabled.yaml | 19 ---------------- examples/library/Dashboards/Office 365.yaml | 0 .../library/Dashboards/dashboards-fs.json | 1 - examples/library/test.json | 3 --- 5 files changed, 19 insertions(+), 26 deletions(-) delete mode 100644 examples/library/.disabled.yaml delete mode 100644 examples/library/Dashboards/Office 365.yaml delete mode 100644 examples/library/Dashboards/dashboards-fs.json delete mode 100644 examples/library/test.json diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 2e580e65c..635a6a3ed 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -9,6 +9,7 @@ import aiohttp +from ...config import Config from .filesystem import FileSystemLibraryProvider from ..dirsync import synchronize_dirs @@ -17,6 +18,14 @@ L = logging.getLogger(__name__) # +Config.add_defaults( + { + 'library': { + # You may specify multiple ElasticSearch nodes by e.g. http://es01:9200,es02:9200,es03:9200/ + 'repo_sub_path': '/' + } + } +) class LibsRegLibraryProvider(FileSystemLibraryProvider): @@ -57,19 +66,26 @@ def __init__(self, library, path, layer): assert len(self.URLs) > 0 tempdir = tempfile.gettempdir() + self.RepoSubPath = Config.get('library', 'repo_sub_path') self.RepoPath = os.path.join( tempdir, "asab.library.libsreg", hashlib.sha256(self.URLs[0].encode('utf-8')).hexdigest() ) - os.makedirs(os.path.join(self.RepoPath, "cur"), exist_ok=True) + if self.RepoSubPath == "/": + # No additional subdirectory, use the base directory as is. + self.FinalPath = self.RepoPath + else: + # Append the subdirectory to the base path and create it. + self.FinalPath = os.path.join(self.RepoPath, self.RepoSubPath) + os.makedirs(self.FinalPath, exist_ok=True) super().__init__(library, self.RepoPath, layer, set_ready=False) self.PullLock = False - # TODO: Subscribption to changes in the library + # TODO: Subscription to changes in the library self.SubscribedPaths = set() self.App.TaskService.schedule(self._periodic_pull(None)) @@ -123,7 +139,7 @@ async def _periodic_pull(self, event_name): os.unlink(fname) # Move the new content in place - synchronize_dirs(os.path.join(self.RepoPath, "cur"), os.path.join(self.RepoPath, "new")) + synchronize_dirs(self.FinalPath, os.path.join(self.RepoPath, "new")) shutil.rmtree(os.path.join(self.RepoPath, "new")) if etag_incoming is not None: diff --git a/examples/library/.disabled.yaml b/examples/library/.disabled.yaml deleted file mode 100644 index b66236d17..000000000 --- a/examples/library/.disabled.yaml +++ /dev/null @@ -1,19 +0,0 @@ -/Dashboards/Office 365.json: -- bs-discover -- dikobraz -- kibana-sample -- AliveChicken78 -/Dashboards/Refactored dasbhoard.json: -- bs-discover -- bs-speed -- kibana-sample -/import_test.json: -- test 2 -- test 1 -- default -/test.html: -- test 2 -- default -/test.json: '*' -/test_library_03: -- default diff --git a/examples/library/Dashboards/Office 365.yaml b/examples/library/Dashboards/Office 365.yaml deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/library/Dashboards/dashboards-fs.json b/examples/library/Dashboards/dashboards-fs.json deleted file mode 100644 index 0967ef424..000000000 --- a/examples/library/Dashboards/dashboards-fs.json +++ /dev/null @@ -1 +0,0 @@ -{} diff --git a/examples/library/test.json b/examples/library/test.json deleted file mode 100644 index f2aed1303..000000000 --- a/examples/library/test.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "foo": "bar" -} \ No newline at end of file From 09bb36bee4899b87fa2cd4f4a8920b44ac1f02f1 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 09:42:00 +0200 Subject: [PATCH 10/25] add files --- examples/library/Dashboards/Office 365.yaml | 0 examples/library/Dashboards/dashboards-fs.json | 1 + examples/library/test.json | 3 +++ 3 files changed, 4 insertions(+) create mode 100644 examples/library/Dashboards/Office 365.yaml create mode 100644 examples/library/Dashboards/dashboards-fs.json create mode 100644 examples/library/test.json diff --git a/examples/library/Dashboards/Office 365.yaml b/examples/library/Dashboards/Office 365.yaml new file mode 100644 index 000000000..e69de29bb diff --git a/examples/library/Dashboards/dashboards-fs.json b/examples/library/Dashboards/dashboards-fs.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/examples/library/Dashboards/dashboards-fs.json @@ -0,0 +1 @@ +{} diff --git a/examples/library/test.json b/examples/library/test.json new file mode 100644 index 000000000..f2aed1303 --- /dev/null +++ b/examples/library/test.json @@ -0,0 +1,3 @@ +{ + "foo": "bar" +} \ No newline at end of file From 38d8b8aeec4e750e759433451a70e762582e0619 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 10:17:49 +0200 Subject: [PATCH 11/25] add missing file --- examples/library/disabled.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/library/disabled.yaml diff --git a/examples/library/disabled.yaml b/examples/library/disabled.yaml new file mode 100644 index 000000000..b66236d17 --- /dev/null +++ b/examples/library/disabled.yaml @@ -0,0 +1,19 @@ +/Dashboards/Office 365.json: +- bs-discover +- dikobraz +- kibana-sample +- AliveChicken78 +/Dashboards/Refactored dasbhoard.json: +- bs-discover +- bs-speed +- kibana-sample +/import_test.json: +- test 2 +- test 1 +- default +/test.html: +- test 2 +- default +/test.json: '*' +/test_library_03: +- default From 231f30a645089180a86e7d74050eb793ee84c5bd Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 10:29:05 +0200 Subject: [PATCH 12/25] rename --- examples/library/{disabled.yaml => .disabled.yaml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename examples/library/{disabled.yaml => .disabled.yaml} (100%) diff --git a/examples/library/disabled.yaml b/examples/library/.disabled.yaml similarity index 100% rename from examples/library/disabled.yaml rename to examples/library/.disabled.yaml From 4885397fb74f8ff4cba034a8a3edac31a74a5c22 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 11:28:46 +0200 Subject: [PATCH 13/25] repo creation --- asab/library/providers/libsreg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 635a6a3ed..c8049a7ff 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -79,7 +79,9 @@ def __init__(self, library, path, layer): else: # Append the subdirectory to the base path and create it. self.FinalPath = os.path.join(self.RepoPath, self.RepoSubPath) - os.makedirs(self.FinalPath, exist_ok=True) + + # Ensure the directory exists + os.makedirs(self.FinalPath, exist_ok=True) super().__init__(library, self.RepoPath, layer, set_ready=False) From c760f39e672e9ff77918f51b48348c4f0264c157 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 12:16:00 +0200 Subject: [PATCH 14/25] check to delete --- asab/library/providers/libsreg.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index c8049a7ff..7fe9d6468 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -123,8 +123,7 @@ async def _periodic_pull(self, event_name): if response.status == 200: # The request indicates a new version that we don't have yet etag_incoming = response.headers.get('ETag') - - fname = os.path.join(self.RepoPath, "new.tar.xz") + fname = os.path.join(self.FinalPath, "new.tar.xz") with open(fname, 'wb') as ftmp: while True: chunk = await response.content.read(16 * 1024) @@ -136,13 +135,15 @@ async def _periodic_pull(self, event_name): # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE with tarfile.open(fname, mode='r:xz') as tar: - tar.extractall(os.path.join(self.RepoPath, "new")) + tar.extractall(os.path.join(self.FinalPath, "new")) os.unlink(fname) # Move the new content in place synchronize_dirs(self.FinalPath, os.path.join(self.RepoPath, "new")) - shutil.rmtree(os.path.join(self.RepoPath, "new")) + path_to_remove = os.path.join(self.RepoPath, "new") + if os.path.exists(path_to_remove): + shutil.rmtree(path_to_remove) if etag_incoming is not None: with open(etag_fname, 'w') as f: From 3c7b15cee5c8c5754fd243532d2a3748cc63b2d9 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 16:00:29 +0200 Subject: [PATCH 15/25] check to delete --- asab/library/providers/libsreg.py | 80 ++++++++++--------------------- 1 file changed, 25 insertions(+), 55 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 7fe9d6468..563515d79 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -21,7 +21,6 @@ Config.add_defaults( { 'library': { - # You may specify multiple ElasticSearch nodes by e.g. http://es01:9200,es02:9200,es03:9200/ 'repo_sub_path': '/' } } @@ -29,32 +28,13 @@ class LibsRegLibraryProvider(FileSystemLibraryProvider): - """ - Read-only provider to read from remote "library repository". - - It provides an option to specify more servers for more reliable content delivery. - - Example of the configuration: - - ```ini - [library] - providers= - ... - libsreg+https://libsreg.z6.web.core.windows.net,libsreg-secondary.z6.web.core.windows.net/lmio-common-library - ... - ``` - - """ def __init__(self, library, path, layer): url = urllib.parse.urlparse(path) assert url.scheme.startswith('libsreg+') - version = url.fragment - if version == '': - version = 'master' - + version = url.fragment if url.fragment else 'master' archname = url.path[1:] self.URLs = ["{scheme}://{netloc}/{archname}/{archname}-{version}.tar.xz".format( @@ -66,38 +46,31 @@ def __init__(self, library, path, layer): assert len(self.URLs) > 0 tempdir = tempfile.gettempdir() - self.RepoSubPath = Config.get('library', 'repo_sub_path') self.RepoPath = os.path.join( tempdir, "asab.library.libsreg", hashlib.sha256(self.URLs[0].encode('utf-8')).hexdigest() ) - if self.RepoSubPath == "/": - # No additional subdirectory, use the base directory as is. - self.FinalPath = self.RepoPath - else: - # Append the subdirectory to the base path and create it. - self.FinalPath = os.path.join(self.RepoPath, self.RepoSubPath) + # Ensure the base repository path exists + os.makedirs(self.RepoPath, exist_ok=True) + + # Determine the final path based on the configuration + repo_sub_path = Config.get('library', 'repo_sub_path') + self.FinalPath = os.path.join(self.RepoPath, repo_sub_path.strip("/")) if repo_sub_path != "/" else self.RepoPath - # Ensure the directory exists + # Ensure the final path exists os.makedirs(self.FinalPath, exist_ok=True) super().__init__(library, self.RepoPath, layer, set_ready=False) self.PullLock = False - - # TODO: Subscription to changes in the library self.SubscribedPaths = set() self.App.TaskService.schedule(self._periodic_pull(None)) self.App.PubSub.subscribe("Application.tick/60!", self._periodic_pull) - async def _periodic_pull(self, event_name): - """ - Changes in remote repository are being pulled every minute. `PullLock` flag ensures that only if previous "pull" has finished, new one can start. - """ if self.PullLock: return @@ -105,8 +78,6 @@ async def _periodic_pull(self, event_name): try: headers = {} - - # Check for existing E-Tag etag_fname = os.path.join(self.RepoPath, "etag") try: with open(etag_fname, 'r') as f: @@ -120,10 +91,14 @@ async def _periodic_pull(self, event_name): async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as response: - if response.status == 200: # The request indicates a new version that we don't have yet - + if response.status == 200: etag_incoming = response.headers.get('ETag') - fname = os.path.join(self.FinalPath, "new.tar.xz") + + # Ensure the new directory exists before writing the file + new_dir = os.path.join(self.RepoPath, "new") + os.makedirs(new_dir, exist_ok=True) + + fname = os.path.join(new_dir, "new.tar.xz") with open(fname, 'wb') as ftmp: while True: chunk = await response.content.read(16 * 1024) @@ -131,29 +106,25 @@ async def _periodic_pull(self, event_name): break ftmp.write(chunk) - # TODO: Following code is potentionally blocking and should be done in a proactor - # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE - with tarfile.open(fname, mode='r:xz') as tar: - tar.extractall(os.path.join(self.FinalPath, "new")) + tar.extractall(new_dir) os.unlink(fname) - # Move the new content in place - synchronize_dirs(self.FinalPath, os.path.join(self.RepoPath, "new")) - path_to_remove = os.path.join(self.RepoPath, "new") - if os.path.exists(path_to_remove): - shutil.rmtree(path_to_remove) + # Synchronize the directories + synchronize_dirs(self.FinalPath, new_dir) + # Safely remove the new directory if it exists + if os.path.exists(new_dir): + shutil.rmtree(new_dir) + else: + L.warning("Directory not found for removal: {}".format(new_dir)) - if etag_incoming is not None: + if etag_incoming is not None: with open(etag_fname, 'w') as f: f.write(etag_incoming) - # ⬆️⬆️⬆️ --------- END OF THE BLOCKING CODE - elif response.status == 304: - # The repository has not changed ... - pass + pass # No changes else: L.exception("Failed to download the library.", struct_data={"url": url, 'status': response.status}) @@ -161,6 +132,5 @@ async def _periodic_pull(self, event_name): finally: self.PullLock = False - async def subscribe(self, path): self.SubscribedPaths.add(path) From d7d7b499064c943958bf36a23640ba9dde9207d2 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 16:05:41 +0200 Subject: [PATCH 16/25] fix var assignment --- asab/library/providers/libsreg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 563515d79..d5a10d5c2 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -87,7 +87,7 @@ async def _periodic_pull(self, event_name): pass url = random.choice(self.URLs) - + etag_incoming = None async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as response: From 6283aa09f0c1a4a88499bd04c18cdbf1f265776b Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 16:46:11 +0200 Subject: [PATCH 17/25] Small change --- asab/library/providers/libsreg.py | 83 +++++++++++++++++-------------- 1 file changed, 47 insertions(+), 36 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index d5a10d5c2..1c4de2c37 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -9,7 +9,6 @@ import aiohttp -from ...config import Config from .filesystem import FileSystemLibraryProvider from ..dirsync import synchronize_dirs @@ -18,23 +17,35 @@ L = logging.getLogger(__name__) # -Config.add_defaults( - { - 'library': { - 'repo_sub_path': '/' - } - } -) class LibsRegLibraryProvider(FileSystemLibraryProvider): + """ + Read-only provider to read from remote "library repository". + + It provides an option to specify more servers for more reliable content delivery. + + Example of the configuration: + + ```ini + [library] + providers= + ... + libsreg+https://libsreg.z6.web.core.windows.net,libsreg-secondary.z6.web.core.windows.net/lmio-common-library + ... + ``` + + """ def __init__(self, library, path, layer): url = urllib.parse.urlparse(path) assert url.scheme.startswith('libsreg+') - version = url.fragment if url.fragment else 'master' + version = url.fragment + if version == '': + version = 'master' + archname = url.path[1:] self.URLs = ["{scheme}://{netloc}/{archname}/{archname}-{version}.tar.xz".format( @@ -52,25 +63,23 @@ def __init__(self, library, path, layer): hashlib.sha256(self.URLs[0].encode('utf-8')).hexdigest() ) - # Ensure the base repository path exists - os.makedirs(self.RepoPath, exist_ok=True) - - # Determine the final path based on the configuration - repo_sub_path = Config.get('library', 'repo_sub_path') - self.FinalPath = os.path.join(self.RepoPath, repo_sub_path.strip("/")) if repo_sub_path != "/" else self.RepoPath - - # Ensure the final path exists - os.makedirs(self.FinalPath, exist_ok=True) + os.makedirs(os.path.join(self.RepoPath), exist_ok=True) super().__init__(library, self.RepoPath, layer, set_ready=False) self.PullLock = False + + # TODO: Subscribption to changes in the library self.SubscribedPaths = set() self.App.TaskService.schedule(self._periodic_pull(None)) self.App.PubSub.subscribe("Application.tick/60!", self._periodic_pull) + async def _periodic_pull(self, event_name): + """ + Changes in remote repository are being pulled every minute. `PullLock` flag ensures that only if previous "pull" has finished, new one can start. + """ if self.PullLock: return @@ -78,6 +87,8 @@ async def _periodic_pull(self, event_name): try: headers = {} + + # Check for existing E-Tag etag_fname = os.path.join(self.RepoPath, "etag") try: with open(etag_fname, 'r') as f: @@ -87,18 +98,15 @@ async def _periodic_pull(self, event_name): pass url = random.choice(self.URLs) - etag_incoming = None + async with aiohttp.ClientSession() as session: async with session.get(url, headers=headers) as response: - if response.status == 200: - etag_incoming = response.headers.get('ETag') + if response.status == 200: # The request indicates a new version that we don't have yet - # Ensure the new directory exists before writing the file - new_dir = os.path.join(self.RepoPath, "new") - os.makedirs(new_dir, exist_ok=True) + etag_incoming = response.headers.get('ETag') - fname = os.path.join(new_dir, "new.tar.xz") + fname = os.path.join(self.RepoPath, "new.tar.xz") with open(fname, 'wb') as ftmp: while True: chunk = await response.content.read(16 * 1024) @@ -106,25 +114,27 @@ async def _periodic_pull(self, event_name): break ftmp.write(chunk) + # TODO: Following code is potentionally blocking and should be done in a proactor + # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE + with tarfile.open(fname, mode='r:xz') as tar: - tar.extractall(new_dir) + tar.extractall(os.path.join(self.RepoPath, "new")) os.unlink(fname) - # Synchronize the directories - synchronize_dirs(self.FinalPath, new_dir) - # Safely remove the new directory if it exists - if os.path.exists(new_dir): - shutil.rmtree(new_dir) - else: - L.warning("Directory not found for removal: {}".format(new_dir)) + # Move the new content in place + synchronize_dirs(self.RepoPath, os.path.join(self.RepoPath, "new")) + shutil.rmtree(os.path.join(self.RepoPath, "new")) - if etag_incoming is not None: + if etag_incoming is not None: with open(etag_fname, 'w') as f: f.write(etag_incoming) + # ⬆️⬆️⬆️ --------- END OF THE BLOCKING CODE + elif response.status == 304: - pass # No changes + # The repository has not changed ... + pass else: L.exception("Failed to download the library.", struct_data={"url": url, 'status': response.status}) @@ -132,5 +142,6 @@ async def _periodic_pull(self, event_name): finally: self.PullLock = False + async def subscribe(self, path): - self.SubscribedPaths.add(path) + self.SubscribedPaths.add(path) \ No newline at end of file From 28de5c150c965116cca9125ccfc00647a0a42704 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 16:52:03 +0200 Subject: [PATCH 18/25] Flake8 issues --- asab/library/providers/libsreg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 1c4de2c37..10825e5cd 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -144,4 +144,4 @@ async def _periodic_pull(self, event_name): async def subscribe(self, path): - self.SubscribedPaths.add(path) \ No newline at end of file + self.SubscribedPaths.add(path) From f1c6cfcc32d2a43a43c3fde01047f8c1830e20ab Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 17:32:44 +0200 Subject: [PATCH 19/25] small change --- asab/library/providers/libsreg.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 10825e5cd..3a9e89bc6 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -117,14 +117,17 @@ async def _periodic_pull(self, event_name): # TODO: Following code is potentionally blocking and should be done in a proactor # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE + breakpoint() with tarfile.open(fname, mode='r:xz') as tar: tar.extractall(os.path.join(self.RepoPath, "new")) os.unlink(fname) # Move the new content in place - synchronize_dirs(self.RepoPath, os.path.join(self.RepoPath, "new")) - shutil.rmtree(os.path.join(self.RepoPath, "new")) + synchronize_dirs(os.path.join(self.RepoPath), os.path.join(self.RepoPath, "new")) + + if os.path.exists(os.path.join(self.RepoPath, "new")): + shutil.rmtree(os.path.join(self.RepoPath, "new")) if etag_incoming is not None: with open(etag_fname, 'w') as f: @@ -144,4 +147,4 @@ async def _periodic_pull(self, event_name): async def subscribe(self, path): - self.SubscribedPaths.add(path) + self.SubscribedPaths.add(path) \ No newline at end of file From 65d54023817e09fc2b366bba434d9a9d642a2020 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Mon, 16 Oct 2023 19:56:34 +0200 Subject: [PATCH 20/25] working code --- asab/library/dirsync.py | 22 ---------------------- asab/library/providers/libsreg.py | 4 ++-- 2 files changed, 2 insertions(+), 24 deletions(-) diff --git a/asab/library/dirsync.py b/asab/library/dirsync.py index 858ba4dbe..5a5d3470c 100644 --- a/asab/library/dirsync.py +++ b/asab/library/dirsync.py @@ -32,28 +32,6 @@ def synchronize_dirs(target, source): for filename in filenames: source_file = os.path.join(dirpath, filename) target_file = os.path.join(target_dir, filename) - # Copy if the file doesn't exist in target or if it's modified if not os.path.exists(target_file) or not filecmp.cmp(source_file, target_file, shallow=False): shutil.copy2(source_file, target_file) - - # Step 2: Recursively delete files/folders in target that don't exist in source - for dirpath, dirnames, filenames in os.walk(target, topdown=False): # topdown=False for depth-first - relpath = os.path.relpath(dirpath, target) - source_dir = os.path.join(source, relpath) - - # Check and remove files not in source - for filename in filenames: - target_file = os.path.join(dirpath, filename) - source_file = os.path.join(source_dir, filename) - - if not os.path.exists(source_file): - os.remove(target_file) - - # Check and remove directories not in source - for dirname in dirnames: - target_subdir = os.path.join(dirpath, dirname) - source_subdir = os.path.join(source_dir, dirname) - - if not os.path.exists(source_subdir): - shutil.rmtree(target_subdir) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 3a9e89bc6..80d16d126 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -117,14 +117,14 @@ async def _periodic_pull(self, event_name): # TODO: Following code is potentionally blocking and should be done in a proactor # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE - breakpoint() + with tarfile.open(fname, mode='r:xz') as tar: tar.extractall(os.path.join(self.RepoPath, "new")) os.unlink(fname) # Move the new content in place - synchronize_dirs(os.path.join(self.RepoPath), os.path.join(self.RepoPath, "new")) + synchronize_dirs(self.RepoPath, os.path.join(self.RepoPath, "new")) if os.path.exists(os.path.join(self.RepoPath, "new")): shutil.rmtree(os.path.join(self.RepoPath, "new")) From 7d5f646c525016e2b24f8f33f3838d0b1e145404 Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Tue, 17 Oct 2023 05:49:46 +0200 Subject: [PATCH 21/25] sync update --- asab/library/dirsync.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/asab/library/dirsync.py b/asab/library/dirsync.py index 5a5d3470c..d90cffcdf 100644 --- a/asab/library/dirsync.py +++ b/asab/library/dirsync.py @@ -35,3 +35,34 @@ def synchronize_dirs(target, source): # Copy if the file doesn't exist in target or if it's modified if not os.path.exists(target_file) or not filecmp.cmp(source_file, target_file, shallow=False): shutil.copy2(source_file, target_file) + + # Step 2: Recursively delete files/folders in target that don't exist in source + for dirpath, dirnames, filenames in os.walk(target, topdown=False): # topdown=False for depth-first + relpath = os.path.relpath(dirpath, target) + source_dir = os.path.join(source, relpath) + + # Skip the 'new' directory in the target + if os.path.normpath(dirpath) == os.path.normpath(os.path.join(target, 'new')): + continue + + # Check and remove files not in source + for filename in filenames: + target_file = os.path.join(dirpath, filename) + source_file = os.path.join(source_dir, filename) + + # Do not remove if the file is in the 'new' directory + if 'new' not in os.path.relpath(target_file, target).split(os.path.sep): + if not os.path.exists(source_file): + os.remove(target_file) + + # Check and remove directories not in source + for dirname in dirnames: + target_subdir = os.path.join(dirpath, dirname) + source_subdir = os.path.join(source_dir, dirname) + + # Do not remove the 'new' directory or its subdirectories + if 'new' in os.path.relpath(target_subdir, target).split(os.path.sep): + continue + + if not os.path.exists(source_subdir): + shutil.rmtree(target_subdir) From 5fee6ce4c0b19f767f4008452f5d4936f3f01ded Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Tue, 17 Oct 2023 08:11:37 +0200 Subject: [PATCH 22/25] Working --- asab/library/dirsync.py | 17 +++-------- asab/library/providers/libsreg.py | 50 +++++++++++++------------------ 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/asab/library/dirsync.py b/asab/library/dirsync.py index d90cffcdf..f12dfae0b 100644 --- a/asab/library/dirsync.py +++ b/asab/library/dirsync.py @@ -32,6 +32,7 @@ def synchronize_dirs(target, source): for filename in filenames: source_file = os.path.join(dirpath, filename) target_file = os.path.join(target_dir, filename) + # Copy if the file doesn't exist in target or if it's modified if not os.path.exists(target_file) or not filecmp.cmp(source_file, target_file, shallow=False): shutil.copy2(source_file, target_file) @@ -41,28 +42,18 @@ def synchronize_dirs(target, source): relpath = os.path.relpath(dirpath, target) source_dir = os.path.join(source, relpath) - # Skip the 'new' directory in the target - if os.path.normpath(dirpath) == os.path.normpath(os.path.join(target, 'new')): - continue - # Check and remove files not in source for filename in filenames: target_file = os.path.join(dirpath, filename) source_file = os.path.join(source_dir, filename) - # Do not remove if the file is in the 'new' directory - if 'new' not in os.path.relpath(target_file, target).split(os.path.sep): - if not os.path.exists(source_file): - os.remove(target_file) + if not os.path.exists(source_file): + os.remove(target_file) # Check and remove directories not in source for dirname in dirnames: target_subdir = os.path.join(dirpath, dirname) source_subdir = os.path.join(source_dir, dirname) - # Do not remove the 'new' directory or its subdirectories - if 'new' in os.path.relpath(target_subdir, target).split(os.path.sep): - continue - if not os.path.exists(source_subdir): - shutil.rmtree(target_subdir) + shutil.rmtree(target_subdir) \ No newline at end of file diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 80d16d126..404828254 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -75,7 +75,6 @@ def __init__(self, library, path, layer): self.App.TaskService.schedule(self._periodic_pull(None)) self.App.PubSub.subscribe("Application.tick/60!", self._periodic_pull) - async def _periodic_pull(self, event_name): """ Changes in remote repository are being pulled every minute. `PullLock` flag ensures that only if previous "pull" has finished, new one can start. @@ -106,34 +105,25 @@ async def _periodic_pull(self, event_name): etag_incoming = response.headers.get('ETag') - fname = os.path.join(self.RepoPath, "new.tar.xz") - with open(fname, 'wb') as ftmp: - while True: - chunk = await response.content.read(16 * 1024) - if not chunk: - break - ftmp.write(chunk) - - # TODO: Following code is potentionally blocking and should be done in a proactor - # ⬇️⬇️⬇️ ---------- START OF THE BLOCKING CODE - - - with tarfile.open(fname, mode='r:xz') as tar: - tar.extractall(os.path.join(self.RepoPath, "new")) - - os.unlink(fname) - - # Move the new content in place - synchronize_dirs(self.RepoPath, os.path.join(self.RepoPath, "new")) - - if os.path.exists(os.path.join(self.RepoPath, "new")): - shutil.rmtree(os.path.join(self.RepoPath, "new")) - - if etag_incoming is not None: - with open(etag_fname, 'w') as f: - f.write(etag_incoming) - - # ⬆️⬆️⬆️ --------- END OF THE BLOCKING CODE + # Create a separate temporary directory for extraction + with tempfile.TemporaryDirectory() as temp_extract_dir: + fname = os.path.join(temp_extract_dir, "new.tar.xz") + with open(fname, 'wb') as ftmp: + while True: + chunk = await response.content.read(16 * 1024) + if not chunk: + break + ftmp.write(chunk) + + # Extract the contents to the temporary directory + with tarfile.open(fname, mode='r:xz') as tar: + tar.extractall(temp_extract_dir) + + # Synchronize the directories + synchronize_dirs(self.RepoPath, temp_extract_dir) + if etag_incoming is not None: + with open(etag_fname, 'w') as f: + f.write(etag_incoming) elif response.status == 304: # The repository has not changed ... @@ -147,4 +137,4 @@ async def _periodic_pull(self, event_name): async def subscribe(self, path): - self.SubscribedPaths.add(path) \ No newline at end of file + self.SubscribedPaths.add(path) From 778ea027fc2293ad120c5b56f48172e9d33a957e Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Tue, 17 Oct 2023 13:13:11 +0200 Subject: [PATCH 23/25] Flake8 --- asab/library/dirsync.py | 2 +- asab/library/providers/libsreg.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/asab/library/dirsync.py b/asab/library/dirsync.py index f12dfae0b..858ba4dbe 100644 --- a/asab/library/dirsync.py +++ b/asab/library/dirsync.py @@ -56,4 +56,4 @@ def synchronize_dirs(target, source): source_subdir = os.path.join(source_dir, dirname) if not os.path.exists(source_subdir): - shutil.rmtree(target_subdir) \ No newline at end of file + shutil.rmtree(target_subdir) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 404828254..a4de95c4a 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -1,7 +1,6 @@ import os import logging import hashlib -import shutil import random import tarfile import tempfile From 9bebc08916e81692af06132241bd2e7b6e172ef6 Mon Sep 17 00:00:00 2001 From: Ales Teska Date: Wed, 18 Oct 2023 14:52:41 +0200 Subject: [PATCH 24/25] Reviewing --- asab/library/providers/libsreg.py | 51 +++++++++++++++++++------------ 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index a4de95c4a..691498bc6 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -56,9 +56,13 @@ def __init__(self, library, path, layer): assert len(self.URLs) > 0 tempdir = tempfile.gettempdir() - self.RepoPath = os.path.join( + self.RootPath = os.path.join( tempdir, "asab.library.libsreg", + ) + + self.RepoPath = os.path.join( + self.RootPath, hashlib.sha256(self.URLs[0].encode('utf-8')).hexdigest() ) @@ -104,25 +108,32 @@ async def _periodic_pull(self, event_name): etag_incoming = response.headers.get('ETag') - # Create a separate temporary directory for extraction - with tempfile.TemporaryDirectory() as temp_extract_dir: - fname = os.path.join(temp_extract_dir, "new.tar.xz") - with open(fname, 'wb') as ftmp: - while True: - chunk = await response.content.read(16 * 1024) - if not chunk: - break - ftmp.write(chunk) - - # Extract the contents to the temporary directory - with tarfile.open(fname, mode='r:xz') as tar: - tar.extractall(temp_extract_dir) - - # Synchronize the directories - synchronize_dirs(self.RepoPath, temp_extract_dir) - if etag_incoming is not None: - with open(etag_fname, 'w') as f: - f.write(etag_incoming) + # Download new version + newtarfname = os.path.join(self.RootPath, "new.tar.xz") + with open(fname, 'wb') as ftmp: + while True: + chunk = await response.content.read(16 * 1024) + if not chunk: + break + ftmp.write(chunk) + + # Extract the contents to the temporary directory + temp_extract_dir = os.path.join( + self.RootPath, + "new" + ) + # TODO: Remove temp_extract_dir if exists (from last, failed run) + with tarfile.open(newtarfname, mode='r:xz') as tar: + tar.extractall(temp_extract_dir) + + # Synchronize the directories + synchronize_dirs(self.RepoPath, temp_extract_dir) + if etag_incoming is not None: + with open(etag_fname, 'w') as f: + f.write(etag_incoming) + + # TODO: Remove temp_extract_dir + # TODO: Remove newtarfname elif response.status == 304: # The repository has not changed ... From 8fb0ec39e0956ceba7b5897155d0c9c3c872537b Mon Sep 17 00:00:00 2001 From: mshivashankar Date: Tue, 24 Oct 2023 07:29:40 +0200 Subject: [PATCH 25/25] Comple TODOs --- asab/library/providers/libsreg.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/asab/library/providers/libsreg.py b/asab/library/providers/libsreg.py index 691498bc6..9578630f9 100644 --- a/asab/library/providers/libsreg.py +++ b/asab/library/providers/libsreg.py @@ -5,6 +5,7 @@ import tarfile import tempfile import urllib.parse +import shutil import aiohttp @@ -110,7 +111,7 @@ async def _periodic_pull(self, event_name): # Download new version newtarfname = os.path.join(self.RootPath, "new.tar.xz") - with open(fname, 'wb') as ftmp: + with open(newtarfname, 'wb') as ftmp: while True: chunk = await response.content.read(16 * 1024) if not chunk: @@ -122,6 +123,11 @@ async def _periodic_pull(self, event_name): self.RootPath, "new" ) + + # Remove temp_extract_dir if it exists (from the last, failed run) + if os.path.exists(temp_extract_dir): + shutil.rmtree(temp_extract_dir) + # TODO: Remove temp_extract_dir if exists (from last, failed run) with tarfile.open(newtarfname, mode='r:xz') as tar: tar.extractall(temp_extract_dir) @@ -132,8 +138,13 @@ async def _periodic_pull(self, event_name): with open(etag_fname, 'w') as f: f.write(etag_incoming) - # TODO: Remove temp_extract_dir - # TODO: Remove newtarfname + # Remove temp_extract_dir + if os.path.exists(temp_extract_dir): + shutil.rmtree(temp_extract_dir) + + # Remove newtarfname + if os.path.exists(newtarfname): + os.remove(newtarfname) elif response.status == 304: # The repository has not changed ...