From 4c9f04ce8c96d5ba1a5e8a4e7ca0f72234577276 Mon Sep 17 00:00:00 2001 From: eLBati Date: Sat, 31 Jul 2021 08:21:49 +0200 Subject: [PATCH 1/6] ADD attachment_db_by_checksum --- attachment_db_by_checksum/__init__.py | 1 + attachment_db_by_checksum/__manifest__.py | 22 ++++++ attachment_db_by_checksum/models/__init__.py | 2 + .../models/ir_attachment.py | 72 +++++++++++++++++++ .../models/ir_attachment_content.py | 16 +++++ .../readme/CONFIGURE.rst | 3 + .../readme/CONTRIBUTORS.rst | 3 + .../readme/DESCRIPTION.rst | 3 + .../security/ir.model.access.csv | 4 ++ 9 files changed, 126 insertions(+) create mode 100644 attachment_db_by_checksum/__init__.py create mode 100644 attachment_db_by_checksum/__manifest__.py create mode 100644 attachment_db_by_checksum/models/__init__.py create mode 100644 attachment_db_by_checksum/models/ir_attachment.py create mode 100644 attachment_db_by_checksum/models/ir_attachment_content.py create mode 100644 attachment_db_by_checksum/readme/CONFIGURE.rst create mode 100644 attachment_db_by_checksum/readme/CONTRIBUTORS.rst create mode 100644 attachment_db_by_checksum/readme/DESCRIPTION.rst create mode 100644 attachment_db_by_checksum/security/ir.model.access.csv diff --git a/attachment_db_by_checksum/__init__.py b/attachment_db_by_checksum/__init__.py new file mode 100644 index 0000000000..0650744f6b --- /dev/null +++ b/attachment_db_by_checksum/__init__.py @@ -0,0 +1 @@ +from . import models diff --git a/attachment_db_by_checksum/__manifest__.py b/attachment_db_by_checksum/__manifest__.py new file mode 100644 index 0000000000..1fc6b685ec --- /dev/null +++ b/attachment_db_by_checksum/__manifest__.py @@ -0,0 +1,22 @@ +# Copyright 2021 Lorenzo Battistini @ TAKOBI +# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). 
+{ + "name": "DB attachments saved by checksum", + "summary": "Allow to identify database attachments through their hash, " + "avoiding duplicates", + "version": "12.0.1.0.0", + "development_status": "Beta", + "category": "Storage", + "website": "https://github.com/OCA/storage", + "author": "TAKOBI, Odoo Community Association (OCA)", + "maintainers": ["eLBati"], + "license": "LGPL-3", + "application": False, + "installable": True, + "depends": [ + "base", + ], + "data": [ + "security/ir.model.access.csv", + ], +} diff --git a/attachment_db_by_checksum/models/__init__.py b/attachment_db_by_checksum/models/__init__.py new file mode 100644 index 0000000000..35f69215e9 --- /dev/null +++ b/attachment_db_by_checksum/models/__init__.py @@ -0,0 +1,2 @@ +from . import ir_attachment_content +from . import ir_attachment diff --git a/attachment_db_by_checksum/models/ir_attachment.py b/attachment_db_by_checksum/models/ir_attachment.py new file mode 100644 index 0000000000..3e9e9ed832 --- /dev/null +++ b/attachment_db_by_checksum/models/ir_attachment.py @@ -0,0 +1,72 @@ +import logging +from odoo import models, api, _ +from odoo.exceptions import AccessError + +_logger = logging.getLogger(__name__) + + +class Attachment(models.Model): + _inherit = "ir.attachment" + + @api.model + def _file_write(self, value, checksum): + location = self._storage() + if location != "hashed_db": + return super(Attachment, self)._file_write(value, checksum) + fname, _ = self._get_path(False, checksum) + att = self.env["ir.attachment.content"].search([ + ("checksum", "=", fname) + ], limit=1) + if not att: + self.env["ir.attachment.content"].create({ + "checksum": fname, + "db_datas": value, + }) + return fname + + @api.model + def _file_read(self, checksum, bin_size=False): + location = self._storage() + if location != "hashed_db": + return super(Attachment, self)._file_read(checksum, bin_size) + att = self.env["ir.attachment.content"].search([ + ("checksum", "=", checksum) + ]) + if not att: + 
_logger.debug("File %s not found" % checksum) + return super(Attachment, self)._file_read(checksum, bin_size) + return att.db_datas + + @api.model + def _file_delete(self, checksum): + location = self._storage() + if location == "hashed_db": + attachments = self.search([ + ("store_fname", "=", checksum) + ]) + if not attachments: + self.env["ir.attachment.content"].search([ + ("checksum", "=", checksum) + ]).unlink() + return super(Attachment, self)._file_delete(checksum) + + @api.model + def force_storage(self): + if not self.env.user._is_admin(): + raise AccessError(_("Only administrators can execute this action.")) + location = self._storage() + if location == "hashed_db": + # we don't know if previous storage was file system or DB: + # we run for every attachment + for attach in self.search([ + # trick to get every attachment, see _search method of ir.attachment + "|", ("res_field", '=', False), ("res_field", "!=", False) + ]): + attach.write({ + "datas": attach.datas, + # do not try to guess mimetype overwriting existing value + "mimetype": attach.mimetype, + }) + return True + else: + return super(Attachment, self).force_storage() diff --git a/attachment_db_by_checksum/models/ir_attachment_content.py b/attachment_db_by_checksum/models/ir_attachment_content.py new file mode 100644 index 0000000000..b0132f35fb --- /dev/null +++ b/attachment_db_by_checksum/models/ir_attachment_content.py @@ -0,0 +1,16 @@ +from odoo import models, fields + + +class AttachmentContent(models.Model): + _name = "ir.attachment.content" + _rec_name = "checksum" + _description = "Attachment content by hash" + + checksum = fields.Char( + "Checksum/SHA1", size=40, index=True, readonly=True, required=True) + db_datas = fields.Binary("Database Data") + + _sql_constraints = [( + 'checksum_uniq', 'unique(checksum)', + 'The checksum of the file must be unique !' 
+ )] diff --git a/attachment_db_by_checksum/readme/CONFIGURE.rst b/attachment_db_by_checksum/readme/CONFIGURE.rst new file mode 100644 index 0000000000..3e6d7040c1 --- /dev/null +++ b/attachment_db_by_checksum/readme/CONFIGURE.rst @@ -0,0 +1,3 @@ +Set system parameter ``ir_attachment.location`` to ``hashed_db`` to activate saving by checksum. + +Run ``force_storage``, method of ``ir.attachment``, to move existing attachments. diff --git a/attachment_db_by_checksum/readme/CONTRIBUTORS.rst b/attachment_db_by_checksum/readme/CONTRIBUTORS.rst new file mode 100644 index 0000000000..2b476d7520 --- /dev/null +++ b/attachment_db_by_checksum/readme/CONTRIBUTORS.rst @@ -0,0 +1,3 @@ +* `TAKOBI `_: + + * Lorenzo Battistini diff --git a/attachment_db_by_checksum/readme/DESCRIPTION.rst b/attachment_db_by_checksum/readme/DESCRIPTION.rst new file mode 100644 index 0000000000..e386591e84 --- /dev/null +++ b/attachment_db_by_checksum/readme/DESCRIPTION.rst @@ -0,0 +1,3 @@ +Allows identifying database attachments through their hash, avoiding duplicates. + +This is typically useful when you want to save attachments to the database but want to save space by not writing the same content in several attachments (think of email attachments, for example, or any file uploaded more than once). 
diff --git a/attachment_db_by_checksum/security/ir.model.access.csv b/attachment_db_by_checksum/security/ir.model.access.csv new file mode 100644 index 0000000000..60f20a32e6 --- /dev/null +++ b/attachment_db_by_checksum/security/ir.model.access.csv @@ -0,0 +1,4 @@ +"id","name","model_id:id","group_id:id","perm_read","perm_write","perm_create","perm_unlink" +"access_ir_attachment_all","ir_attachment all","model_ir_attachment_content",,1,0,0,0 +"access_ir_attachment_group_user","ir_attachment group_user","model_ir_attachment_content","base.group_user",1,1,1,1 +"access_ir_attachment_portal","ir.attachment.portal","model_ir_attachment_content","base.group_portal",1,0,1,0 From 03f84d895c782a0809ab80016b668634b95143d0 Mon Sep 17 00:00:00 2001 From: eLBati Date: Sat, 11 Sep 2021 08:51:07 +0200 Subject: [PATCH 2/6] FIX attachment_db_by_checksum - while deleting attachment content, search all the attachments, otherwise we delete content still used - checksum is longer than 40; update its content --- attachment_db_by_checksum/__manifest__.py | 2 +- .../migrations/12.0.1.0.1/post-migration.py | 18 ++++++++++++++++++ .../models/ir_attachment.py | 6 +++++- .../models/ir_attachment_content.py | 2 +- 4 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py diff --git a/attachment_db_by_checksum/__manifest__.py b/attachment_db_by_checksum/__manifest__.py index 1fc6b685ec..7954fdd328 100644 --- a/attachment_db_by_checksum/__manifest__.py +++ b/attachment_db_by_checksum/__manifest__.py @@ -4,7 +4,7 @@ "name": "DB attachments saved by checksum", "summary": "Allow to identify database attachments through their hash, " "avoiding duplicates", - "version": "12.0.1.0.0", + "version": "12.0.1.0.1", "development_status": "Beta", "category": "Storage", "website": "https://github.com/OCA/storage", diff --git a/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py 
b/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py new file mode 100644 index 0000000000..3a427aca9e --- /dev/null +++ b/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py @@ -0,0 +1,18 @@ +from odoo import api, SUPERUSER_ID + + +def migrate(cr, version): + if not version: + return + with api.Environment.manage(): + env = api.Environment(cr, SUPERUSER_ID, {}) + contents = env['ir.attachment.content'].search([]) + for content in contents: + attachments = env['ir.attachment'].search([ + ('store_fname', 'like', content.checksum), + "|", + ("res_field", '=', False), + ("res_field", "!=", False) + ]) + # già controllato che non ci sono state perdite di file con checksum molto simili + content.checksum = attachments[0].store_fname diff --git a/attachment_db_by_checksum/models/ir_attachment.py b/attachment_db_by_checksum/models/ir_attachment.py index 3e9e9ed832..11f3654ee6 100644 --- a/attachment_db_by_checksum/models/ir_attachment.py +++ b/attachment_db_by_checksum/models/ir_attachment.py @@ -41,8 +41,12 @@ def _file_read(self, checksum, bin_size=False): def _file_delete(self, checksum): location = self._storage() if location == "hashed_db": + # see force_storage attachments = self.search([ - ("store_fname", "=", checksum) + ("store_fname", "=", checksum), + "|", + ("res_field", '=', False), + ("res_field", "!=", False) ]) if not attachments: self.env["ir.attachment.content"].search([ diff --git a/attachment_db_by_checksum/models/ir_attachment_content.py b/attachment_db_by_checksum/models/ir_attachment_content.py index b0132f35fb..a48ea747e3 100644 --- a/attachment_db_by_checksum/models/ir_attachment_content.py +++ b/attachment_db_by_checksum/models/ir_attachment_content.py @@ -7,7 +7,7 @@ class AttachmentContent(models.Model): _description = "Attachment content by hash" checksum = fields.Char( - "Checksum/SHA1", size=40, index=True, readonly=True, required=True) + "Checksum/SHA1", index=True, readonly=True, required=True) db_datas = 
fields.Binary("Database Data") _sql_constraints = [( From 8674838c9181ecc21896dc35f6bd8ed3736e2635 Mon Sep 17 00:00:00 2001 From: eLBati Date: Thu, 2 Dec 2021 17:44:26 +0100 Subject: [PATCH 3/6] pre-commit --- attachment_db_by_checksum/__manifest__.py | 10 +-- .../migrations/12.0.1.0.1/post-migration.py | 19 +++--- .../models/ir_attachment.py | 64 ++++++++++--------- .../models/ir_attachment_content.py | 16 +++-- 4 files changed, 57 insertions(+), 52 deletions(-) diff --git a/attachment_db_by_checksum/__manifest__.py b/attachment_db_by_checksum/__manifest__.py index 7954fdd328..fdfae27e3c 100644 --- a/attachment_db_by_checksum/__manifest__.py +++ b/attachment_db_by_checksum/__manifest__.py @@ -3,7 +3,7 @@ { "name": "DB attachments saved by checksum", "summary": "Allow to identify database attachments through their hash, " - "avoiding duplicates", + "avoiding duplicates", "version": "12.0.1.0.1", "development_status": "Beta", "category": "Storage", @@ -13,10 +13,6 @@ "license": "LGPL-3", "application": False, "installable": True, - "depends": [ - "base", - ], - "data": [ - "security/ir.model.access.csv", - ], + "depends": ["base"], + "data": ["security/ir.model.access.csv"], } diff --git a/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py b/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py index 3a427aca9e..b30a07262c 100644 --- a/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py +++ b/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py @@ -1,4 +1,4 @@ -from odoo import api, SUPERUSER_ID +from odoo import SUPERUSER_ID, api def migrate(cr, version): @@ -6,13 +6,14 @@ def migrate(cr, version): return with api.Environment.manage(): env = api.Environment(cr, SUPERUSER_ID, {}) - contents = env['ir.attachment.content'].search([]) + contents = env["ir.attachment.content"].search([]) for content in contents: - attachments = env['ir.attachment'].search([ - ('store_fname', 'like', content.checksum), - "|", - 
("res_field", '=', False), - ("res_field", "!=", False) - ]) - # già controllato che non ci sono state perdite di file con checksum molto simili + attachments = env["ir.attachment"].search( + [ + ("store_fname", "like", content.checksum), + "|", + ("res_field", "=", False), + ("res_field", "!=", False), + ] + ) content.checksum = attachments[0].store_fname diff --git a/attachment_db_by_checksum/models/ir_attachment.py b/attachment_db_by_checksum/models/ir_attachment.py index 11f3654ee6..5db0197750 100644 --- a/attachment_db_by_checksum/models/ir_attachment.py +++ b/attachment_db_by_checksum/models/ir_attachment.py @@ -1,5 +1,6 @@ import logging -from odoo import models, api, _ + +from odoo import _, api, models from odoo.exceptions import AccessError _logger = logging.getLogger(__name__) @@ -14,14 +15,13 @@ def _file_write(self, value, checksum): if location != "hashed_db": return super(Attachment, self)._file_write(value, checksum) fname, _ = self._get_path(False, checksum) - att = self.env["ir.attachment.content"].search([ - ("checksum", "=", fname) - ], limit=1) + att = self.env["ir.attachment.content"].search( + [("checksum", "=", fname)], limit=1 + ) if not att: - self.env["ir.attachment.content"].create({ - "checksum": fname, - "db_datas": value, - }) + self.env["ir.attachment.content"].create( + {"checksum": fname, "db_datas": value} + ) return fname @api.model @@ -29,9 +29,7 @@ def _file_read(self, checksum, bin_size=False): location = self._storage() if location != "hashed_db": return super(Attachment, self)._file_read(checksum, bin_size) - att = self.env["ir.attachment.content"].search([ - ("checksum", "=", checksum) - ]) + att = self.env["ir.attachment.content"].search([("checksum", "=", checksum)]) if not att: _logger.debug("File %s not found" % checksum) return super(Attachment, self)._file_read(checksum, bin_size) @@ -42,16 +40,18 @@ def _file_delete(self, checksum): location = self._storage() if location == "hashed_db": # see force_storage - 
attachments = self.search([ - ("store_fname", "=", checksum), - "|", - ("res_field", '=', False), - ("res_field", "!=", False) - ]) + attachments = self.search( + [ + ("store_fname", "=", checksum), + "|", + ("res_field", "=", False), + ("res_field", "!=", False), + ] + ) if not attachments: - self.env["ir.attachment.content"].search([ - ("checksum", "=", checksum) - ]).unlink() + self.env["ir.attachment.content"].search( + [("checksum", "=", checksum)] + ).unlink() return super(Attachment, self)._file_delete(checksum) @api.model @@ -62,15 +62,21 @@ def force_storage(self): if location == "hashed_db": # we don't know if previous storage was file system or DB: # we run for every attachment - for attach in self.search([ - # trick to get every attachment, see _search method of ir.attachment - "|", ("res_field", '=', False), ("res_field", "!=", False) - ]): - attach.write({ - "datas": attach.datas, - # do not try to guess mimetype overwriting existing value - "mimetype": attach.mimetype, - }) + for attach in self.search( + [ + # trick to get every attachment, see _search method of ir.attachment + "|", + ("res_field", "=", False), + ("res_field", "!=", False), + ] + ): + attach.write( + { + "datas": attach.datas, + # do not try to guess mimetype overwriting existing value + "mimetype": attach.mimetype, + } + ) return True else: return super(Attachment, self).force_storage() diff --git a/attachment_db_by_checksum/models/ir_attachment_content.py b/attachment_db_by_checksum/models/ir_attachment_content.py index a48ea747e3..ee28801b4e 100644 --- a/attachment_db_by_checksum/models/ir_attachment_content.py +++ b/attachment_db_by_checksum/models/ir_attachment_content.py @@ -1,4 +1,4 @@ -from odoo import models, fields +from odoo import fields, models class AttachmentContent(models.Model): @@ -6,11 +6,13 @@ class AttachmentContent(models.Model): _rec_name = "checksum" _description = "Attachment content by hash" - checksum = fields.Char( - "Checksum/SHA1", index=True, 
readonly=True, required=True) + checksum = fields.Char("Checksum/SHA1", index=True, readonly=True, required=True) db_datas = fields.Binary("Database Data") - _sql_constraints = [( - 'checksum_uniq', 'unique(checksum)', - 'The checksum of the file must be unique !' - )] + _sql_constraints = [ + ( + "checksum_uniq", + "unique(checksum)", + "The checksum of the file must be unique !", + ) + ] From 35947e609a13b33b24417bd44a0e35e2d265291b Mon Sep 17 00:00:00 2001 From: Simone Rubino Date: Tue, 14 Feb 2023 15:41:08 +0100 Subject: [PATCH 4/6] [IMP] attachment_db_by_checksum: black, isort, prettier --- .../odoo/addons/attachment_db_by_checksum | 1 + setup/attachment_db_by_checksum/setup.py | 6 ++++++ 2 files changed, 7 insertions(+) create mode 120000 setup/attachment_db_by_checksum/odoo/addons/attachment_db_by_checksum create mode 100644 setup/attachment_db_by_checksum/setup.py diff --git a/setup/attachment_db_by_checksum/odoo/addons/attachment_db_by_checksum b/setup/attachment_db_by_checksum/odoo/addons/attachment_db_by_checksum new file mode 120000 index 0000000000..7c28dce4f6 --- /dev/null +++ b/setup/attachment_db_by_checksum/odoo/addons/attachment_db_by_checksum @@ -0,0 +1 @@ +../../../../attachment_db_by_checksum \ No newline at end of file diff --git a/setup/attachment_db_by_checksum/setup.py b/setup/attachment_db_by_checksum/setup.py new file mode 100644 index 0000000000..28c57bb640 --- /dev/null +++ b/setup/attachment_db_by_checksum/setup.py @@ -0,0 +1,6 @@ +import setuptools + +setuptools.setup( + setup_requires=['setuptools-odoo'], + odoo_addon=True, +) From 6711c992eebd4197aa994016f744d7787ecaae8f Mon Sep 17 00:00:00 2001 From: Simone Rubino Date: Tue, 14 Feb 2023 13:24:00 +0100 Subject: [PATCH 5/6] [MIG] attachment_db_by_checksum: Migration to 14.0 --- attachment_db_by_checksum/README.rst | 92 ++++ attachment_db_by_checksum/__init__.py | 2 + attachment_db_by_checksum/__manifest__.py | 21 +- .../migrations/12.0.1.0.1/post-migration.py | 19 - 
attachment_db_by_checksum/models/__init__.py | 2 + .../models/ir_attachment.py | 159 ++++--- .../models/ir_attachment_content.py | 33 +- .../readme/CONTRIBUTORS.rst | 1 + .../security/ir.model.access.csv | 6 +- .../static/description/index.html | 431 ++++++++++++++++++ attachment_db_by_checksum/tests/__init__.py | 3 + .../tests/test_attachment_by_checksum.py | 85 ++++ 12 files changed, 765 insertions(+), 89 deletions(-) create mode 100644 attachment_db_by_checksum/README.rst delete mode 100644 attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py create mode 100644 attachment_db_by_checksum/static/description/index.html create mode 100644 attachment_db_by_checksum/tests/__init__.py create mode 100644 attachment_db_by_checksum/tests/test_attachment_by_checksum.py diff --git a/attachment_db_by_checksum/README.rst b/attachment_db_by_checksum/README.rst new file mode 100644 index 0000000000..b10ee26645 --- /dev/null +++ b/attachment_db_by_checksum/README.rst @@ -0,0 +1,92 @@ +================================ +DB attachments saved by checksum +================================ + +.. !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + !! This file is generated by oca-gen-addon-readme !! + !! changes will be overwritten. !! + !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + +.. |badge1| image:: https://img.shields.io/badge/maturity-Beta-yellow.png + :target: https://odoo-community.org/page/development-status + :alt: Beta +.. |badge2| image:: https://img.shields.io/badge/licence-LGPL--3-blue.png + :target: http://www.gnu.org/licenses/lgpl-3.0-standalone.html + :alt: License: LGPL-3 +.. |badge3| image:: https://img.shields.io/badge/github-OCA%2Fstorage-lightgray.png?logo=github + :target: https://github.com/OCA/storage/tree/14.0/attachment_db_by_checksum + :alt: OCA/storage +.. 
|badge4| image:: https://img.shields.io/badge/weblate-Translate%20me-F47D42.png + :target: https://translation.odoo-community.org/projects/storage-14-0/storage-14-0-attachment_db_by_checksum + :alt: Translate me on Weblate +.. |badge5| image:: https://img.shields.io/badge/runbot-Try%20me-875A7B.png + :target: https://runbot.odoo-community.org/runbot/275/14.0 + :alt: Try me on Runbot + +|badge1| |badge2| |badge3| |badge4| |badge5| + +Allow to identify database attachments through their hash, avoiding duplicates. + +This is typically useful when you want to save attachments to database but you want to save space avoiding to write the same content in several attachments (think of email attachments, for example, or any file uploaded more than once). + +**Table of contents** + +.. contents:: + :local: + +Configuration +============= + +Set system parameter ``ir_attachment.location`` to ``hashed_db`` to activate saving by checksum. + +Run ``force_storage``, method of ``ir.attachment``, to move existing attachments. + +Bug Tracker +=========== + +Bugs are tracked on `GitHub Issues `_. +In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us smashing it by providing a detailed and welcomed +`feedback `_. + +Do not contact contributors directly about support or help with technical issues. + +Credits +======= + +Authors +~~~~~~~ + +* TAKOBI + +Contributors +~~~~~~~~~~~~ + +* `TAKOBI `_: + + * Lorenzo Battistini + +Maintainers +~~~~~~~~~~~ + +This module is maintained by the OCA. + +.. image:: https://odoo-community.org/logo.png + :alt: Odoo Community Association + :target: https://odoo-community.org + +OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use. + +.. 
|maintainer-eLBati| image:: https://github.com/eLBati.png?size=40px + :target: https://github.com/eLBati + :alt: eLBati + +Current `maintainer `__: + +|maintainer-eLBati| + +This module is part of the `OCA/storage `_ project on GitHub. + +You are welcome to contribute. To learn how please visit https://odoo-community.org/page/Contribute. diff --git a/attachment_db_by_checksum/__init__.py b/attachment_db_by_checksum/__init__.py index 0650744f6b..2bc1433e72 100644 --- a/attachment_db_by_checksum/__init__.py +++ b/attachment_db_by_checksum/__init__.py @@ -1 +1,3 @@ +# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). + from . import models diff --git a/attachment_db_by_checksum/__manifest__.py b/attachment_db_by_checksum/__manifest__.py index fdfae27e3c..10feb3473a 100644 --- a/attachment_db_by_checksum/__manifest__.py +++ b/attachment_db_by_checksum/__manifest__.py @@ -1,18 +1,21 @@ # Copyright 2021 Lorenzo Battistini @ TAKOBI +# Copyright 2023 Simone Rubino - TAKOBI # License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). 
{ "name": "DB attachments saved by checksum", - "summary": "Allow to identify database attachments through their hash, " - "avoiding duplicates", - "version": "12.0.1.0.1", - "development_status": "Beta", + "summary": "Allow to identify database attachments through their hash, avoiding duplicates", + "version": "14.0.1.0.0", "category": "Storage", "website": "https://github.com/OCA/storage", "author": "TAKOBI, Odoo Community Association (OCA)", - "maintainers": ["eLBati"], + "maintainers": [ + "eLBati", + ], "license": "LGPL-3", - "application": False, - "installable": True, - "depends": ["base"], - "data": ["security/ir.model.access.csv"], + "depends": [ + "base", + ], + "data": [ + "security/ir.model.access.csv", + ], } diff --git a/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py b/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py deleted file mode 100644 index b30a07262c..0000000000 --- a/attachment_db_by_checksum/migrations/12.0.1.0.1/post-migration.py +++ /dev/null @@ -1,19 +0,0 @@ -from odoo import SUPERUSER_ID, api - - -def migrate(cr, version): - if not version: - return - with api.Environment.manage(): - env = api.Environment(cr, SUPERUSER_ID, {}) - contents = env["ir.attachment.content"].search([]) - for content in contents: - attachments = env["ir.attachment"].search( - [ - ("store_fname", "like", content.checksum), - "|", - ("res_field", "=", False), - ("res_field", "!=", False), - ] - ) - content.checksum = attachments[0].store_fname diff --git a/attachment_db_by_checksum/models/__init__.py b/attachment_db_by_checksum/models/__init__.py index 35f69215e9..dc67916b59 100644 --- a/attachment_db_by_checksum/models/__init__.py +++ b/attachment_db_by_checksum/models/__init__.py @@ -1,2 +1,4 @@ +# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). + from . import ir_attachment_content from . 
import ir_attachment diff --git a/attachment_db_by_checksum/models/ir_attachment.py b/attachment_db_by_checksum/models/ir_attachment.py index 5db0197750..1bfdc9c791 100644 --- a/attachment_db_by_checksum/models/ir_attachment.py +++ b/attachment_db_by_checksum/models/ir_attachment.py @@ -1,82 +1,133 @@ +# Copyright 2023 Simone Rubino - TAKOBI +# License LGPL-3.0 or later (https://www.gnu.org/licenses/agpl). + import logging from odoo import _, api, models from odoo.exceptions import AccessError +from odoo.osv import expression _logger = logging.getLogger(__name__) +HASHED_STORAGE_PARAMETER = "hashed_db" + class Attachment(models.Model): _inherit = "ir.attachment" @api.model - def _file_write(self, value, checksum): - location = self._storage() - if location != "hashed_db": - return super(Attachment, self)._file_write(value, checksum) - fname, _ = self._get_path(False, checksum) - att = self.env["ir.attachment.content"].search( - [("checksum", "=", fname)], limit=1 - ) - if not att: + def _file_write_by_checksum(self, bin_value, checksum): + """Store attachment content in `Attachment content by hash`.""" + fname, full_path = self._get_path(bin_value, checksum) + attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) + if not attachment_content: self.env["ir.attachment.content"].create( - {"checksum": fname, "db_datas": value} + { + "checksum": fname, + "db_datas": bin_value, + } ) return fname @api.model - def _file_read(self, checksum, bin_size=False): + def _file_write(self, bin_value, checksum): location = self._storage() - if location != "hashed_db": - return super(Attachment, self)._file_read(checksum, bin_size) - att = self.env["ir.attachment.content"].search([("checksum", "=", checksum)]) - if not att: - _logger.debug("File %s not found" % checksum) - return super(Attachment, self)._file_read(checksum, bin_size) - return att.db_datas + if location == HASHED_STORAGE_PARAMETER: + return self._file_write_by_checksum(bin_value, checksum) 
+ return super()._file_write(bin_value, checksum) @api.model - def _file_delete(self, checksum): + def _file_read_by_checksum(self, fname): + """Read attachment content from `Attachment content by hash`.""" + attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) + if attachment_content: + bin_value = attachment_content.db_datas + else: + # Fallback on standard behavior + _logger.debug("File %s not found" % fname) + bin_value = super()._file_read(fname) + return bin_value + + @api.model + def _file_read(self, fname): location = self._storage() - if location == "hashed_db": - # see force_storage - attachments = self.search( + if location == HASHED_STORAGE_PARAMETER: + return self._file_read_by_checksum(fname) + return super()._file_read(fname) + + @api.model + def _get_all_attachments_by_checksum_domain(self, fname=None): + """Get domain for finding all the attachments. + + If `checksum` is provided, + get domain for finding all the attachments having checksum `checksum`. + """ + # trick to get every attachment, see _search method of ir.attachment + domain = [ + ("id", "!=", 0), + ] + if fname is not None: + checksum_domain = [ + ("store_fname", "=", fname), + ] + domain = expression.AND( [ - ("store_fname", "=", checksum), - "|", - ("res_field", "=", False), - ("res_field", "!=", False), + domain, + checksum_domain, ] ) - if not attachments: - self.env["ir.attachment.content"].search( - [("checksum", "=", checksum)] - ).unlink() - return super(Attachment, self)._file_delete(checksum) + return domain @api.model - def force_storage(self): - if not self.env.user._is_admin(): + def _get_all_attachments_by_checksum(self, fname=None): + """Get all attachments. + + If `checksum` is provided, + get all the attachments having checksum `checksum`. 
+ """ + domain = self._get_all_attachments_by_checksum_domain(fname) + attachments = self.search(domain) + return attachments + + @api.model + def _file_delete_by_checksum(self, fname): + """Delete attachment content in `Attachment content by hash`.""" + attachments = self._get_all_attachments_by_checksum(fname=fname) + if not attachments: + attachment_content = self.env["ir.attachment.content"].search_by_checksum( + fname + ) + attachment_content.unlink() + + @api.model + def _file_delete(self, fname): + location = self._storage() + if location == HASHED_STORAGE_PARAMETER: + self._file_delete_by_checksum(fname) + return super()._file_delete(fname) + + @api.model + def force_storage_by_checksum(self): + """Copy all the attachments to `Attachment content by hash`.""" + if not self.env.is_admin(): raise AccessError(_("Only administrators can execute this action.")) + + # we don't know if previous storage was file system or DB: + # we run for every attachment + all_attachments = self._get_all_attachments_by_checksum() + for attach in all_attachments: + attach.write( + { + "datas": attach.datas, + # do not try to guess mimetype overwriting existing value + "mimetype": attach.mimetype, + } + ) + return True + + @api.model + def force_storage(self): location = self._storage() - if location == "hashed_db": - # we don't know if previous storage was file system or DB: - # we run for every attachment - for attach in self.search( - [ - # trick to get every attachment, see _search method of ir.attachment - "|", - ("res_field", "=", False), - ("res_field", "!=", False), - ] - ): - attach.write( - { - "datas": attach.datas, - # do not try to guess mimetype overwriting existing value - "mimetype": attach.mimetype, - } - ) - return True - else: - return super(Attachment, self).force_storage() + if location == HASHED_STORAGE_PARAMETER: + return self.force_storage_by_checksum() + return super().force_storage() diff --git a/attachment_db_by_checksum/models/ir_attachment_content.py 
b/attachment_db_by_checksum/models/ir_attachment_content.py index ee28801b4e..2df5c3d809 100644 --- a/attachment_db_by_checksum/models/ir_attachment_content.py +++ b/attachment_db_by_checksum/models/ir_attachment_content.py @@ -6,13 +6,38 @@ class AttachmentContent(models.Model): _rec_name = "checksum" _description = "Attachment content by hash" - checksum = fields.Char("Checksum/SHA1", index=True, readonly=True, required=True) - db_datas = fields.Binary("Database Data") + checksum = fields.Char( + string="Checksum/SHA1", + help="Checksum in the shape 2a/2a...\n", + index=True, + readonly=True, + required=True, + ) + db_datas = fields.Binary( + string="Database Data", + attachment=False, + ) _sql_constraints = [ ( "checksum_uniq", "unique(checksum)", - "The checksum of the file must be unique !", - ) + "The checksum of the file must be unique!", + ), ] + + def search_by_checksum(self, fname): + """Get Attachment content, searching by `fname`. + + Note that `fname` is the relative path of the attachment + as it would be saved by the core, for example 2a/2a..., + this is the same value that we store + in field `ir.attachment.content.checksum`. 
+ """ + attachment_content = self.env["ir.attachment.content"].search( + [ + ("checksum", "=", fname), + ], + limit=1, + ) + return attachment_content diff --git a/attachment_db_by_checksum/readme/CONTRIBUTORS.rst b/attachment_db_by_checksum/readme/CONTRIBUTORS.rst index 2b476d7520..c337ddd909 100644 --- a/attachment_db_by_checksum/readme/CONTRIBUTORS.rst +++ b/attachment_db_by_checksum/readme/CONTRIBUTORS.rst @@ -1,3 +1,4 @@ * `TAKOBI `_: * Lorenzo Battistini + * Simone Rubino diff --git a/attachment_db_by_checksum/security/ir.model.access.csv b/attachment_db_by_checksum/security/ir.model.access.csv index 60f20a32e6..bedf4fff58 100644 --- a/attachment_db_by_checksum/security/ir.model.access.csv +++ b/attachment_db_by_checksum/security/ir.model.access.csv @@ -1,4 +1,4 @@ "id","name","model_id:id","group_id:id","perm_read","perm_write","perm_create","perm_unlink" -"access_ir_attachment_all","ir_attachment all","model_ir_attachment_content",,1,0,0,0 -"access_ir_attachment_group_user","ir_attachment group_user","model_ir_attachment_content","base.group_user",1,1,1,1 -"access_ir_attachment_portal","ir.attachment.portal","model_ir_attachment_content","base.group_portal",1,0,1,0 +"access_ir_attachment_all","Everyone can read Attachment Contents","model_ir_attachment_content",,1,0,0,0 +"access_ir_attachment_group_user","Internal Users can manage Attachment Contents","model_ir_attachment_content","base.group_user",1,1,1,1 +"access_ir_attachment_portal","Portal Users can read and create Attachment Contents","model_ir_attachment_content","base.group_portal",1,0,1,0 diff --git a/attachment_db_by_checksum/static/description/index.html b/attachment_db_by_checksum/static/description/index.html new file mode 100644 index 0000000000..05cb3ac093 --- /dev/null +++ b/attachment_db_by_checksum/static/description/index.html @@ -0,0 +1,431 @@ + + + + + + +DB attachments saved by checksum + + + +
+

DB attachments saved by checksum

+ + +

Beta License: LGPL-3 OCA/storage Translate me on Weblate Try me on Runbot

+

Allow to identify database attachments through their hash, avoiding duplicates.

+

This is typically useful when you want to store attachments in the database while saving space by not writing the same content for several attachments (think of email attachments, for example, or any file uploaded more than once).

+

Table of contents

+ +
+

Configuration

+

Set the system parameter ir_attachment.location to hashed_db to enable saving attachments by checksum.

+

Run the force_storage method of ir.attachment to move existing attachments.

+
+
+

Bug Tracker

+

Bugs are tracked on GitHub Issues. +In case of trouble, please check there if your issue has already been reported. +If you spotted it first, help us to smash it by providing a detailed and welcomed +feedback.

+

Do not contact contributors directly about support or help with technical issues.

+
+
+

Credits

+
+

Authors

+
    +
  • TAKOBI
  • +
+
+
+

Contributors

+
    +
  • TAKOBI:
      +
    • Lorenzo Battistini
    • +
    +
  • +
+
+
+

Maintainers

+

This module is maintained by the OCA.

+Odoo Community Association +

OCA, or the Odoo Community Association, is a nonprofit organization whose +mission is to support the collaborative development of Odoo features and +promote its widespread use.

+

Current maintainer:

+

eLBati

+

This module is part of the OCA/storage project on GitHub.

+

You are welcome to contribute. To learn how, please visit https://odoo-community.org/page/Contribute.

+
+
+
+ + diff --git a/attachment_db_by_checksum/tests/__init__.py b/attachment_db_by_checksum/tests/__init__.py new file mode 100644 index 0000000000..c6ba00b537 --- /dev/null +++ b/attachment_db_by_checksum/tests/__init__.py @@ -0,0 +1,3 @@ +# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). + +from . import test_attachment_by_checksum diff --git a/attachment_db_by_checksum/tests/test_attachment_by_checksum.py b/attachment_db_by_checksum/tests/test_attachment_by_checksum.py new file mode 100644 index 0000000000..b7b18b3334 --- /dev/null +++ b/attachment_db_by_checksum/tests/test_attachment_by_checksum.py @@ -0,0 +1,85 @@ +# Copyright 2023 Simone Rubino - TAKOBI +# License LGPL-3.0 or later (https://www.gnu.org/licenses/lgpl). + +import base64 + +from odoo.tests import SavepointCase + +from odoo.addons.attachment_db_by_checksum.models.ir_attachment import ( + HASHED_STORAGE_PARAMETER, +) + + +class TestAttachmentByChecksum(SavepointCase): + @classmethod + def setUpClass(cls): + super().setUpClass() + + cls.data = b"Test attachment data" + cls.attachment = cls.env["ir.attachment"].create( + { + "name": "Test attachment", + "datas": base64.b64encode(cls.data), + } + ) + # Save the fname (a2/a2...) of the attachment + # so that we can use it in tests where the attachment is deleted + cls.fname = cls.attachment.store_fname + + @classmethod + def _set_hashed_db_storage(cls): + """Set `hashed_db` Attachment Storage.""" + cls.env["ir.config_parameter"].set_param( + "ir_attachment.location", + HASHED_STORAGE_PARAMETER, + ) + + def test_force_storage(self): + """Move storage from default to `hashed_db`: + attachments are copied in `Attachment content by hash` records. 
+ """ + # Arrange: Create an attachment + data = self.data + fname = self.fname + attachment = self.attachment + # pre-condition: The storage is not `hashed_db` + self.assertNotEqual( + self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER + ) + self.assertEqual(attachment.raw, data) + + # Act: Move the storage + self._set_hashed_db_storage() + self.env["ir.attachment"].force_storage() + + # Assert: The attachment value is both in the attachment + # and in the Attachment content by hash + self.assertEqual(self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER) + self.assertEqual(attachment.raw, data) + attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) + self.assertEqual(attachment_content.db_datas, data) + + def test_new_hashed_attachment(self): + """Storage is `hashed_db`: + new attachments are only stored in `Attachment content by hash` records. + """ + # Arrange: Set the storage to `hashed_db` + data = self.data + fname = self.fname + self.attachment.unlink() + self._set_hashed_db_storage() + # pre-condition + self.assertEqual(self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER) + + # Act: Create an attachment + self.env["ir.attachment"].create( + { + "name": "Test attachment", + "datas": base64.b64encode(data), + } + ) + + # Assert: The new attachment value is in the Attachment content by hash + self.assertEqual(self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER) + attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) + self.assertEqual(attachment_content.db_datas, data) From 5e0290130508409791c2522434905324fd4aa266 Mon Sep 17 00:00:00 2001 From: Simone Rubino Date: Wed, 15 Feb 2023 10:43:35 +0100 Subject: [PATCH 6/6] [FIX] attachment_db_by_checksum: Copy attachments of invisible menus --- .../models/ir_attachment.py | 5 ++- .../tests/test_attachment_by_checksum.py | 33 +++++++++++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git 
a/attachment_db_by_checksum/models/ir_attachment.py b/attachment_db_by_checksum/models/ir_attachment.py index 1bfdc9c791..cb2dd32408 100644 --- a/attachment_db_by_checksum/models/ir_attachment.py +++ b/attachment_db_by_checksum/models/ir_attachment.py @@ -86,7 +86,10 @@ def _get_all_attachments_by_checksum(self, fname=None): get all the attachments having checksum `checksum`. """ domain = self._get_all_attachments_by_checksum_domain(fname) - attachments = self.search(domain) + invisible_menu_context = { + "ir.ui.menu.full_list": True, + } + attachments = self.with_context(**invisible_menu_context).search(domain) return attachments @api.model diff --git a/attachment_db_by_checksum/tests/test_attachment_by_checksum.py b/attachment_db_by_checksum/tests/test_attachment_by_checksum.py index b7b18b3334..3ea3eb4548 100644 --- a/attachment_db_by_checksum/tests/test_attachment_by_checksum.py +++ b/attachment_db_by_checksum/tests/test_attachment_by_checksum.py @@ -83,3 +83,36 @@ def test_new_hashed_attachment(self): self.assertEqual(self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER) attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) self.assertEqual(attachment_content.db_datas, data) + + def test_force_storage_invisible_menu(self): + """Move storage from default to `hashed_db`: + attachments linked to invisible menus + are copied in `Attachment content by hash` records. 
+ """ + # Arrange: Create a menu invisible for current user + fname = self.fname + self.attachment.unlink() + menu_model = self.env["ir.ui.menu"] + invisible_menu = menu_model.create( + { + "name": "Test invisible menu", + "web_icon_data": base64.b64encode(self.data), + "groups_id": [(6, 0, self.env.ref("base.group_no_one").ids)], + } + ) + # pre-condition: The menu is invisible and storage is not `hashed_db` + self.assertNotEqual( + self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER + ) + self.assertNotIn(invisible_menu, menu_model.search([])) + + # Act: Move the storage to `hashed_db` + self._set_hashed_db_storage() + self.env["ir.attachment"].with_user( + self.env.ref("base.user_admin") + ).force_storage() + + # Assert: The menu's attachment value is in the Attachment content by hash + self.assertEqual(self.env["ir.attachment"]._storage(), HASHED_STORAGE_PARAMETER) + attachment_content = self.env["ir.attachment.content"].search_by_checksum(fname) + self.assertTrue(attachment_content)