Skip to content

Commit

Permalink
doc: Investigate current smart- and auto-remove behavior
Browse files Browse the repository at this point in the history
Documentation of research about how the smart- and auto-remove of BIT is working.

- A maintain-docu file is added.
- Doc strings improved
- Fix minor bug: `smartRemoveKeepAll()` now check dates and not snapshot IDs. That caused inconsistent behavior with keep-rule on day level.
- Refactor: increment and decrement months
- New unit tests.

Related to #1945
  • Loading branch information
buhtz authored Dec 23, 2024
1 parent 5f7bcf5 commit 62a9c6e
Show file tree
Hide file tree
Showing 12 changed files with 1,338 additions and 230 deletions.
180 changes: 102 additions & 78 deletions common/snapshots.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,19 @@
# SPDX-FileCopyrightText: © 2008-2022 Richard Bailey
# SPDX-FileCopyrightText: © 2008-2022 Germar Reitze
# SPDX-FileCopyrightText: © 2008-2022 Taylor Raack
# SPDX-FileCopyrightText: © 2024 Christian Buhtz <c.buhtz@posteo.jp>
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# This file is part of the program "Back In time" which is released under GNU
# This file is part of the program "Back In Time" which is released under GNU
# General Public License v2 (GPLv2). See file/folder LICENSE or go to
# <https://spdx.org/licenses/GPL-2.0-or-later.html>.
from __future__ import annotations
import os
from pathlib import Path
import stat
import datetime
import calendar
import gettext
import bz2
import pwd
Expand Down Expand Up @@ -924,6 +927,7 @@ def backup(self, force=False):
# "continue on errors" is enabled

if not ret_error:
# Start auto- and smart-remove
self.freeSpace(now)
self.setTakeSnapshotMessage(
0, _('Please be patient. Finalizing…'))
Expand Down Expand Up @@ -1517,67 +1521,80 @@ def takeSnapshot(self, sid, now, include_folders):
return [True, has_errors]

def smartRemoveKeepAll(self,
snapshots,
min_date,
max_date):
snapshots: list[SID],
min_date: datetime.date,
max_date: datetime.date) -> set[SID]:
"""
Return all snapshots between ``min_date`` and ``max_date``.
Return all snapshots in the timedelta beginning with ``min_date`` and
ending before ``max_date``.
Args:
snapshots (list): full list of :py:class:`SID` objects
min_date (datetime.date): minimum date for snapshots to keep
max_date (datetime.date): maximum date for snapshots to keep
snapshots (list): Full list of :py:class:`SID` objects.
min_date (datetime.date): Minimum date (included in the range).
max_date (datetime.date): Maximum date (excluded from the range).
Returns:
set: set of snapshots that should be kept
set: Set of snapshots that should be kept.
"""
min_id = SID(min_date, self.config)
max_id = SID(max_date, self.config)
logger.debug(f'Keep all >= {min_date} < {max_date}', self)

logger.debug("Keep all >= %s and < %s" %(min_id, max_id), self)
result = filter(lambda sid: sid.date.date() >= min_date
and sid.date.date() < max_date,
snapshots)

return set([sid for sid in snapshots if sid >= min_id and sid < max_id])
return set(result)

def smartRemoveKeepFirst(self,
snapshots,
min_date,
max_date,
keep_healthy = False):
"""
Return only the first snapshot between ``min_date`` and ``max_date``.
keep_healthy=False):
"""Return the first snapshot between ``min_date`` and ``max_date``.
The first snapshot in ``snapshots`` that hit the timedetla beginning
with ``min_date`` and ending before ``max_date`` will be returned.
Snapshots outthat that range are also lost. The list is not ordered by
date.
Args:
snapshots (list): full list of :py:class:`SID` objects
min_date (datetime.date): minimum date for snapshots to keep
max_date (datetime.date): maximum date for snapshots to keep
keep_healthy (bool): return the first healthy snapshot (not
marked as failed) instead of the first
at all. If all snapshots failed this
will again return the very first
snapshot
snapshots (list): Full list of :py:class:`SID` objects.
min_date (datetime.date): Minimum date (included in the range).
max_date (datetime.date): Maximum date (excluded from the range).
keep_healthy (bool): Return the first healthy snapshot (not marked
as failed) instead of the first at all. If all snapshots failed
this will again return the very first snapshot.
Returns:
set: set of snapshots that should be kept
set: Set of one snapshot that should be kept or an empty set.
TODO: It should compare datest not SIDs because of their tag.
"""
# print(f'smartRemoveKeepFirst() :: {min_date=} {max_date=}') # DEBUG
min_id = SID(min_date, self.config)
max_id = SID(max_date, self.config)

logger.debug("Keep first >= %s and < %s" %(min_id, max_id), self)
logger.debug("Keep first >= %s and < %s" % (min_id, max_id), self)

for sid in snapshots:
# try to keep the first healthy snapshot
if keep_healthy and sid.failed:
logger.debug("Do not keep failed snapshot %s" %sid, self)
logger.debug("Do not keep failed snapshot %s" % sid, self)
continue

# DEBUG
# print(f'smartRemoveKeepFirst() :: for sid ... sid={str(sid)}')

if sid >= min_id and sid < max_id:
# print(f' return {str(sid)}')
return set([sid])

# if all snapshots failed return the first snapshot
# no matter if it has errors
if keep_healthy:
return self.smartRemoveKeepFirst(snapshots,
min_date,
max_date,
keep_healthy = False)
keep_healthy=False)
return set()

def incMonth(self, date):
Expand All @@ -1591,12 +1608,13 @@ def incMonth(self, date):
Returns:
datetime.date: 1st day of next month
"""
y = date.year
m = date.month + 1
if m > 12:
m = 1
y = y + 1
return datetime.date(y, m, 1)
# Last day in current month
last = datetime.date(
year=date.year,
month=date.month,
day=calendar.monthrange(date.year, date.month)[1])

return last + datetime.timedelta(days=1)

def decMonth(self, date):
"""
Expand All @@ -1610,22 +1628,22 @@ def decMonth(self, date):
Returns:
datetime.date: 1st day of previous month
"""
y = date.year
m = date.month - 1
if m < 1:
m = 12
y = y - 1
return datetime.date(y, m, 1)
# First day of current month
first = datetime.date(year=date.year, month=date.month, day=1)

# Last day of previous month
prev = first - datetime.timedelta(days=1)

# First day of previous month
return datetime.date(year=prev.year, month=prev.month, day=1)

def smartRemoveList(self,
now_full,
keep_all,
keep_one_per_day,
keep_one_per_week,
keep_one_per_month):
"""
Get a list of old snapshots that should be removed based on configurable
intervals.
"""Get list of backups to be removed based on configurable intervals.
Args:
now_full (datetime.datetime): date and time when takeSnapshot was
Expand All @@ -1641,8 +1659,10 @@ def smartRemoveList(self,
Returns:
list: snapshots that should be removed
"""
snapshots = listSnapshots(self.config)
# Latest/younges backup first, the oldest is last
snapshots = listSnapshots(self.config, reverse=True)
logger.debug(f'Considered: {snapshots}', self)

if len(snapshots) <= 1:
Expand All @@ -1654,7 +1674,7 @@ def smartRemoveList(self,

now = now_full.date()

# keep the last snapshot
# keep the last/youngest backup
keep = set([snapshots[0]])

# keep all for the last keep_all days
Expand Down Expand Up @@ -1821,28 +1841,28 @@ def smartRemove(self, del_snapshots, log = None):
self.remove(sid)

def freeSpace(self, now):
"""
Remove old snapshots on based on different rules (only if enabled).
First rule is to remove snapshots older than X years. Next will call
:py:func:`smartRemove` to remove snapshots based on
configurable intervals. Third rule is to remove the oldest snapshot
until there is enough free space. Last rule will remove the oldest
snapshot until there are enough free inodes.
"""Remove old backups based on several rules (if enabled).
Rules are considered in the following order:
1. Remove snapshots older than X years.
2. Smart-remove rules with calling :py:func:`smartRemoveList`. See
there for details.
3. Remove the oldest backup until there is enough free space.
4. Remove the oldest backup until there are enough free inodes.
'last_snapshot' symlink will be fixed when done.
The 'last_snapshot' symlink will be fixed when done.
Args:
now (datetime.datetime): date and time when takeSnapshot was
started
now (datetime.datetime): Timestamp when takeSnapshot was started.
"""
snapshots = listSnapshots(self.config, reverse = False)
snapshots = listSnapshots(self.config, reverse=False)
if not snapshots:
logger.debug('No snapshots. Skip freeSpace', self)
return

last_snapshot = snapshots[-1]

#remove old backups
# Remove old backups
if self.config.removeOldSnapshotsEnabled():
self.setTakeSnapshotMessage(0, _('Removing old snapshots'))

Expand Down Expand Up @@ -1877,15 +1897,15 @@ def freeSpace(self, now):
keep_one_per_month)
self.smartRemove(del_snapshots)

# try to keep min free space
# Try to keep min free space
if self.config.minFreeSpaceEnabled():
self.setTakeSnapshotMessage(0, _('Trying to keep min free space'))

minFreeSpace = self.config.minFreeSpaceMib()

logger.debug("Keep min free disk space: {} MiB".format(minFreeSpace), self)

snapshots = listSnapshots(self.config, reverse = False)
snapshots = listSnapshots(self.config, reverse=False)

while True:
if len(snapshots) <= 1:
Expand Down Expand Up @@ -1913,7 +1933,7 @@ def freeSpace(self, now):
self.remove(snapshots[0])
del snapshots[0]

#try to keep free inodes
# Try to keep free inodes
if self.config.minFreeInodesEnabled():
minFreeInodes = self.config.minFreeInodes()
self.setTakeSnapshotMessage(
Expand All @@ -1934,7 +1954,7 @@ def freeSpace(self, now):
try:
info = os.statvfs(self.config.snapshotsPath())
free_inodes = info.f_favail
max_inodes = info.f_files
max_inodes = info.f_files
except Exception as e:
logger.debug('Failed to get free inodes for snapshot path %s: %s'
% (self.config.snapshotsPath(), str(e)),
Expand All @@ -1955,7 +1975,7 @@ def freeSpace(self, now):
self.remove(snapshots[0])
del snapshots[0]

#set correct last snapshot again
# Set correct last snapshot again
if last_snapshot is not snapshots[-1]:
self.createLastSnapshotSymlink(snapshots[-1])

Expand Down Expand Up @@ -2419,6 +2439,8 @@ def __init__(self, date, cfg):

if isinstance(date, datetime.datetime):
self.sid = '-'.join((date.strftime('%Y%m%d-%H%M%S'), self.config.tag(self.profileID)))
# TODO: Don't use "date" as attribute name. Btw: It is not a date
# but a datetime.
self.date = date

elif isinstance(date, datetime.date):
Expand All @@ -2434,10 +2456,12 @@ def __init__(self, date, cfg):
raise LastSnapshotSymlink()

else:
raise ValueError("'date' must be in snapshot ID format (e.g 20151218-173512-123)")
raise ValueError("'date' must be in snapshot ID format "
f"(e.g 20151218-173512-123) but is '{date}'")

else:
raise TypeError("'date' must be an instance of str, datetime.date or datetime.datetime")
raise TypeError("'date' must be an instance of str, datetime.date "
f"or datetime.datetime but is '{date}'")

def __repr__(self):
return self.sid
Expand Down Expand Up @@ -3061,17 +3085,16 @@ def path(self, *path, use_mode = []):
return os.path.join(os.sep, *path)


def iterSnapshots(cfg, includeNewSnapshot = False):
"""
A generator to iterate over snapshots in current snapshot path.
def iterSnapshots(cfg, includeNewSnapshot=False):
"""A generator to iterate over snapshots in current snapshot path.
Args:
cfg (config.Config): current config
includeNewSnapshot (bool): include a NewSnapshot instance if
'new_snapshot' folder is available.
cfg (config.Config): Current config instance.
includeNewSnapshot (bool): Include a NewSnapshot instance if
'new_snapshot' directory is available (default: False).
Yields:
SID: snapshot IDs
SID: Snapshot IDs
"""
path = cfg.snapshotsFullPath()

Expand Down Expand Up @@ -3103,21 +3126,22 @@ def iterSnapshots(cfg, includeNewSnapshot = False):
"'{}' is not a snapshot ID: {}".format(item, str(e)))


def listSnapshots(cfg, includeNewSnapshot = False, reverse = True):
def listSnapshots(cfg, includeNewSnapshot=False, reverse=True):
"""
List of snapshots in current snapshot path.
Args:
cfg (config.Config): current config (config.Config instance)
includeNewSnapshot (bool): include a NewSnapshot instance if
'new_snapshot' folder is available
reverse (bool): sort reverse
cfg (config.Config): Current config instance.
includeNewSnapshot (bool): Include a NewSnapshot instance if
'new_snapshot' directory is available (default: False).
reverse (bool): Sort reverse (default: True).
Returns:
list: list of :py:class:`SID` objects
list: List of :py:class:`SID` objects.
"""
ret = list(iterSnapshots(cfg, includeNewSnapshot))
ret.sort(reverse = reverse)
ret.sort(reverse=reverse)

return ret


Expand Down
Loading

0 comments on commit 62a9c6e

Please sign in to comment.