Skip to content

Commit

Permalink
Merge pull request #5119 from andresailer/rel-v7r0
Browse files Browse the repository at this point in the history
[v7r0] HTCondorCE: Limit cleanup to a single run per minute per SiteDirector
  • Loading branch information
Andrei Tsaregorodtsev authored Apr 28, 2021
2 parents 1664d0c + e81d0a4 commit eba57b1
Showing 1 changed file with 22 additions and 4 deletions.
26 changes: 22 additions & 4 deletions Resources/Computing/HTCondorCEComputingElement.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,9 @@
import os
import tempfile
import commands
import datetime
import errno
import threading

from DIRAC import S_OK, S_ERROR, gConfig
from DIRAC.Resources.Computing.ComputingElement import ComputingElement
Expand Down Expand Up @@ -160,6 +162,10 @@ class HTCondorCEComputingElement(ComputingElement):
implementing the functions jobSubmit, getJobOutput
"""

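# class-level state shared by all instances, used to allow at most one cleanup per minute;
# the timestamp is initialised when the class is defined, so the first cleanup runs no earlier than 60 seconds after that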
_lastCleanupTime = datetime.datetime.utcnow()
_cleanupLock = threading.Lock()

#############################################################################
def __init__(self, ceUniqueID):
""" Standard constructor.
Expand Down Expand Up @@ -516,21 +522,33 @@ def __cleanup(self):
# FIXME: again some issue with the working directory...
# workingDirectory = self.ceParameters.get( 'WorkingDirectory', DEFAULT_WORKINGDIRECTORY )

if not HTCondorCEComputingElement._cleanupLock.acquire(False):
return

now = datetime.datetime.utcnow()
if (now - HTCondorCEComputingElement._lastCleanupTime).total_seconds() < 60:
HTCondorCEComputingElement._cleanupLock.release()
return

HTCondorCEComputingElement._lastCleanupTime = now

self.log.debug("Cleaning working directory: %s" % self.workingDirectory)

# Remove all files older than 120 minutes whose names start with DIRAC_.
# Condor will push files on submission, but it takes at least a few seconds until this
# happens, so we can't directly unlink after condor_submit.
status, stdout = commands.getstatusoutput('find %s -mmin +120 -name "DIRAC_*" -delete ' % self.workingDirectory)
status, stdout = commands.getstatusoutput('find -O3 %s -maxdepth 1 -mmin +120 -name "DIRAC_*" -delete ' %
self.workingDirectory)
if status:
self.log.error("Failure during HTCondorCE __cleanup", stdout)

# remove all out/err/log files older than "DaysToKeepLogs" days in the CE part of the working Directory
workDir = os.path.join(self.workingDirectory, self.ceName)
findPars = dict(workDir=workDir, days=self.daysToKeepLogs)
# remove all out/err/log files older than "DaysToKeepLogs" days in the working directory
# not running this for each CE so we do global cleanup
findPars = dict(workDir=self.workingDirectory, days=self.daysToKeepLogs)
# remove all out/err/log files older than "DaysToKeepLogs" days
status, stdout = commands.getstatusoutput(
r'find %(workDir)s -mtime +%(days)s -type f \( -name "*.out" -o -name "*.err" -o -name "*.log" \) -delete ' %
findPars)
if status:
self.log.error("Failure during HTCondorCE __cleanup", stdout)
self._cleanupLock.release()

0 comments on commit eba57b1

Please sign in to comment.