From e06559922d8401fe4d1cdd3c675f3d27bb416a4a Mon Sep 17 00:00:00 2001 From: Guilherme Amadio Date: Sun, 1 Sep 2024 17:01:40 +0200 Subject: [PATCH] Initial rebase of alja/purge-main-rb1 onto master. Some conflict during rebasr, assumingly due to commit on master: [Server] Commit remaining files/patches for inadvertent commit c29ef89 --- docs/ReleaseNotes.txt | 51 ++ src/XrdHeaders.cmake | 1 + src/XrdOss/XrdOssAt.hh | 2 + src/XrdPfc.cmake | 38 +- src/XrdPfc/README | 8 +- src/XrdPfc/XrdPfc.cc | 159 ++-- src/XrdPfc/XrdPfc.hh | 192 +---- src/XrdPfc/XrdPfcCommand.cc | 23 +- src/XrdPfc/XrdPfcConfiguration.cc | 92 ++- src/XrdPfc/XrdPfcDecision.hh | 4 +- src/XrdPfc/XrdPfcDirState.cc | 246 ++++++ src/XrdPfc/XrdPfcDirState.hh | 204 +++++ src/XrdPfc/XrdPfcDirStateSnapshot.cc | 121 +++ src/XrdPfc/XrdPfcDirStateSnapshot.hh | 94 +++ src/XrdPfc/XrdPfcFPurgeState.cc | 202 +++++ src/XrdPfc/XrdPfcFPurgeState.hh | 77 ++ src/XrdPfc/XrdPfcFSctl.cc | 14 +- src/XrdPfc/XrdPfcFile.cc | 120 ++- src/XrdPfc/XrdPfcFile.hh | 45 +- src/XrdPfc/XrdPfcFsTraversal.cc | 215 ++++++ src/XrdPfc/XrdPfcFsTraversal.hh | 85 +++ src/XrdPfc/XrdPfcIO.hh | 2 - src/XrdPfc/XrdPfcIOFile.cc | 14 +- src/XrdPfc/XrdPfcIOFile.hh | 3 +- src/XrdPfc/XrdPfcIOFileBlock.cc | 17 +- src/XrdPfc/XrdPfcIOFileBlock.hh | 3 +- src/XrdPfc/XrdPfcInfo.cc | 2 +- src/XrdPfc/XrdPfcInfo.hh | 12 +- src/XrdPfc/XrdPfcPathParseTools.hh | 138 ++++ src/XrdPfc/XrdPfcPurge.cc | 1031 +++----------------------- src/XrdPfc/XrdPfcPurgePin.hh | 79 ++ src/XrdPfc/XrdPfcPurgeQuota.cc | 137 ++++ src/XrdPfc/XrdPfcResourceMonitor.cc | 940 +++++++++++++++++++++++ src/XrdPfc/XrdPfcResourceMonitor.hh | 264 +++++++ src/XrdPfc/XrdPfcStats.hh | 151 ++-- src/XrdPfc/XrdPfcTypes.hh | 4 + src/XrdVersionPlugin.hh | 3 +- 37 files changed, 3488 insertions(+), 1305 deletions(-) create mode 100644 src/XrdPfc/XrdPfcDirState.cc create mode 100644 src/XrdPfc/XrdPfcDirState.hh create mode 100644 src/XrdPfc/XrdPfcDirStateSnapshot.cc create mode 100644 
src/XrdPfc/XrdPfcDirStateSnapshot.hh create mode 100644 src/XrdPfc/XrdPfcFPurgeState.cc create mode 100644 src/XrdPfc/XrdPfcFPurgeState.hh create mode 100644 src/XrdPfc/XrdPfcFsTraversal.cc create mode 100644 src/XrdPfc/XrdPfcFsTraversal.hh create mode 100644 src/XrdPfc/XrdPfcPathParseTools.hh create mode 100644 src/XrdPfc/XrdPfcPurgePin.hh create mode 100644 src/XrdPfc/XrdPfcPurgeQuota.cc create mode 100644 src/XrdPfc/XrdPfcResourceMonitor.cc create mode 100644 src/XrdPfc/XrdPfcResourceMonitor.hh diff --git a/docs/ReleaseNotes.txt b/docs/ReleaseNotes.txt index a5cc51b1f1d..5bb43c941d7 100644 --- a/docs/ReleaseNotes.txt +++ b/docs/ReleaseNotes.txt @@ -5,6 +5,57 @@ XRootD Release Notes ============= +------------- +Version 5.7.1 +------------- + ++ **New Features** + **[Apps]** Allow cconfig to write out combined config file (issue #1894) + **[Pss]** Allow for API endpoints for fixed remote origins (issue #2068) + **[Protocol]** Allow kXR_query to return proxy origin value, for proxy servers + **[Protocol]** Define readv limits + **[Protocol]** Indicate whether or not server has a cache in kXR_Protocol response + **[Server]** Allow server to assume an arbitrary network identity (issue #1855) + **[cmsd]** Allow a redirector to be configured read/only (issue #1764) + **[systemd]** Harden systemd service units for better security (issue #2033) + ++ **Major bug fixes** + **[POSIX]** Do not leak file pointer on open error (issue #2302) + **[Python]** Fix memory leaks when creating Python objects (#2324) + **[Secgsi]** Ensure correct certificate is used when passed via cgi with `xrd.gsiusrproxy=...` (issue #2292) + **[XrdCl]** Fix too few arguments to formatting function + ++ **Minor bug fixes** + **[POSIX]** Suppress error message when tearing down client connections (issue #2288) + **[Secgsi]** Fix code to follow documentation (issue #1817) + **[Seckrb5]** Improve error messages and use const where needed (issue #1948) + **[Server]** Allow more flexibility in adminpath 
permissions (issue #2276) + **[XrdCl]** Fix hidden overloaded virtual compilation error (#2291) + **[XrdCl]** Redact tokens in client logs (issue #2296) + **[XrdCl]** xrdfs: Fix typos in command line help string (issue #2323) + **[XrdHttp]** Fix CodeQL warning for overrunning write + **[XrdNet]** Avoid network identity failures (issue #1772, #2159) + **[XrdPfc]** Make sure direct vread requests conform to protocol limits (issue #2308) + **[XrdSecgsi]** Fix potential double free in GetSrvCertEnt() + **[XrdSecztn]** Fix potential use after free + ++ **Miscellaneous** + **[CMake]** Update CMake minimum requirement and supported versions + **[CMake]** Update test.cmake options for coverage builds + **[Misc]** Add SECURITY.md file describing XRootD security policy + **[Pss]** Export the final origin url for subprocess use + **[Tests]** Add new XRootD client/server test configurations + **[XrdApps]** Replace pragma once with header guards + **[XrdClHttp]** Conditionally load Davix grid module + **[XrdCl]** Add flag to optionally suppress force disconnect error messages + **[XrdHttp]** Apply keepalive when redirecting HTTP clients (#2290) + **[XrdNet]** Make sure domain value is defined + **[XrdNet]** Use lower case version of host names + **[XrdSys]** Determine `IOV_MAX` at runtime + **[XrdSys]** Dump coverage information on `SIGTERM` + **[XrdTpc]** Replace pragma once with header guards + **[docker]** Update CentOS 7 Dockerfile to use CentOS 7 Vault + ------------- Version 5.7.0 ------------- diff --git a/src/XrdHeaders.cmake b/src/XrdHeaders.cmake index 5836be561d3..aa4eecaa970 100644 --- a/src/XrdHeaders.cmake +++ b/src/XrdHeaders.cmake @@ -26,6 +26,7 @@ set( XROOTD_PUBLIC_HEADERS XrdNet/XrdNetSocket.hh XrdOuc/XrdOucBuffer.hh XrdOuc/XrdOucCRC.hh + XrdOuc/XrdOucCache.hh XrdOuc/XrdOucCacheCM.hh XrdOuc/XrdOucCacheStats.hh XrdOuc/XrdOucCallBack.hh diff --git a/src/XrdOss/XrdOssAt.hh b/src/XrdOss/XrdOssAt.hh index 6bcfb17e694..2c0dcfe2151 100644 --- 
a/src/XrdOss/XrdOssAt.hh +++ b/src/XrdOss/XrdOssAt.hh @@ -30,6 +30,8 @@ /* specific prior written permission of the institution or contributor. */ /******************************************************************************/ +#include "XrdOuc/XrdOucEnv.hh" + #include #include diff --git a/src/XrdPfc.cmake b/src/XrdPfc.cmake index 34d73b692c2..9e8e59f06b3 100644 --- a/src/XrdPfc.cmake +++ b/src/XrdPfc.cmake @@ -5,6 +5,7 @@ set( LIB_XRD_FILECACHE XrdPfc-${PLUGIN_VERSION} ) set( LIB_XRD_FILECACHE_LEGACY XrdFileCache-${PLUGIN_VERSION} ) set( LIB_XRD_BLACKLIST XrdBlacklistDecision-${PLUGIN_VERSION} ) +set( LIB_XRD_PURGEQUOTA XrdPfcPurgeQuota-${PLUGIN_VERSION} ) #------------------------------------------------------------------------------- # Shared library version @@ -15,11 +16,18 @@ set( LIB_XRD_BLACKLIST XrdBlacklistDecision-${PLUGIN_VERSION} ) #------------------------------------------------------------------------------- add_library( ${LIB_XRD_FILECACHE} - MODULE + SHARED XrdPfc/XrdPfcTypes.hh XrdPfc/XrdPfc.cc XrdPfc/XrdPfc.hh XrdPfc/XrdPfcConfiguration.cc + XrdPfc/XrdPfcDirState.cc XrdPfc/XrdPfcDirState.hh + XrdPfc/XrdPfcDirStateSnapshot.cc XrdPfc/XrdPfcDirStateSnapshot.hh + XrdPfc/XrdPfcFPurgeState.cc XrdPfc/XrdPfcFPurgeState.hh XrdPfc/XrdPfcPurge.cc + XrdPfc/XrdPfcPurgePin.hh + XrdPfc/XrdPfcResourceMonitor.cc XrdPfc/XrdPfcResourceMonitor.hh + XrdPfc/XrdPfcPathParseTools.hh + XrdPfc/XrdPfcFsTraversal.cc XrdPfc/XrdPfcFsTraversal.hh XrdPfc/XrdPfcCommand.cc XrdPfc/XrdPfcFile.cc XrdPfc/XrdPfcFile.hh XrdPfc/XrdPfcFSctl.cc XrdPfc/XrdPfcFSctl.hh @@ -53,6 +61,21 @@ target_link_libraries( XrdUtils ) +#------------------------------------------------------------------------------- +# The XrdPurgeQuota library +#------------------------------------------------------------------------------- +add_library( + ${LIB_XRD_PURGEQUOTA} + MODULE + XrdPfc/XrdPfcPurgeQuota.cc) + +target_link_libraries( + ${LIB_XRD_PURGEQUOTA} + PRIVATE + XrdUtils + ${LIB_XRD_FILECACHE} + ) + 
#------------------------------------------------------------------------------- # xrdpfc_print #------------------------------------------------------------------------------- @@ -81,6 +104,19 @@ install( COMMAND ln -sf lib${LIB_XRD_FILECACHE}.so lib${LIB_XRD_FILECACHE_LEGACY}.so WORKING_DIRECTORY \$ENV{DESTDIR}/${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR} )" ) +install( + FILES + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcPurgePin.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcDirStateSnapshot.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcDirState.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcStats.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfc.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcFile.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcTypes.hh + ${CMAKE_CURRENT_SOURCE_DIR}/XrdPfc/XrdPfcInfo.hh + DESTINATION ${CMAKE_INSTALL_PREFIX}/include/xrootd/XrdPfc +) + install( TARGETS ${LIB_XRD_BLACKLIST} LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ) diff --git a/src/XrdPfc/README b/src/XrdPfc/README index 488b31eeb28..673c728088c 100644 --- a/src/XrdPfc/README +++ b/src/XrdPfc/README @@ -114,7 +114,7 @@ CONFIGURATION pfc.blocksize: prefetch buffer size, default 1M -pfc.ram [bytes[g]]: maximum allowed RAM usage for caching proxy +pfc.ram [bytes[g]]: maximum allowed RAM usage for caching proxy pfc.prefetch : prefetch level, default is 10. Value zero disables prefetching. 
@@ -122,16 +122,16 @@ pfc.diskusage diskusage boundaries, can be specified relative in per pfc.user : username used by XrdOss plugin -pfc.filefragmentmode [fragmentsize ] -- enable prefetching a unit of a file, +pfc.filefragmentmode [fragmentsize ] -- enable prefetching a unit of a file, with default block size -pfc.osslib [] path to alternative plign for output file system +pfc.osslib [] path to alternative plign for output file system pfc.decisionlib [] path to decision library and plugin parameters pfc.trace default level is warning, xrootd option -d sets debug level -Examples +Examples a) Enable proxy file prefetching: pps.cachelib libXrdPfc.so diff --git a/src/XrdPfc/XrdPfc.cc b/src/XrdPfc/XrdPfc.cc index 9d411ced2c6..4992aa13f05 100644 --- a/src/XrdPfc/XrdPfc.cc +++ b/src/XrdPfc/XrdPfc.cc @@ -28,7 +28,6 @@ #include "XrdOuc/XrdOucUtils.hh" #include "XrdOuc/XrdOucPrivateUtils.hh" -#include "XrdSys/XrdSysPthread.hh" #include "XrdSys/XrdSysTimer.hh" #include "XrdSys/XrdSysTrace.hh" @@ -42,23 +41,17 @@ #include "XrdPfcInfo.hh" #include "XrdPfcIOFile.hh" #include "XrdPfcIOFileBlock.hh" +#include "XrdPfcResourceMonitor.hh" using namespace XrdPfc; -Cache * Cache::m_instance = 0; +Cache *Cache::m_instance = nullptr; +XrdScheduler *Cache::schedP = nullptr; -XrdScheduler *Cache::schedP = 0; - -void *ResourceMonitorHeartBeatThread(void*) -{ - Cache::GetInstance().ResourceMonitorHeartBeat(); - return 0; -} - -void *PurgeThread(void*) +void *ResourceMonitorThread(void*) { - Cache::GetInstance().Purge(); + Cache::ResMon().main_thread_function(); return 0; } @@ -100,11 +93,13 @@ XrdOucCache *XrdOucGetCache(XrdSysLogger *logger, err.Say("Config Proxy file cache initialization failed."); return 0; } - err.Say("------ Proxy file cache initialization completed."); + err.Say("++++++ Proxy file cache initialization completed."); { pthread_t tid; + XrdSysThread::Run(&tid, ResourceMonitorThread, 0, 0, "XrdPfc ResourceMonitor"); + for (int wti = 0; wti < 
instance.RefConfiguration().m_wqueue_threads; ++wti) { XrdSysThread::Run(&tid, ProcessWriteTaskThread, 0, 0, "XrdPfc WriteTasks "); @@ -114,10 +109,6 @@ XrdOucCache *XrdOucGetCache(XrdSysLogger *logger, { XrdSysThread::Run(&tid, PrefetchThread, 0, 0, "XrdPfc Prefetch "); } - - XrdSysThread::Run(&tid, ResourceMonitorHeartBeatThread, 0, 0, "XrdPfc ResourceMonitorHeartBeat"); - - XrdSysThread::Run(&tid, PurgeThread, 0, 0, "XrdPfc Purge"); } XrdPfcFSctl* pfcFSctl = new XrdPfcFSctl(instance, logger); @@ -129,27 +120,6 @@ XrdOucCache *XrdOucGetCache(XrdSysLogger *logger, //============================================================================== -void Configuration::calculate_fractional_usages(long long du, long long fu, - double &frac_du, double &frac_fu) -{ - // Calculate fractional disk / file usage and clamp them to [0, 1]. - - // Fractional total usage above LWM: - // - can be > 1 if usage is above HWM; - // - can be < 0 if triggered via age-based-purging. - frac_du = (double) (du - m_diskUsageLWM) / (m_diskUsageHWM - m_diskUsageLWM); - - // Fractional file usage above baseline. - // - can be > 1 if file usage is above max; - // - can be < 0 if file usage is below baseline. 
- frac_fu = (double) (fu - m_fileUsageBaseline) / (m_fileUsageMax - m_fileUsageBaseline); - - frac_du = std::min( std::max( frac_du, 0.0), 1.0 ); - frac_fu = std::min( std::max( frac_fu, 0.0), 1.0 ); -} - -//============================================================================== - Cache &Cache::CreateInstance(XrdSysLogger *logger, XrdOucEnv *env) { assert (m_instance == 0); @@ -157,9 +127,10 @@ Cache &Cache::CreateInstance(XrdSysLogger *logger, XrdOucEnv *env) return *m_instance; } - Cache& Cache::GetInstance() { return *m_instance; } -const Cache& Cache::TheOne() { return *m_instance; } -const Configuration& Cache::Conf() { return m_instance->RefConfiguration(); } + Cache& Cache::GetInstance() { return *m_instance; } +const Cache& Cache::TheOne() { return *m_instance; } +const Configuration& Cache::Conf() { return m_instance->RefConfiguration(); } + ResourceMonitor& Cache::ResMon() { return m_instance->RefResMon(); } bool Cache::Decide(XrdOucCacheIO* io) { @@ -190,19 +161,14 @@ Cache::Cache(XrdSysLogger *logger, XrdOucEnv *env) : m_traceID("Cache"), m_oss(0), m_gstream(0), + m_purge_pin(0), m_prefetch_condVar(0), m_prefetch_enabled(false), m_RAM_used(0), m_RAM_write_queue(0), m_RAM_std_size(0), m_isClient(false), - m_in_purge(false), - m_active_cond(0), - m_stats_n_purge_cond(0), - m_fs_state(0), - m_last_scan_duration(0), - m_last_purge_duration(0), - m_spt_state(SPTS_Idle) + m_active_cond(0) { // Default log level is Warning. m_trace->What = 2; @@ -349,6 +315,15 @@ void Cache::ProcessWriteTasks() } } +long long Cache::WritesSinceLastCall() +{ + // Called from ResourceMonitor for an alternative estimation of disk writes. 
+ XrdSysCondVarHelper lock(&m_writeQ.condVar); + long long ret = m_writeQ.writes_between_purges; + m_writeQ.writes_between_purges = 0; + return ret; +} + //============================================================================== char* Cache::RequestRAM(long long size) @@ -412,7 +387,7 @@ void Cache::ReleaseRAM(char* buf, long long size) File* Cache::GetFile(const std::string& path, IO* io, long long off, long long filesize) { // Called from virtual IO::Attach - + TRACE(Debug, "GetFile " << path << ", io " << io); ActiveMap_i it; @@ -493,9 +468,9 @@ File* Cache::GetFile(const std::string& path, IO* io, long long off, long long f void Cache::ReleaseFile(File* f, IO* io) { // Called from virtual IO::DetachFinalize. - + TRACE(Debug, "ReleaseFile " << f->GetLocalPath() << ", io " << io); - + { XrdSysCondVarHelper lock(&m_active_cond); @@ -504,7 +479,10 @@ void Cache::ReleaseFile(File* f, IO* io) dec_ref_cnt(f, true); } - + +//============================================================================== +//============================================================================== + namespace { @@ -627,6 +605,7 @@ void Cache::dec_ref_cnt(File* f, bool high_debug) } } + bool finished_p = false; { XrdSysCondVarHelper lock(&m_active_cond); @@ -637,41 +616,44 @@ void Cache::dec_ref_cnt(File* f, bool high_debug) ActiveMap_i it = m_active.find(f->GetLocalPath()); m_active.erase(it); - m_closed_files_stats.insert(std::make_pair(f->GetLocalPath(), f->DeltaStatsFromLastCall())); + finished_p = true; + } + } - if (m_gstream) + if (finished_p) + { + if (m_gstream) + { + const Stats &st = f->RefStats(); + const Info::AStat *as = f->GetLastAccessStats(); + + char buf[4096]; + int len = snprintf(buf, 4096, "{\"event\":\"file_close\"," + "\"lfn\":\"%s\",\"size\":%lld,\"blk_size\":%d,\"n_blks\":%d,\"n_blks_done\":%d," + "\"access_cnt\":%lu,\"attach_t\":%lld,\"detach_t\":%lld,\"remotes\":%s," + 
"\"b_hit\":%lld,\"b_miss\":%lld,\"b_bypass\":%lld,\"b_write\":%lld,\"n_cks_errs\":%d}", + f->GetLocalPath().c_str(), f->GetFileSize(), f->GetBlockSize(), + f->GetNBlocks(), f->GetNDownloadedBlocks(), + (unsigned long) f->GetAccessCnt(), (long long) as->AttachTime, (long long) as->DetachTime, + f->GetRemoteLocations().c_str(), + st.m_BytesHit, st.m_BytesMissed, st.m_BytesBypassed, st.m_BytesWritten, st.m_NCksumErrors + ); + bool suc = false; + if (len < 4096) { - const Stats &st = f->RefStats(); - const Info::AStat *as = f->GetLastAccessStats(); - - char buf[4096]; - int len = snprintf(buf, 4096, "{\"event\":\"file_close\"," - "\"lfn\":\"%s\",\"size\":%lld,\"blk_size\":%d,\"n_blks\":%d,\"n_blks_done\":%d," - "\"access_cnt\":%lu,\"attach_t\":%lld,\"detach_t\":%lld,\"remotes\":%s," - "\"b_hit\":%lld,\"b_miss\":%lld,\"b_bypass\":%lld,\"n_cks_errs\":%d}", - f->GetLocalPath().c_str(), f->GetFileSize(), f->GetBlockSize(), - f->GetNBlocks(), f->GetNDownloadedBlocks(), - (unsigned long) f->GetAccessCnt(), (long long) as->AttachTime, (long long) as->DetachTime, - f->GetRemoteLocations().c_str(), - as->BytesHit, as->BytesMissed, as->BytesBypassed, st.m_NCksumErrors - ); - bool suc = false; - if (len < 4096) - { - suc = m_gstream->Insert(buf, len + 1); - } - if ( ! suc) - { - TRACE(Error, "Failed g-stream insertion of file_close record, len=" << len); - } + suc = m_gstream->Insert(buf, len + 1); + } + if ( ! 
suc) + { + TRACE(Error, "Failed g-stream insertion of file_close record, len=" << len); } - - delete f; } + + delete f; } } -bool Cache::IsFileActiveOrPurgeProtected(const std::string& path) +bool Cache::IsFileActiveOrPurgeProtected(const std::string& path) const { XrdSysCondVarHelper lock(&m_active_cond); @@ -679,6 +661,11 @@ bool Cache::IsFileActiveOrPurgeProtected(const std::string& path) m_purge_delay_set.find(path) != m_purge_delay_set.end(); } +void Cache::ClearPurgeProtectedSet() +{ + XrdSysCondVarHelper lock(&m_active_cond); + m_purge_delay_set.clear(); +} //============================================================================== //=== PREFETCH @@ -1132,6 +1119,7 @@ int Cache::Unlink(const char *curl) int Cache::UnlinkFile(const std::string& f_name, bool fail_if_open) { + static const char* trc_pfx = "UnlinkFile "; ActiveMap_i it; File *file = 0; { @@ -1143,7 +1131,7 @@ int Cache::UnlinkFile(const std::string& f_name, bool fail_if_open) { if (fail_if_open) { - TRACE(Info, "UnlinkCommon " << f_name << ", file currently open and force not requested - denying request"); + TRACE(Info, trc_pfx << f_name << ", file currently open and force not requested - denying request"); return -EBUSY; } @@ -1151,7 +1139,7 @@ int Cache::UnlinkFile(const std::string& f_name, bool fail_if_open) // Attach() with possible File::Open(). Ask for retry. 
if (it->second == 0) { - TRACE(Info, "UnlinkCommon " << f_name << ", an operation on this file is ongoing - denying request"); + TRACE(Info, trc_pfx << f_name << ", an operation on this file is ongoing - denying request"); return -EAGAIN; } @@ -1173,10 +1161,15 @@ int Cache::UnlinkFile(const std::string& f_name, bool fail_if_open) std::string i_name = f_name + Info::s_infoExtension; // Unlink file & cinfo + struct stat f_stat; + bool stat_ok = (m_oss->Stat(f_name.c_str(), &f_stat) == XrdOssOK); int f_ret = m_oss->Unlink(f_name.c_str()); int i_ret = m_oss->Unlink(i_name.c_str()); - TRACE(Debug, "UnlinkCommon " << f_name << ", f_ret=" << f_ret << ", i_ret=" << i_ret); + if (stat_ok) + m_res_mon->register_file_purge(f_name, f_stat.st_blocks); + + TRACE(Debug, trc_pfx << f_name << ", f_ret=" << f_ret << ", i_ret=" << i_ret); { XrdSysCondVarHelper lock(&m_active_cond); diff --git a/src/XrdPfc/XrdPfc.hh b/src/XrdPfc/XrdPfc.hh index fdc4093dd70..4c3a06b265b 100644 --- a/src/XrdPfc/XrdPfc.hh +++ b/src/XrdPfc/XrdPfc.hh @@ -9,7 +9,7 @@ // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // -// XRootD is distributed in the hope that it will be useful, +// XRootD is distributed in the hope that it will be useful,fm_pu // but WITHOUT ANY emacs WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. 
@@ -27,11 +27,11 @@ #include "XrdSys/XrdSysPthread.hh" #include "XrdOuc/XrdOucCache.hh" #include "XrdOuc/XrdOucCallBack.hh" -#include "XrdCl/XrdClDefaultEnv.hh" #include "XrdPfcFile.hh" #include "XrdPfcDecision.hh" +class XrdOss; class XrdOucStream; class XrdSysError; class XrdSysTrace; @@ -41,8 +41,16 @@ namespace XrdPfc { class File; class IO; +class PurgePin; +class ResourceMonitor; -class DataFsState; + +template +struct MutexHolder { + MOO &mutex; + MutexHolder(MOO &m) : mutex(m) { mutex.Lock(); } + ~MutexHolder() { mutex.UnLock(); } +}; } @@ -62,8 +70,6 @@ struct Configuration bool is_dir_stat_reporting_on() const { return m_dirStatsMaxDepth >= 0 || ! m_dirStatsDirs.empty() || ! m_dirStatsDirGlobs.empty(); } bool is_purge_plugin_set_up() const { return false; } - void calculate_fractional_usages(long long du, long long fu, double &frac_du, double &frac_fu); - CkSumCheck_e get_cs_Chk() const { return (CkSumCheck_e) m_cs_Chk; } bool is_cschk_cache() const { return m_cs_Chk & CSChk_Cache; } @@ -133,128 +139,6 @@ struct TmpConfiguration {} }; -//============================================================================== - -struct SplitParser -{ - char *f_str; - const char *f_delim; - char *f_state; - bool f_first; - - SplitParser(const std::string &s, const char *d) : - f_str(strdup(s.c_str())), f_delim(d), f_state(0), f_first(true) - {} - ~SplitParser() { free(f_str); } - - char* get_token() - { - if (f_first) { f_first = false; return strtok_r(f_str, f_delim, &f_state); } - else { return strtok_r(0, f_delim, &f_state); } - } - - char* get_reminder_with_delim() - { - if (f_first) { return f_str; } - else { *(f_state - 1) = f_delim[0]; return f_state - 1; } - } - - char *get_reminder() - { - return f_first ? 
f_str : f_state; - } - - int fill_argv(std::vector &argv) - { - if (!f_first) return 0; - int dcnt = 0; { char *p = f_str; while (*p) { if (*(p++) == f_delim[0]) ++dcnt; } } - argv.reserve(dcnt + 1); - int argc = 0; - char *i = strtok_r(f_str, f_delim, &f_state); - while (i) - { - ++argc; - argv.push_back(i); - // printf(" arg %d : '%s'\n", argc, i); - i = strtok_r(0, f_delim, &f_state); - } - return argc; - } -}; - -struct PathTokenizer : private SplitParser -{ - std::vector m_dirs; - const char *m_reminder; - int m_n_dirs; - - PathTokenizer(const std::string &path, int max_depth, bool parse_as_lfn) : - SplitParser(path, "/"), - m_reminder (0), - m_n_dirs (0) - { - // If parse_as_lfn is true store final token into m_reminder, regardless of maxdepth. - // This assumes the last token is a file name (and full path is lfn, including the file name). - - m_dirs.reserve(max_depth); - - char *t = 0; - for (int i = 0; i < max_depth; ++i) - { - t = get_token(); - if (t == 0) break; - m_dirs.emplace_back(t); - } - if (parse_as_lfn && *get_reminder() == 0 && ! 
m_dirs.empty()) - { - m_reminder = m_dirs.back(); - m_dirs.pop_back(); - } - else - { - m_reminder = get_reminder(); - } - m_n_dirs = (int) m_dirs.size(); - } - - int get_n_dirs() - { - return m_n_dirs; - } - - const char *get_dir(int pos) - { - if (pos >= m_n_dirs) return 0; - return m_dirs[pos]; - } - - std::string make_path() - { - std::string res; - for (std::vector::iterator i = m_dirs.begin(); i != m_dirs.end(); ++i) - { - res += "/"; - res += *i; - } - if (m_reminder != 0) - { - res += "/"; - res += m_reminder; - } - return res; - } - - void deboog() - { - printf("PathTokenizer::deboog size=%d\n", m_n_dirs); - for (int i = 0; i < m_n_dirs; ++i) - { - printf(" %2d: %s\n", i, m_dirs[i]); - } - printf(" rem: %s\n", m_reminder); - } -}; - //============================================================================== // Cache @@ -336,21 +220,13 @@ public: static const Cache &TheOne(); static const Configuration &Conf(); + static ResourceMonitor &ResMon(); + //--------------------------------------------------------------------- //! Version check. //--------------------------------------------------------------------- static bool VCheck(XrdVersionInfo &urVersion) { return true; } - //--------------------------------------------------------------------- - //! Thread function checking resource usage periodically. - //--------------------------------------------------------------------- - void ResourceMonitorHeartBeat(); - - //--------------------------------------------------------------------- - //! Thread function invoked to scan and purge files from disk when needed. - //--------------------------------------------------------------------- - void Purge(); - //--------------------------------------------------------------------- //! Remove cinfo and data files from cache. 
//--------------------------------------------------------------------- @@ -372,6 +248,8 @@ public: //--------------------------------------------------------------------- void ProcessWriteTasks(); + long long WritesSinceLastCall(); + char* RequestRAM(long long size); void ReleaseRAM(char* buf, long long size); @@ -384,8 +262,10 @@ public: XrdOss* GetOss() const { return m_oss; } - bool IsFileActiveOrPurgeProtected(const std::string&); - + bool IsFileActiveOrPurgeProtected(const std::string&) const; + void ClearPurgeProtectedSet(); + PurgePin* GetPurgePin() const { return m_purge_pin; } + File* GetFile(const std::string&, IO*, long long off = 0, long long filesize = 0); void ReleaseFile(File*, IO*); @@ -393,10 +273,11 @@ public: void ScheduleFileSync(File* f) { schedule_file_sync(f, false, false); } void FileSyncDone(File*, bool high_debug); - + XrdSysError* GetLog() { return &m_log; } XrdSysTrace* GetTrace() { return m_trace; } + ResourceMonitor& RefResMon() { return *m_res_mon; } XrdXrootdGStream* GetGStream() { return m_gstream; } void ExecuteCommandUrl(const std::string& command_url); @@ -408,6 +289,7 @@ private: bool ConfigXeq(char *, XrdOucStream &); bool xcschk(XrdOucStream &); bool xdlib(XrdOucStream &); + bool xplib(XrdOucStream &); bool xtrace(XrdOucStream &); bool cfg2bytes(const std::string &str, long long &store, long long totalSpace, const char *name); @@ -423,7 +305,10 @@ private: XrdXrootdGStream *m_gstream; - std::vector m_decisionpoints; //!< decision plugins + ResourceMonitor *m_res_mon; + + std::vector m_decisionpoints; //!< decision plugins + PurgePin* m_purge_pin; //!< purge plugin Configuration m_configuration; //!< configurable parameters @@ -453,15 +338,11 @@ private: // active map, purge delay set typedef std::map ActiveMap_t; typedef ActiveMap_t::iterator ActiveMap_i; - typedef std::multimap StatsMMap_t; - typedef StatsMMap_t::iterator StatsMMap_i; typedef std::set FNameSet_t; - ActiveMap_t m_active; //!< Map of currently active / open 
files. - StatsMMap_t m_closed_files_stats; - FNameSet_t m_purge_delay_set; - bool m_in_purge; - XrdSysCondVar m_active_cond; //!< Cond-var protecting active file data structures. + ActiveMap_t m_active; //!< Map of currently active / open files. + FNameSet_t m_purge_delay_set; //!< Set of files that should not be purged. + mutable XrdSysCondVar m_active_cond; //!< Cond-var protecting active file data structures. void inc_ref_cnt(File*, bool lock, bool high_debug); void dec_ref_cnt(File*, bool high_debug); @@ -471,21 +352,6 @@ private: // prefetching typedef std::vector PrefetchList; PrefetchList m_prefetchList; - - //--------------------------------------------------------------------------- - // Statistics, heart-beat, scan-and-purge - - enum ScanAndPurgeThreadState_e { SPTS_Idle, SPTS_Scan, SPTS_Purge, SPTS_Done }; - - XrdSysCondVar m_stats_n_purge_cond; //!< communication between heart-beat and scan-purge threads - - DataFsState *m_fs_state; //!< directory state for access / usage info and quotas - - int m_last_scan_duration; - int m_last_purge_duration; - ScanAndPurgeThreadState_e m_spt_state; - - void copy_out_active_stats_and_update_data_fs_state(); }; } diff --git a/src/XrdPfc/XrdPfcCommand.cc b/src/XrdPfc/XrdPfcCommand.cc index 60d0ca3f19b..15c527a75a5 100644 --- a/src/XrdPfc/XrdPfcCommand.cc +++ b/src/XrdPfc/XrdPfcCommand.cc @@ -19,6 +19,8 @@ #include "XrdPfcInfo.hh" #include "XrdPfc.hh" #include "XrdPfcTrace.hh" +#include "XrdPfcPathParseTools.hh" +#include "XrdPfcResourceMonitor.hh" #include "XrdOfs/XrdOfsConfigPI.hh" #include "XrdOss/XrdOss.hh" @@ -245,6 +247,15 @@ void Cache::ExecuteCommandUrl(const std::string& command_url) myInfo.Write(myInfoFile, cinfo_path.c_str()); + // Fake last modified time to the last access_time + { + time_t last_detach; + myInfo.GetLatestDetachTime(last_detach); + struct timespec acc_mod_time[2] = { {last_detach, UTIME_OMIT}, {last_detach, 0} }; + + futimens(myInfoFile->getFD(), acc_mod_time); + } + myInfoFile->Close(); 
delete myInfoFile; myFile->Close(); delete myFile; @@ -255,6 +266,14 @@ void Cache::ExecuteCommandUrl(const std::string& command_url) m_writeQ.writes_between_purges += file_size; } + { + int token = m_res_mon->register_file_open(file_path, time_now, false); + XrdPfc::Stats stats; + stats.m_BytesWritten = file_size; + stats.m_StBlocksAdded = (file_size & 0x1ff) ? (file_size >> 9) + 1 : file_size >> 9; + m_res_mon->register_file_update_stats(token, stats); + m_res_mon->register_file_close(token, time(0), stats); + } } } @@ -280,7 +299,7 @@ void Cache::ExecuteCommandUrl(const std::string& command_url) SplitParser ap(token, " "); int argc = ap.fill_argv(argv); - XrdOucArgs Spec(&m_log, err_prefix, "hvs:b:t:d:", + XrdOucArgs Spec(&m_log, err_prefix, "h", "help", 1, "h", (const char *) 0); @@ -307,7 +326,7 @@ void Cache::ExecuteCommandUrl(const std::string& command_url) return; } - std::string f_name(cp.get_reminder()); + std::string f_name(cp.get_reminder_with_delim()); TRACE(Debug, err_prefix << "file argument '" << f_name << "'."); diff --git a/src/XrdPfc/XrdPfcConfiguration.cc b/src/XrdPfc/XrdPfcConfiguration.cc index 2cdb5869394..387786fc132 100644 --- a/src/XrdPfc/XrdPfcConfiguration.cc +++ b/src/XrdPfc/XrdPfcConfiguration.cc @@ -2,6 +2,9 @@ #include "XrdPfcTrace.hh" #include "XrdPfcInfo.hh" +#include "XrdPfcResourceMonitor.hh" +#include "XrdPfcPurgePin.hh" + #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" @@ -228,6 +231,59 @@ bool Cache::xdlib(XrdOucStream &Config) return true; } +/* Function: xplib + + Purpose: To parse the directive: purgelib [] + + the path of the decision library to be used. + optional parameters to be passed. + + + Output: true upon success or false upon failure. + */ +bool Cache::xplib(XrdOucStream &Config) +{ + const char* val; + + std::string libp; + if (! (val = Config.GetWord()) || ! 
val[0]) + { + TRACE(Info," Cache::Config() purgelib not specified; will use LRU for purging files"); + return true; + } + else + { + libp = val; + } + + char params[4096]; + if (val[0]) + Config.GetRest(params, 4096); + else + params[0] = 0; + + XrdOucPinLoader* myLib = new XrdOucPinLoader(&m_log, 0, "purgelib", + libp.c_str()); + + PurgePin *(*ep)(XrdSysError&); + ep = (PurgePin *(*)(XrdSysError&))myLib->Resolve("XrdPfcGetPurgePin"); + if (! ep) {myLib->Unload(true); return false; } + + PurgePin * dp = ep(m_log); + if (! dp) + { + TRACE(Error, "Config() purgelib was not able to create a Purge Plugin object?"); + return false; + } + m_purge_pin = dp; + + if (params[0]) + m_purge_pin->ConfigPurgePin(params); + + + return true; +} + /* Function: xtrace Purpose: To parse the directive: trace @@ -334,6 +390,10 @@ bool Cache::Config(const char *config_filename, const char *parameters) { retval = xdlib(Config); } + else if (! strcmp(var,"pfc.purgelib")) + { + retval = xplib(Config); + } else if (! 
strcmp(var,"pfc.trace")) { retval = xtrace(Config); @@ -382,6 +442,18 @@ bool Cache::Config(const char *config_filename, const char *parameters) // sets default value for disk usage XrdOssVSInfo sP; { + if (m_configuration.m_meta_space != m_configuration.m_data_space && + m_oss->StatVS(&sP, m_configuration.m_meta_space.c_str(), 1) < 0) + { + m_log.Emsg("ConfigParameters()", "error obtaining stat info for meta space ", m_configuration.m_meta_space.c_str()); + return false; + } + if (m_configuration.m_meta_space != m_configuration.m_data_space && sP.Total < 10ll << 20) + { + m_log.Emsg("ConfigParameters()", "available data space is less than 10 MB (can be due to a mistake in oss.localroot directive) for space ", + m_configuration.m_meta_space.c_str()); + return false; + } if (m_oss->StatVS(&sP, m_configuration.m_data_space.c_str(), 1) < 0) { m_log.Emsg("ConfigParameters()", "error obtaining stat info for data space ", m_configuration.m_data_space.c_str()); @@ -419,6 +491,13 @@ bool Cache::Config(const char *config_filename, const char *parameters) m_log.Emsg("ConfigParameters()", "pfc.diskusage files should have baseline < nominal < max."); aOK = false; } + + + if (aOK && m_configuration.m_fileUsageMax >= m_configuration.m_diskUsageLWM) + { + m_log.Emsg("ConfigParameters()", "pfc.diskusage files values must be below lowWatermark"); + aOK = false; + } } else aOK = false; } @@ -456,7 +535,7 @@ bool Cache::Config(const char *config_filename, const char *parameters) } // Setup number of standard-size blocks not released back to the system to 5% of total RAM. 
m_configuration.m_RamKeepStdBlocks = (m_configuration.m_RamAbsAvailable / m_configuration.m_bufferSize + 1) * 5 / 100; - + // Set tracing to debug if this is set in environment char* cenv = getenv("XRDDEBUG"); @@ -552,9 +631,16 @@ bool Cache::Config(const char *config_filename, const char *parameters) m_gstream = (XrdXrootdGStream*) m_env->GetPtr("pfc.gStream*"); - m_log.Say("Config Proxy File Cache g-stream has", m_gstream ? "" : " NOT", " been configured via xrootd.monitor directive"); + m_log.Say(" pfc g-stream has", m_gstream ? "" : " NOT", " been configured via xrootd.monitor directive\n"); + + // Create the ResourceMonitor and get it ready for starting the main thread function. + if (aOK) + { + m_res_mon = new ResourceMonitor(*m_oss); + m_res_mon->init_before_main(); + } - m_log.Say("------ Proxy File Cache configuration parsing ", aOK ? "completed" : "failed"); + m_log.Say("=====> Proxy file cache configuration parsing ", aOK ? "completed" : "failed"); if (ofsCfg) delete ofsCfg; diff --git a/src/XrdPfc/XrdPfcDecision.hh b/src/XrdPfc/XrdPfcDecision.hh index 3f62c87764b..9e85f8651fb 100644 --- a/src/XrdPfc/XrdPfcDecision.hh +++ b/src/XrdPfc/XrdPfcDecision.hh @@ -19,10 +19,8 @@ //---------------------------------------------------------------------------------- #include -#include -#include -#include "XrdOss/XrdOss.hh" +class XrdOss; class XrdSysError; namespace XrdPfc diff --git a/src/XrdPfc/XrdPfcDirState.cc b/src/XrdPfc/XrdPfcDirState.cc new file mode 100644 index 00000000000..9b777661e82 --- /dev/null +++ b/src/XrdPfc/XrdPfcDirState.cc @@ -0,0 +1,246 @@ +#include "XrdPfcDirState.hh" +#include "XrdPfcPathParseTools.hh" + +#include + +using namespace XrdPfc; + +//---------------------------------------------------------------------------- +//! 
Constructor +//---------------------------------------------------------------------------- +DirState::DirState() : m_parent(0), m_depth(0) +{} + +//---------------------------------------------------------------------------- +//! Constructor +//! @param parent parent directory +//---------------------------------------------------------------------------- +DirState::DirState(DirState *parent) : + m_parent(parent), + m_depth(m_parent->m_depth + 1) +{} + +//---------------------------------------------------------------------------- +//! Constructor +//! @param parent parent DirState object +//! @param dname name of this directory only, no slashes, no extras. +//---------------------------------------------------------------------------- +DirState::DirState(DirState *parent, const std::string &dname) : + DirStateBase(dname), + m_parent(parent), + m_depth(m_parent->m_depth + 1) +{} + +//---------------------------------------------------------------------------- +//! Internal function called from find_dir or find_path_tok +//! @param dir subdir name +//---------------------------------------------------------------------------- +DirState *DirState::create_child(const std::string &dir) +{ + std::pair<DsMap_i, bool> ir = m_subdirs.insert(std::make_pair(dir, DirState(this, dir))); + return &ir.first->second; +} + +//---------------------------------------------------------------------------- +//! Internal function called from find_path +//!
@param dir subdir name +//---------------------------------------------------------------------------- +DirState *DirState::find_path_tok(PathTokenizer &pt, int pos, bool create_subdirs, + DirState **last_existing_dir) +{ + if (pos == pt.get_n_dirs()) + return this; + + DirState *ds = nullptr; + + DsMap_i i = m_subdirs.find(pt.m_dirs[pos]); + + if (i != m_subdirs.end()) + { + ds = &i->second; + if (last_existing_dir) + *last_existing_dir = ds; + } + else if (create_subdirs) + { + ds = create_child(pt.m_dirs[pos]); + } + + if (ds) + return ds->find_path_tok(pt, pos + 1, create_subdirs, last_existing_dir); + + return nullptr; +} + +//---------------------------------------------------------------------------- +//! Recursive function to find DirState with given absolute dir path +//! @param path full path to parse +//! @param max_depth directory depth to which to descend (value < 0 means full descent) +//! @param parse_as_lfn +//! @param create_subdirs +DirState *DirState::find_path(const std::string &path, int max_depth, bool parse_as_lfn, + bool create_subdirs, DirState **last_existing_dir) +{ + PathTokenizer pt(path, max_depth, parse_as_lfn); + + if (last_existing_dir) + *last_existing_dir = this; + + return find_path_tok(pt, 0, create_subdirs, last_existing_dir); +} + +//---------------------------------------------------------------------------- +//! Non recursive function to find an entry in this directory only. +//! @param dir subdir name @param bool create the subdir in this DirsStat +//! @param create_subdirs if true and the dir is not found, a new DirState +//! child is created +DirState *DirState::find_dir(const std::string &dir, + bool create_subdirs) +{ + DsMap_i i = m_subdirs.find(dir); + + if (i != m_subdirs.end()) + return &i->second; + + if (create_subdirs) + return create_child(dir); + + return nullptr; +} + +//---------------------------------------------------------------------------- +//! Propagate usages to parents after initial directory scan. 
+//! Called from ResourceMonitor::perform_initial_scan() +//---------------------------------------------------------------------------- +void DirState::upward_propagate_initial_scan_usages() +{ + DirUsage &here = m_here_usage; + DirUsage &subdirs = m_recursive_subdir_usage; + + for (auto & [name, daughter] : m_subdirs) + { + daughter.upward_propagate_initial_scan_usages(); + + DirUsage &dhere = daughter.m_here_usage; + DirUsage &dsubdirs = daughter.m_recursive_subdir_usage; + + here.m_NDirectories += 1; + + subdirs.m_StBlocks += dhere.m_StBlocks + dsubdirs.m_StBlocks; + subdirs.m_NFiles += dhere.m_NFiles + dsubdirs.m_NFiles; + subdirs.m_NDirectories += dhere.m_NDirectories + dsubdirs.m_NDirectories; + } +} + +//---------------------------------------------------------------------------- +//! Propagate stat to parents +//! Called from ResourceMonitor::heart_beat() +//---------------------------------------------------------------------------- +void DirState::upward_propagate_stats_and_times() +{ + for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) + { + i->second.upward_propagate_stats_and_times(); + + m_recursive_subdir_stats.AddUp(i->second.m_recursive_subdir_stats); + m_recursive_subdir_stats.AddUp(i->second.m_here_stats); + // nothing to do for m_here_stats. + + m_recursive_subdir_usage.update_last_times(i->second.m_recursive_subdir_usage); + m_recursive_subdir_usage.update_last_times(i->second.m_here_usage); + } +} + +void DirState::apply_stats_to_usages() +{ + for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) + { + i->second.apply_stats_to_usages(); + } + m_here_usage.update_from_stats(m_here_stats); + m_recursive_subdir_usage.update_from_stats(m_recursive_subdir_stats); +} + +//---------------------------------------------------------------------------- +//! Reset current transaction statistics. +//! Called from ... 
to be seen if needed at all XXXX +//---------------------------------------------------------------------------- +void DirState::reset_stats() +{ + for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) + { + i->second.reset_stats(); + } + m_here_stats.Reset(); + m_recursive_subdir_stats.Reset(); +} + +int DirState::count_dirs_to_level(int max_depth) const +{ + int n_dirs = 1; + if (m_depth < max_depth) + { + for (auto & [name, ds] : m_subdirs) + { + n_dirs += ds.count_dirs_to_level(max_depth); + } + } + return n_dirs; +} + +//---------------------------------------------------------------------------- +//! Recursive print of statistics. Called if defined in pfc configuration. +//! +//---------------------------------------------------------------------------- +void DirState::dump_recursively(const char *name, int max_depth) const +{ + printf("%*d %s usage_here=%lld usage_sub=%lld usage_total=%lld num_ios=%d duration=%d b_hit=%lld b_miss=%lld b_byps=%lld b_wrtn=%lld\n", + 2 + 2 * m_depth, m_depth, name, + 512 * m_here_usage.m_StBlocks, 512 * m_recursive_subdir_usage.m_StBlocks, + 512 * (m_here_usage.m_StBlocks + m_recursive_subdir_usage.m_StBlocks), + // XXXXX here_stats or sum up? or both? 
+ m_here_stats.m_NumIos, m_here_stats.m_Duration, + m_here_stats.m_BytesHit, m_here_stats.m_BytesMissed, m_here_stats.m_BytesBypassed, + m_here_stats.m_BytesWritten); + + if (m_depth < max_depth) + { + for (auto & [name, ds] : m_subdirs) + { + ds.dump_recursively(name.c_str(), max_depth); + } + } +} + + +//============================================================================== +// DataFsState +//============================================================================== + +void DataFsState::upward_propagate_stats_and_times() +{ + m_root.upward_propagate_stats_and_times(); +} + +void DataFsState::apply_stats_to_usages() +{ + m_usage_update_time = time(0); + m_root.apply_stats_to_usages(); +} + +void DataFsState::reset_stats() +{ + m_root.reset_stats(); + m_stats_reset_time = time(0); +} + +void DataFsState::dump_recursively(int max_depth) const +{ + if (max_depth < 0) + max_depth = 4096; + + printf("DataFsState::dump_recursively delta_t = %lld, max_dump_depth = %d\n", + (long long)(m_usage_update_time - m_stats_reset_time), max_depth); + + m_root.dump_recursively("root", max_depth); +} diff --git a/src/XrdPfc/XrdPfcDirState.hh b/src/XrdPfc/XrdPfcDirState.hh new file mode 100644 index 00000000000..39f6952a89e --- /dev/null +++ b/src/XrdPfc/XrdPfcDirState.hh @@ -0,0 +1,204 @@ +#ifndef __XRDPFC_DIRSTATE_HH__ +#define __XRDPFC_DIRSTATE_HH__ + +#include "XrdPfcStats.hh" + +#include +#include +#include + + +//============================================================================== +// Manifest: +//------------------------------------------------------------------------------ +// - Data-holding struct DirUsage -- complementary to Stats. +// - Base classes for DirState and DataFsState, shared between in-memory +// tree form and snap-shot vector form. 
+// - Forward declaration of structs for DirState export in vector form: +// - struct DirStateElement \_ for stats and usages snapshot +// - struct DataFsSnapshot / +// - struct DirPurgeElement \_ for purge snapshot +// - struct DataFsPurgeshot / +// Those are in another file so the object file can be included in the +// dedicated binary for processing of the binary dumps. +// - class DirState -- state of a directory, including current delta-stats. +// - class DataFsState -- manager of the DirState tree, starting from root (as in "/"). +// +// Structs for DirState export in vector form (DirStateElement and DataFsSnapshot) +// are declared in XrdPfcDirStateSnapshot.hh. + +//============================================================================== + + +namespace XrdPfc +{ +class PathTokenizer; + +//============================================================================== +// Data-holding struct DirUsage -- complementary to Stats. +//============================================================================== + +struct DirUsage +{ + time_t m_LastOpenTime = 0; + time_t m_LastCloseTime = 0; + long long m_StBlocks = 0; + int m_NFilesOpen = 0; + int m_NFiles = 0; + int m_NDirectories = 0; + + DirUsage() = default; + + DirUsage(const DirUsage& s) = default; + + DirUsage& operator=(const DirUsage&) = default; + + DirUsage(const DirUsage &a, const DirUsage &b) : + m_LastOpenTime (std::max(a.m_LastOpenTime, b.m_LastOpenTime)), + m_LastCloseTime (std::max(a.m_LastCloseTime, b.m_LastCloseTime)), + m_StBlocks (a.m_StBlocks + b.m_StBlocks), + m_NFilesOpen (a.m_NFilesOpen + b.m_NFilesOpen), + m_NFiles (a.m_NFiles + b.m_NFiles), + m_NDirectories (a.m_NDirectories + b.m_NDirectories) + {} + + void update_from_stats(const DirStats& s) + { + m_StBlocks += s.m_StBlocksAdded - s.m_StBlocksRemoved; + m_NFilesOpen += s.m_NFilesOpened - s.m_NFilesClosed; + m_NFiles += s.m_NFilesCreated - s.m_NFilesRemoved; + m_NDirectories += s.m_NDirectoriesCreated - s.m_NDirectoriesRemoved; +
} + + void update_last_times(const DirUsage& u) + { + m_LastOpenTime = std::max(m_LastOpenTime, u.m_LastOpenTime); + m_LastCloseTime = std::max(m_LastCloseTime, u.m_LastCloseTime); + } +}; + + +//============================================================================== +// Base classes, shared between in-memory tree form and snap-shot vector form. +//============================================================================== + +struct DirStateBase +{ + std::string m_dir_name; + + DirStateBase() {} + DirStateBase(const std::string &dname) : m_dir_name(dname) {} +}; + +struct DataFsStateBase +{ + time_t m_usage_update_time = 0; + time_t m_stats_reset_time = 0; + + long long m_disk_total = 0; // In bytes, from Oss::StatVS() on space data + long long m_disk_used = 0; // "" + long long m_file_usage = 0; // Calculate usage by data files in the cache + long long m_meta_total = 0; // In bytes, from Oss::StatVS() on space meta + long long m_meta_used = 0; // "" +}; + + +//============================================================================== +// Structs for DirState export in vector form +//============================================================================== + +struct DirStateElement; +struct DataFsSnapshot; + +struct DirPurgeElement; +struct DataFsPurgeshot; + + +//============================================================================== +// DirState +//============================================================================== + +struct DirState : public DirStateBase +{ + typedef std::map<std::string, DirState> DsMap_t; + typedef DsMap_t::iterator DsMap_i; + + DirStats m_here_stats; + DirStats m_recursive_subdir_stats; + + DirUsage m_here_usage; + DirUsage m_recursive_subdir_usage; + + // This should be optional, only if needed and only up to some max level. + // Preferably stored in some external vector (as AccessTokens are) and indexed from here.
+ // DirStats m_purge_stats; // here + subdir, running avg., as per purge params + // DirStats m_report_stats; // here + subdir, reset after sshot dump + + DirState *m_parent = nullptr; + DsMap_t m_subdirs; + int m_depth; + bool m_scanned = false; // set to true after files in this directory are scanned. + + void init(); + + DirState* create_child(const std::string &dir); + + DirState* find_path_tok(PathTokenizer &pt, int pos, bool create_subdirs, + DirState **last_existing_dir = nullptr); + + // --- public part --- + + DirState(); + + DirState(DirState *parent); + + DirState(DirState *parent, const std::string& dname); + + DirState* get_parent() { return m_parent; } + + DirState* find_path(const std::string &path, int max_depth, bool parse_as_lfn, bool create_subdirs, + DirState **last_existing_dir = nullptr); + + DirState* find_dir(const std::string &dir, bool create_subdirs); + + // initial scan support + void upward_propagate_initial_scan_usages(); + + // stat support + void upward_propagate_stats_and_times(); + void apply_stats_to_usages(); + void reset_stats(); + + int count_dirs_to_level(int max_depth) const; + + void dump_recursively(const char *name, int max_depth) const; +}; + + +//============================================================================== +// DataFsState +//============================================================================== + +struct DataFsState : public DataFsStateBase +{ + DirState m_root; + + DataFsState() : m_root() {} + + DirState* get_root() { return & m_root; } + + DirState* find_dirstate_for_lfn(const std::string& lfn, DirState **last_existing_dir = nullptr) + { + return m_root.find_path(lfn, -1, true, true, last_existing_dir); + } + + void upward_propagate_stats_and_times(); + void apply_stats_to_usages(); + void reset_stats(); + + void dump_recursively(int max_depth) const; +}; + +} + +#endif diff --git a/src/XrdPfc/XrdPfcDirStateSnapshot.cc b/src/XrdPfc/XrdPfcDirStateSnapshot.cc new file mode 100644 index 
00000000000..2bc1422c285 --- /dev/null +++ b/src/XrdPfc/XrdPfcDirStateSnapshot.cc @@ -0,0 +1,121 @@ +#include "XrdPfcDirStateSnapshot.hh" +#include "XrdPfcPathParseTools.hh" + +#include "XrdOuc/XrdOucJson.hh" + +#include +#include +#include + + +// Redefine to also support ordered_json ... we want to keep variable order in JSON save files. +#define PFC_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) \ + inline void to_json(nlohmann::json &nlohmann_json_j, const Type &nlohmann_json_t) { \ + NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) \ + } \ + inline void from_json(const nlohmann::json &nlohmann_json_j, Type &nlohmann_json_t) { \ + NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) \ + } \ + inline void to_json(nlohmann::ordered_json &nlohmann_json_j, const Type &nlohmann_json_t) { \ + NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) \ + } \ + inline void from_json(const nlohmann::ordered_json &nlohmann_json_j, Type &nlohmann_json_t) { \ + NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) \ + } + +namespace XrdPfc +{ +PFC_DEFINE_TYPE_NON_INTRUSIVE(DirStats, + m_NumIos, m_Duration, m_BytesHit, m_BytesMissed, m_BytesBypassed, m_BytesWritten, m_StBlocksAdded, m_NCksumErrors, + m_StBlocksRemoved, m_NFilesOpened, m_NFilesClosed, m_NFilesCreated, m_NFilesRemoved, m_NDirectoriesCreated, m_NDirectoriesRemoved) +PFC_DEFINE_TYPE_NON_INTRUSIVE(DirUsage, + m_LastOpenTime, m_LastCloseTime, m_StBlocks, m_NFilesOpen, m_NFiles, m_NDirectories) +PFC_DEFINE_TYPE_NON_INTRUSIVE(DirStateElement, + m_dir_name, m_stats, m_usage, + m_parent, m_daughters_begin, m_daughters_end) +PFC_DEFINE_TYPE_NON_INTRUSIVE(DataFsSnapshot, + m_usage_update_time, m_stats_reset_time, m_disk_total, m_disk_used, m_file_usage, m_meta_total, m_meta_used, + m_dir_states) +} + +namespace +{ +// Open file for writing, throw exception on failure. 
+void open_ofstream(std::ofstream &ofs, const std::string &fname, const char *pfx = nullptr) +{ + ofs.open(fname, std::ofstream::trunc); + if (!ofs) + { + char m[2048]; + snprintf(m, 2048, "%s%sError opening %s for write: %m", pfx ? pfx : "", pfx ? " " : "", fname.c_str()); + throw std::runtime_error(m); + } +} +} + +using namespace XrdPfc; + +void DataFsSnapshot::write_json_file(const std::string &fname, bool include_preamble) +{ + // Throws exception on failed file-open. + + std::ofstream ofs; + open_ofstream(ofs, fname, __func__); + + if (include_preamble) + { + ofs << "{ \"dirstate_snapshot\": "; + } + + nlohmann::ordered_json j; + to_json(j, *this); + + ofs << std::setw(1); + ofs << j; + + if (include_preamble) + { + ofs << " }"; + } + + ofs << "\n"; + ofs.close(); +} + +void DataFsSnapshot::dump() +{ + nlohmann::ordered_json j; // = *this; + to_json(j, *this); + std::cout << j.dump(3) << "\n"; +} + +// DataFsPurgeshot + +int DataFsPurgeshot::find_dir_entry_from_tok(int entry, PathTokenizer &pt, int pos, int *last_existing_entry) const +{ + if (pos == pt.get_n_dirs()) + return entry; + + const DirPurgeElement &dpe = m_dir_vec[entry]; + for (int i = dpe.m_daughters_begin; i != dpe.m_daughters_end; ++i) + { + if (m_dir_vec[i].m_dir_name == pt.get_dir(pos)) { + return find_dir_entry_from_tok(i, pt, pos + 1, last_existing_entry); + } + } + if (last_existing_entry) + *last_existing_entry = entry; + return -1; +} + +int DataFsPurgeshot::find_dir_entry_for_dir_path(const std::string &dir_path) const +{ + PathTokenizer pt(dir_path, -1, false); + return find_dir_entry_from_tok(0, pt, 0, nullptr); +} + +const DirUsage* DataFsPurgeshot::find_dir_usage_for_dir_path(const std::string &dir_path) const +{ + int entry = find_dir_entry_for_dir_path(dir_path); + return entry >= 0 ? 
&m_dir_vec[entry].m_usage : nullptr; +} diff --git a/src/XrdPfc/XrdPfcDirStateSnapshot.hh b/src/XrdPfc/XrdPfcDirStateSnapshot.hh new file mode 100644 index 00000000000..045f5854fba --- /dev/null +++ b/src/XrdPfc/XrdPfcDirStateSnapshot.hh @@ -0,0 +1,94 @@ +#ifndef __XRDPFC_DIRSTATESNAPSHOT_HH__ +#define __XRDPFC_DIRSTATESNAPSHOT_HH__ + +#include "XrdPfcDirState.hh" + +#include + +//============================================================================== +// Structs for DirState export in vector form +//============================================================================== + +namespace XrdPfc +{ + +// For usage / stat reporting + +struct DirStateElement : public DirStateBase +{ + DirStats m_stats; + DirUsage m_usage; + + int m_parent = -1; + int m_daughters_begin = -1, m_daughters_end = -1; + + DirStateElement() {} + DirStateElement(const DirState &b, int parent) : + DirStateBase(b), + m_stats(b.m_here_stats, b.m_recursive_subdir_stats), + m_usage(b.m_here_usage, b.m_recursive_subdir_usage), + m_parent(parent) + {} +}; + +struct DataFsSnapshot : public DataFsStateBase +{ + std::vector m_dir_states; + + DataFsSnapshot() {} + DataFsSnapshot(const DataFsState &b) : + DataFsStateBase(b) + {} + + // Import of data into vector form is implemented in ResourceMonitor + // in order to avoid dependence of this struct on DirState. 
+ + void write_json_file(const std::string &fname, bool include_preamble); + void dump(); +}; + +// For purge planning & execution + +struct DirPurgeElement : public DirStateBase +{ + DirUsage m_usage; + + int m_parent = -1; + int m_daughters_begin = -1, m_daughters_end = -1; + + DirPurgeElement() {} + DirPurgeElement(const DirState &b, int parent) : + DirStateBase(b), + m_usage(b.m_here_usage, b.m_recursive_subdir_usage), + m_parent(parent) + {} +}; + +struct DataFsPurgeshot : public DataFsStateBase +{ + long long m_bytes_to_remove = 0; + long long m_estimated_writes_from_writeq = 0; + + bool m_space_based_purge = false; + bool m_age_based_purge = false; + + std::vector m_dir_vec; + // could have parallel vector of DirState* ... or store them in the DirPurgeElement. + // requires some interlock / ref-counting with the source tree. + // or .... just block DirState removal for the duration of the purge :) Yay. + + DataFsPurgeshot() {} + DataFsPurgeshot(const DataFsState &b) : + DataFsStateBase(b) + {} + + int find_dir_entry_from_tok(int entry, PathTokenizer &pt, int pos, int *last_existing_entry) const; + + int find_dir_entry_for_dir_path(const std::string &dir_path) const; + + const DirUsage* find_dir_usage_for_dir_path(const std::string &dir_path) const; +}; + +} + +#endif diff --git a/src/XrdPfc/XrdPfcFPurgeState.cc b/src/XrdPfc/XrdPfcFPurgeState.cc new file mode 100644 index 00000000000..3aa3f50773a --- /dev/null +++ b/src/XrdPfc/XrdPfcFPurgeState.cc @@ -0,0 +1,202 @@ +#include "XrdPfcFPurgeState.hh" +#include "XrdPfcFsTraversal.hh" +#include "XrdPfcInfo.hh" +#include "XrdPfc.hh" +#include "XrdPfcTrace.hh" + +#include "XrdOuc/XrdOucEnv.hh" +#include "XrdOuc/XrdOucUtils.hh" +#include "XrdOss/XrdOss.hh" +#include "XrdOss/XrdOssAt.hh" + +// Temporary, extensive purge tracing +// #define TRACE_PURGE(x) TRACE(Debug, x) +// #define TRACE_PURGE(x) std::cout << "PURGE " << x << "\n" +#define TRACE_PURGE(x) + +using namespace XrdPfc; + +namespace +{ + XrdSysTrace* 
GetTrace() { return Cache::GetInstance().GetTrace(); } +} + +const char *FPurgeState::m_traceID = "Purge"; + +//---------------------------------------------------------------------------- +//! Constructor. +//---------------------------------------------------------------------------- +FPurgeState::FPurgeState(long long iNBytesReq, XrdOss &oss) : + m_oss(oss), + m_nStBlocksReq((iNBytesReq >> 9) + 1ll), m_nStBlocksAccum(0), m_nStBlocksTotal(0), + m_tMinTimeStamp(0), m_tMinUVKeepTimeStamp(0) +{ + +} + +//---------------------------------------------------------------------------- +//! Move remaining entries to the member map. +//! This is used for cold files and for files collected from purge plugin (really?). +//---------------------------------------------------------------------------- +void FPurgeState::MoveListEntriesToMap() +{ + for (list_i i = m_flist.begin(); i != m_flist.end(); ++i) + { + m_fmap.insert(std::make_pair(i->time, *i)); + } + m_flist.clear(); +} + +//---------------------------------------------------------------------------- +//! Open info file. Look at the UV stamps and last access time. +//! Store the file in sorted map or in a list. +//! @param fname name of cache-info file +//! @param info Info object +//! @param fstat stat of the given file +//! +//---------------------------------------------------------------------------- +void FPurgeState::CheckFile(const FsTraversal &fst, const char *fname, Info &info, struct stat &fstat) +{ + static const char *trc_pfx = "FPurgeState::CheckFile "; + + long long nblocks = fstat.st_blocks; + time_t atime; + if (!info.GetLatestDetachTime(atime)) + { + // cinfo file does not contain any known accesses, use fstat.mtime instead.
+ TRACE(Debug, trc_pfx << "could not get access time for " << fst.m_current_path << fname << ", using mtime from stat instead."); + atime = fstat.st_mtime; + } + // TRACE(Dump, trc_pfx << "checking " << fname << " accessTime " << atime); + + m_nStBlocksTotal += nblocks; + + // Could remove aged-out / uv-keep-failed files here ... or in the calling function that + // can aggreagate info for all files in the directory. + + // For now keep using 0 time as this is used in the purge loop to make sure we continue even if enough + // disk-space has been freed. + + if (m_tMinTimeStamp > 0 && atime < m_tMinTimeStamp) + { + m_flist.push_back(PurgeCandidate(fst.m_current_path, fname, nblocks, 0)); + m_nStBlocksAccum += nblocks; + } + else if (m_tMinUVKeepTimeStamp > 0 && + Cache::Conf().does_cschk_have_missing_bits(info.GetCkSumState()) && + info.GetNoCkSumTimeForUVKeep() < m_tMinUVKeepTimeStamp) + { + m_flist.push_back(PurgeCandidate(fst.m_current_path, fname, nblocks, 0)); + m_nStBlocksAccum += nblocks; + } + else if (m_nStBlocksAccum < m_nStBlocksReq || (!m_fmap.empty() && atime < m_fmap.rbegin()->first)) + { + m_fmap.insert(std::make_pair(atime, PurgeCandidate(fst.m_current_path, fname, nblocks, atime))); + m_nStBlocksAccum += nblocks; + + // remove newest files from map if necessary + while (!m_fmap.empty() && m_nStBlocksAccum - m_fmap.rbegin()->second.nStBlocks >= m_nStBlocksReq) + { + m_nStBlocksAccum -= m_fmap.rbegin()->second.nStBlocks; + m_fmap.erase(--(m_fmap.rbegin().base())); + } + } +} + +void FPurgeState::ProcessDirAndRecurse(FsTraversal &fst) +{ + static const char *trc_pfx = "FPurgeState::ProcessDirAndRecurse "; + + for (auto it = fst.m_current_files.begin(); it != fst.m_current_files.end(); ++it) + { + // Check if the file is currently opened / purge-protected is done before unlinking of the file. 
+ const std::string &f_name = it->first; + const std::string i_name = f_name + Info::s_infoExtension; + + XrdOssDF *fh = nullptr; + Info cinfo(GetTrace()); + + // XXX Note, the initial scan now uses stat information only! + + if (! it->second.has_both()) { + // cinfo or data file is missing. What do we do? Erase? + // Should really be checked in some other "consistency" traversal. + continue; + } + + if (fst.open_at_ro(i_name.c_str(), fh) == XrdOssOK && + cinfo.Read(fh, fst.m_current_path.c_str(), i_name.c_str())) + { + CheckFile(fst, i_name.c_str(), cinfo, it->second.stat_data); + } + else + { + TRACE(Warning, trc_pfx << "can't open or read " << fst.m_current_path << i_name << ", err " << XrdSysE2T(errno) << "; purging."); + fst.unlink_at(i_name.c_str()); + fst.unlink_at(f_name.c_str()); + // generate purge event or not? or just flag possible discrepancy? + // should this really be done in some other consistency-check traversal? + } + fst.close_delete(fh); + + // Protected top-directories are skipped. + } + + std::vector<std::string> dirs; + dirs.swap(fst.m_current_dirs); + for (auto &dname : dirs) + { + if (fst.cd_down(dname)) + { + ProcessDirAndRecurse(fst); + fst.cd_up(); + } + } +} + +bool FPurgeState::TraverseNamespace(const char *root_path) +{ + bool success_p = true; + + FsTraversal fst(m_oss); + fst.m_protected_top_dirs.insert("pfc-stats"); // XXXX This should come from config. Also: N2N? + // Also ... this only applies to /, not any root_path + if (fst.begin_traversal(root_path)) + { + ProcessDirAndRecurse(fst); + } + else + { + // Fail startup, can't open /. + success_p = false; + } + fst.end_traversal(); + + return success_p; +} + +/* +void FPurgeState::UnlinkInfoAndData(const char *fname, long long nblocks, XrdOssDF *iOssDF) +{ + fname[fname_len - m_info_ext_len] = 0; + if (nblocks > 0) + { + if ( !
Cache.GetInstance().IsFileActiveOrPurgeProtected(dataPath)) + { + m_n_purged++; + m_bytes_purged += nblocks; + } else + { + m_n_purge_protected++; + m_bytes_purge_protected += nblocks; + m_dir_state->add_usage_purged(nblocks); + // XXXX should also tweak other stuff? + fname[fname_len - m_info_ext_len] = '.'; + return; + } + } + m_oss_at.Unlink(*iOssDF, fname); + fname[fname_len - m_info_ext_len] = '.'; + m_oss_at.Unlink(*iOssDF, fname); +} +*/ diff --git a/src/XrdPfc/XrdPfcFPurgeState.hh b/src/XrdPfc/XrdPfcFPurgeState.hh new file mode 100644 index 00000000000..a3167a4248a --- /dev/null +++ b/src/XrdPfc/XrdPfcFPurgeState.hh @@ -0,0 +1,77 @@ +#ifndef __XRDPFC_FPURGESTATE_HH__ +#define __XRDPFC_FPURGESTATE_HH__ + +#include +#include +#include +#include + +#include + +class XrdOss; + +namespace XrdPfc { + +class Info; +class FsTraversal; + +//============================================================================== +// FPurgeState +//============================================================================== + +class FPurgeState +{ +public: + struct PurgeCandidate // unknown meaning, "file that is candidate for purge", PurgeCandidate would be better. 
+ { + std::string path; + long long nStBlocks; + time_t time; + + PurgeCandidate(const std::string &dname, const char *fname, long long n, time_t t) : + path(dname + fname), nStBlocks(n), time(t) + {} + }; + + using list_t = std::list; + using list_i = list_t::iterator; + using map_t = std::multimap; + using map_i = map_t::iterator; + +private: + XrdOss &m_oss; + + long long m_nStBlocksReq; + long long m_nStBlocksAccum; + long long m_nStBlocksTotal; + time_t m_tMinTimeStamp; + time_t m_tMinUVKeepTimeStamp; + + static const char *m_traceID; + + list_t m_flist; // list of files to be removed unconditionally + map_t m_fmap; // map of files that are purge candidates + +public: + FPurgeState(long long iNBytesReq, XrdOss &oss); + + map_t &refMap() { return m_fmap; } + list_t &refList() { return m_flist; } + + void setMinTime(time_t min_time) { m_tMinTimeStamp = min_time; } + time_t getMinTime() const { return m_tMinTimeStamp; } + void setUVKeepMinTime(time_t min_time) { m_tMinUVKeepTimeStamp = min_time; } + long long getNStBlocksTotal() const { return m_nStBlocksTotal; } + long long getNBytesTotal() const { return 512ll * m_nStBlocksTotal; } + + void MoveListEntriesToMap(); + + void CheckFile(const FsTraversal &fst, const char *fname, Info &info, struct stat &fstat); + + void ProcessDirAndRecurse(FsTraversal &fst); + bool TraverseNamespace(const char *root_path); +}; + +} // namespace XrdPfc + +#endif diff --git a/src/XrdPfc/XrdPfcFSctl.cc b/src/XrdPfc/XrdPfcFSctl.cc index 2298bfd75b5..bde0b5840de 100644 --- a/src/XrdPfc/XrdPfcFSctl.cc +++ b/src/XrdPfc/XrdPfcFSctl.cc @@ -49,7 +49,7 @@ XrdPfcFSctl::XrdPfcFSctl(XrdPfc::Cache &cInst, XrdSysLogger *logP) : myCache(cInst), hProc(0), Log(logP, "PfcFsctl"), sysTrace(cInst.GetTrace()), m_traceID("PfcFSctl") {} - + /******************************************************************************/ /* C o n f i g u r e */ /******************************************************************************/ @@ -64,11 +64,11 @@ bool 
XrdPfcFSctl::Configure(const char *CfgFN, hProc = (XrdOfsHandle*)envP->GetPtr("XrdOfsHandle*"); return hProc != 0; } - + /******************************************************************************/ /* F S c t l [ F i l e ] */ /******************************************************************************/ - + int XrdPfcFSctl::FSctl(const int cmd, int alen, const char *args, @@ -76,7 +76,7 @@ int XrdPfcFSctl::FSctl(const int cmd, XrdOucErrInfo &eInfo, const XrdSecEntity *client) { - eInfo.setErrInfo(ENOTSUP, "File based fstcl not supported for a cache."); + eInfo.setErrInfo(ENOTSUP, "File based fstcl not supported for a cache."); return SFS_ERROR; } @@ -95,14 +95,14 @@ int XrdPfcFSctl::FSctl(const int cmd, // Verify command // if (cmd != SFS_FSCTL_PLUGXC) - {eInfo.setErrInfo(EIDRM, "None-cache command issued to a cache."); + {eInfo.setErrInfo(EIDRM, "None-cache command issued to a cache."); return SFS_ERROR; } // Very that we have a command // - if (!xeq || args.Arg1Len < 1) - {eInfo.setErrInfo(EINVAL, "Missing cache command or argument."); + if (!xeq || args.Arg1Len < 1) + {eInfo.setErrInfo(EINVAL, "Missing cache command or argument."); return SFS_ERROR; } diff --git a/src/XrdPfc/XrdPfcFile.cc b/src/XrdPfc/XrdPfcFile.cc index 178d1205c5b..2901ee94aac 100644 --- a/src/XrdPfc/XrdPfcFile.cc +++ b/src/XrdPfc/XrdPfcFile.cc @@ -18,21 +18,23 @@ #include "XrdPfcFile.hh" +#include "XrdPfc.hh" +#include "XrdPfcResourceMonitor.hh" #include "XrdPfcIO.hh" #include "XrdPfcTrace.hh" -#include -#include -#include -#include + #include "XrdCl/XrdClLog.hh" #include "XrdCl/XrdClConstants.hh" #include "XrdCl/XrdClFile.hh" -#include "XrdSys/XrdSysPthread.hh" #include "XrdSys/XrdSysTimer.hh" #include "XrdOss/XrdOss.hh" #include "XrdOuc/XrdOucEnv.hh" #include "XrdSfs/XrdSfsInterface.hh" -#include "XrdPfc.hh" + +#include +#include +#include +#include using namespace XrdPfc; @@ -67,6 +69,7 @@ File::File(const std::string& path, long long iOffset, long long iFileSize) : m_state_cond(0), 
m_block_size(0), m_num_blocks(0), + m_resmon_token(-1), m_prefetch_state(kOff), m_prefetch_read_cnt(0), m_prefetch_hit_cnt(0), @@ -80,7 +83,7 @@ File::~File() TRACEF(Debug, "~File() close info "); m_info_file->Close(); delete m_info_file; - m_info_file = NULL; + m_info_file = nullptr; } if (m_data_file) @@ -88,7 +91,13 @@ File::~File() TRACEF(Debug, "~File() close output "); m_data_file->Close(); delete m_data_file; - m_data_file = NULL; + m_data_file = nullptr; + } + + if (m_resmon_token >= 0) + { + // Last update of file stats has been sent from the final Sync. + Cache::ResMon().register_file_close(m_resmon_token, time(0), m_stats); } TRACEF(Debug, "~File() ended, prefetch score = " << m_prefetch_score); @@ -134,22 +143,28 @@ void File::initiate_emergency_shutdown() cache()->DeRegisterPrefetchFile(this); } } - } //------------------------------------------------------------------------------ -Stats File::DeltaStatsFromLastCall() +void File::check_delta_stats() { - // Not locked, only used from Cache / Purge thread. - - Stats delta = m_last_stats; - - m_last_stats = m_stats.Clone(); - - delta.DeltaToReference(m_last_stats); + // Called under m_state_cond lock. + // BytesWritten indirectly trigger an unconditional merge through periodic Sync(). + if (m_delta_stats.BytesRead() >= m_resmon_report_threshold) + report_and_merge_delta_stats(); +} - return delta; +void File::report_and_merge_delta_stats() +{ + // Called under m_state_cond lock. + struct stat s; + m_data_file->Fstat(&s); + m_delta_stats.m_StBlocksAdded = s.st_blocks - m_st_blocks; + m_st_blocks = s.st_blocks; + Cache::ResMon().register_file_update_stats(m_resmon_token, m_delta_stats); + m_stats.AddUp(m_delta_stats); + m_delta_stats.Reset(); } //------------------------------------------------------------------------------ @@ -278,8 +293,8 @@ bool File::FinalizeSyncBeforeExit() { if ( ! m_writes_during_sync.empty() || m_non_flushed_cnt > 0 || ! 
m_detach_time_logged) { - Stats loc_stats = m_stats.Clone(); - m_cfi.WriteIOStatDetach(loc_stats); + report_and_merge_delta_stats(); + m_cfi.WriteIOStatDetach(m_stats); m_detach_time_logged = true; m_in_sync = true; TRACEF(Debug, "FinalizeSyncBeforeExit requesting sync to write detach stats"); @@ -309,7 +324,7 @@ void File::AddIO(IO *io) { m_io_set.insert(io); io->m_attach_time = now; - m_stats.IoAttach(); + m_delta_stats.IoAttach(); insert_remote_location(loc); @@ -348,7 +363,7 @@ void File::RemoveIO(IO *io) ++m_current_io; } - m_stats.IoDetach(now - io->m_attach_time); + m_delta_stats.IoDetach(now - io->m_attach_time); m_io_set.erase(mi); --m_ios_in_detach; @@ -375,7 +390,11 @@ bool File::Open() static const char *tpfx = "Open() "; - TRACEF(Dump, tpfx << "open file for disk cache"); + TRACEF(Dump, tpfx << "entered"); + + // Before touching anything, check with ResourceMonitor if a scan is in progress. + // This function will wait internally if needed until it is safe to proceed. + Cache::ResMon().CrossCheckIfScanIsInProgress(m_filename, m_state_cond); const Configuration &conf = Cache::GetInstance().RefConfiguration(); @@ -412,7 +431,7 @@ bool File::Open() return false; } - myEnv.Put("oss.asize", "64k"); // TODO: Calculate? Get it from configuration? Do not know length of access lists ... + myEnv.Put("oss.asize", "64k"); // Advisory, block-map and access list lengths vary. 
myEnv.Put("oss.cgroup", conf.m_meta_space.c_str()); if ((res = myOss.Create(myUser, ifn.c_str(), 0600, myEnv, XRDOSS_mkpath)) != XrdOssOK) { @@ -448,6 +467,7 @@ bool File::Open() TRACEF(Warning, tpfx << "Basic sanity checks on data file failed, resetting info file, truncating data file."); m_cfi.ResetAllAccessStats(); m_data_file->Ftruncate(0); + Cache::ResMon().register_file_purge(m_filename, data_stat.st_blocks); } } @@ -460,6 +480,7 @@ bool File::Open() initialize_info_file = true; m_cfi.ResetAllAccessStats(); m_data_file->Ftruncate(0); + Cache::ResMon().register_file_purge(m_filename, data_stat.st_blocks); } else { // TODO: If the file is complete, we don't need to reset net cksums. m_cfi.DowngradeCkSumState(conf.get_cs_Chk()); @@ -475,12 +496,27 @@ bool File::Open() m_info_file->Fsync(); TRACEF(Debug, tpfx << "Creating new file info, data size = " << m_file_size << " num blocks = " << m_cfi.GetNBlocks()); } + else + { + if (futimens(m_info_file->getFD(), NULL)) { + TRACEF(Error, tpfx << "failed setting modification time " << ERRNO_AND_ERRSTR(errno)); + } + } m_cfi.WriteIOStatAttach(); m_state_cond.Lock(); m_block_size = m_cfi.GetBufferSize(); m_num_blocks = m_cfi.GetNBlocks(); m_prefetch_state = (m_cfi.IsComplete()) ? kComplete : kStopped; // Will engage in AddIO(). + + m_data_file->Fstat(&data_stat); + m_st_blocks = data_stat.st_blocks; + + m_resmon_token = Cache::ResMon().register_file_open(m_filename, time(0), data_existed); + m_resmon_report_threshold = std::min(std::max(200ll * 1024, m_file_size / 50), 500ll * 1024 * 1024); + // m_resmon_report_threshold_scaler; // something like 10% of original threshold, to adjust + // actual threshold based on return values from register_file_update_stats(). 
+ m_state_cond.UnLock(); return true; @@ -672,7 +708,11 @@ int File::Read(IO *io, char* iUserBuff, long long iUserOff, int iUserSize, ReadR { m_state_cond.UnLock(); int ret = m_data_file->Read(iUserBuff, iUserOff, iUserSize); - if (ret > 0) m_stats.AddBytesHit(ret); + if (ret > 0) { + XrdSysCondVarHelper _lck(m_state_cond); + m_delta_stats.AddBytesHit(ret); + check_delta_stats(); + } return ret; } @@ -701,7 +741,11 @@ int File::ReadV(IO *io, const XrdOucIOVec *readV, int readVnum, ReadReqRH *rh) { m_state_cond.UnLock(); int ret = m_data_file->ReadV(const_cast(readV), readVnum); - if (ret > 0) m_stats.AddBytesHit(ret); + if (ret > 0) { + XrdSysCondVarHelper _lck(m_state_cond); + m_delta_stats.AddBytesHit(ret); + check_delta_stats(); + } return ret; } @@ -930,10 +974,10 @@ int File::ReadOpusCoalescere(IO *io, const XrdOucIOVec *readV, int readVnum, if (read_req->is_complete()) { // Almost like FinalizeReadRequest(read_req) -- but no callout! + m_delta_stats.AddReadStats(read_req->m_stats); + check_delta_stats(); m_state_cond.UnLock(); - m_stats.AddReadStats(read_req->m_stats); - int ret = read_req->return_value(); delete read_req; return ret; @@ -946,7 +990,8 @@ int File::ReadOpusCoalescere(IO *io, const XrdOucIOVec *readV, int readVnum, } else { - m_stats.m_BytesHit += bytes_read; + m_delta_stats.m_BytesHit += bytes_read; + check_delta_stats(); m_state_cond.UnLock(); // !!! No callout. 
@@ -1051,7 +1096,12 @@ void File::Sync() bool errorp = false; if (ret == XrdOssOK) { - Stats loc_stats = m_stats.Clone(); + Stats loc_stats; + { + XrdSysCondVarHelper _lck(&m_state_cond); + report_and_merge_delta_stats(); + loc_stats = m_stats; + } m_cfi.WriteIOStat(loc_stats); m_cfi.Write(m_info_file, m_filename.c_str()); int cret = m_info_file->Fsync(); @@ -1265,8 +1315,11 @@ void File::FinalizeReadRequest(ReadRequest *rreq) { // called from ProcessBlockResponse() // NOT under lock -- does callout - - m_stats.AddReadStats(rreq->m_stats); + { + XrdSysCondVarHelper _lck(m_state_cond); + m_delta_stats.AddReadStats(rreq->m_stats); + check_delta_stats(); + } rreq->m_rh->Done(rreq->return_value()); delete rreq; @@ -1335,7 +1388,8 @@ void File::ProcessBlockResponse(Block *b, int res) { // Increase ref-count for the writer. inc_ref_count(b); - m_stats.AddWriteStats(b->get_size(), b->get_n_cksum_errors()); + m_delta_stats.AddWriteStats(b->get_size(), b->get_n_cksum_errors()); + // No check for writes, report-and-merge forced during Sync(). cache()->AddWriteTask(b, true); } diff --git a/src/XrdPfc/XrdPfcFile.hh b/src/XrdPfc/XrdPfcFile.hh index 608e5dd73e3..d626ac8260e 100644 --- a/src/XrdPfc/XrdPfcFile.hh +++ b/src/XrdPfc/XrdPfcFile.hh @@ -18,15 +18,15 @@ // along with XRootD. If not, see . 
//---------------------------------------------------------------------------------- +#include "XrdPfcTypes.hh" +#include "XrdPfcInfo.hh" +#include "XrdPfcStats.hh" + #include "XrdCl/XrdClXRootDResponses.hh" -#include "XrdCl/XrdClDefaultEnv.hh" #include "XrdOuc/XrdOucCache.hh" #include "XrdOuc/XrdOucIOVec.hh" -#include "XrdPfcInfo.hh" -#include "XrdPfcStats.hh" - #include #include #include @@ -42,6 +42,7 @@ class Log; namespace XrdPfc { +class File; class BlockResponseHandler; class DirectResponseHandler; class IO; @@ -50,12 +51,6 @@ struct ReadVBlockListRAM; struct ReadVChunkListRAM; struct ReadVBlockListDisk; struct ReadVChunkListDisk; -} - - -namespace XrdPfc -{ -class File; struct ReadReqRH : public XrdOucCacheIOCB { @@ -102,7 +97,7 @@ struct ChunkRequest char *m_buf; // Where to place the data chunk. long long m_off; // Offset *within* the corresponding block. int m_size; // Size of the data chunk. - + ChunkRequest(ReadRequest *rreq, char *buf, long long off, int size) : m_read_req(rreq), m_buf(buf), m_off(off), m_size(size) {} @@ -244,7 +239,7 @@ public: //! Used in XrdPosixXrootd::Close() //---------------------------------------------------------------------- bool ioActive(IO *io); - + //---------------------------------------------------------------------- //! \brief Flags that detach stats should be written out in final sync. //! Called from CacheIO upon Detach. @@ -271,20 +266,18 @@ public: //! 
Log path const char* lPath() const; - std::string& GetLocalPath() { return m_filename; } + const std::string& GetLocalPath() const { return m_filename; } XrdSysError* GetLog(); XrdSysTrace* GetTrace(); - long long GetFileSize() { return m_file_size; } + long long GetFileSize() const { return m_file_size; } void AddIO(IO *io); int GetPrefetchCountOnIO(IO *io); void StopPrefetchingOnIO(IO *io); void RemoveIO(IO *io); - Stats DeltaStatsFromLastCall(); - std::string GetRemoteLocations() const; const Info::AStat* GetLastAccessStats() const { return m_cfi.GetLastAccessStats(); } size_t GetAccessCnt() const { return m_cfi.GetAccessCnt(); } @@ -311,14 +304,14 @@ private: static const char *m_traceID; int m_ref_cnt; //!< number of references from IO or sync - + XrdOssDF *m_data_file; //!< file handle for data file on disk XrdOssDF *m_info_file; //!< file handle for data-info file on disk Info m_cfi; //!< download status of file blocks and access statistics - std::string m_filename; //!< filename of data file on disk - long long m_offset; //!< offset of cached file for block-based / hdfs operation - long long m_file_size; //!< size of cached disk file for block-based operation + const std::string m_filename; //!< filename of data file on disk + const long long m_offset; //!< offset of cached file for block-based / hdfs operation + const long long m_file_size; //!< size of cached disk file for block-based operation // IO objects attached to this file. 
@@ -350,10 +343,16 @@ private: long long m_block_size; int m_num_blocks; - // Stats + // Stats and ResourceMonitor interface Stats m_stats; //!< cache statistics for this instance - Stats m_last_stats; //!< copy of cache stats during last purge cycle, used for per directory stat reporting + Stats m_delta_stats; //!< unreported updates to stats + long long m_st_blocks; //!< last reported st_blocks + long long m_resmon_report_threshold; + int m_resmon_token; //!< token used in communication with the ResourceMonitor + + void check_delta_stats(); + void report_and_merge_delta_stats(); std::set m_remote_locations; //!< Gathered in AddIO / ioUpdate / ioActive. void insert_remote_location(const std::string &loc); @@ -370,7 +369,7 @@ private: void inc_prefetch_read_cnt(int prc) { if (prc) { m_prefetch_read_cnt += prc; calc_prefetch_score(); } } void inc_prefetch_hit_cnt (int phc) { if (phc) { m_prefetch_hit_cnt += phc; calc_prefetch_score(); } } - void calc_prefetch_score() { m_prefetch_score = float(m_prefetch_hit_cnt) / m_prefetch_read_cnt; } + void calc_prefetch_score() { m_prefetch_score = float(m_prefetch_hit_cnt) / m_prefetch_read_cnt; } // Helpers diff --git a/src/XrdPfc/XrdPfcFsTraversal.cc b/src/XrdPfc/XrdPfcFsTraversal.cc new file mode 100644 index 00000000000..552341cea4e --- /dev/null +++ b/src/XrdPfc/XrdPfcFsTraversal.cc @@ -0,0 +1,215 @@ +#include "XrdPfcFsTraversal.hh" +#include "XrdPfcDirState.hh" +#include "XrdPfc.hh" +#include "XrdPfcTrace.hh" + +#include "XrdOuc/XrdOucEnv.hh" +#include "XrdOss/XrdOssApi.hh" + +// #define TRACE_PURGE(x) std::cout << "PURGE " << x << "\n" +#define TRACE_PURGE(x) + +using namespace XrdPfc; + +namespace +{ + XrdSysTrace* GetTrace() { return Cache::GetInstance().GetTrace(); } +} + +const char *FsTraversal::m_traceID = "FsTraversal"; + +//---------------------------------------------------------------------------- + +FsTraversal::FsTraversal(XrdOss &oss) : + m_oss(oss), m_oss_at(oss) +{} + +FsTraversal::~FsTraversal() +{} + 
+int FsTraversal::close_delete(XrdOssDF *&ossDF) +{ + int ret = 0; + if (ossDF) { + ret = ossDF->Close(); + delete ossDF; + } + ossDF = nullptr; + return ret; +} + +//---------------------------------------------------------------------------- + +bool FsTraversal::begin_traversal(DirState *root, const char *root_path) +{ + m_maintain_dirstate = true; + m_root_dir_state = m_dir_state = root; + + bool ret = begin_traversal(root_path); + + return ret; +} + +bool FsTraversal::begin_traversal(const char *root_path) +{ + static const char *trc_pfx = "FsTraversal::begin_traversal "; + + assert(root_path && strlen(root_path) > 0 && root_path[0] == '/'); + + m_rel_dir_level = 0; + m_current_path = root_path; + + XrdOssDF* dhp = m_oss.newDir("PfcFsTraversal"); + if (dhp->Opendir(root_path, m_env) != XrdOssOK) { + delete dhp; + TRACE(Error, trc_pfx << "could not opendir [" << root_path << "], " << XrdSysE2T(errno)); + return false; + } + m_dir_handle_stack.push_back(dhp); + + TRACE_PURGE("FPurgeState::begin_traversal cur_path '" << m_current_path << "', rel_level=" << m_rel_dir_level); + + slurp_current_dir(); + return true; +} + +void FsTraversal::end_traversal() +{ + TRACE_PURGE("FPurgeState::end_traversal reporting for '" << m_current_path << "', re_level=" << m_rel_dir_level); + + for (auto &dhp : m_dir_handle_stack) { + dhp->Close(); + delete dhp; + } + m_dir_handle_stack.clear(); + m_current_path.clear(); + m_current_dirs.clear(); + m_current_files.clear(); + + m_rel_dir_level = -1; + m_root_dir_state = m_dir_state = nullptr; + m_maintain_dirstate = false; +} + +//---------------------------------------------------------------------------- + +bool FsTraversal::cd_down(const std::string &dir_name) +{ + static const char *trc_pfx = "FsTraversal::cd_down "; + + XrdOssDF *dhp = 0; + if (m_oss_at.Opendir(*m_dir_handle_stack.back(), dir_name.c_str(), m_env, dhp) != XrdOssOK) { + delete dhp; + TRACE(Error, trc_pfx << "could not opendir [" << m_current_path << dir_name << "], " 
<< XrdSysE2T(errno)); + return false; + } + m_dir_handle_stack.push_back(dhp); + + ++m_rel_dir_level; + m_current_path.append(dir_name); + m_current_path.append("/"); + + if (m_maintain_dirstate) + m_dir_state = m_dir_state->find_dir(dir_name, true); + + slurp_current_dir(); + return true; +} + +void FsTraversal::cd_up() +{ + m_current_dirs.clear(); + m_current_files.clear(); + + m_dir_handle_stack.back()->Close(); + delete m_dir_handle_stack.back(); + m_dir_handle_stack.pop_back(); + + if (m_maintain_dirstate) + m_dir_state = m_dir_state->get_parent(); + + m_current_path.erase(m_current_path.find_last_of('/', m_current_path.size() - 2) + 1); + --m_rel_dir_level; +} + +//---------------------------------------------------------------------------- + +void FsTraversal::slurp_current_dir() +{ + static const char *trc_pfx = "FsTraversal::slurp_current_dir "; + + XrdOssDF &dh = *m_dir_handle_stack.back(); + slurp_dir_ll(dh, m_rel_dir_level, m_current_path.c_str(), trc_pfx); +} + +//---------------------------------------------------------------------------- + +void FsTraversal::slurp_dir_ll(XrdOssDF &dh, int dir_level, const char *path, const char *trc_pfx) +{ + // Low-level implementation of slurp dir. + + char fname[256]; + struct stat fstat; + + dh.StatRet(&fstat); + + const char *info_ext = Info::s_infoExtension; + const size_t info_ext_len = Info::s_infoExtensionLen; + + m_current_dirs.clear(); + m_current_files.clear(); + + while (true) + { + int rc = dh.Readdir(fname, 256); + + if (rc == -ENOENT) + { + TRACE_PURGE(" Skipping ENOENT dir entry [" << fname << "]."); + continue; + } + if (rc != XrdOssOK) + { + TRACE(Error, trc_pfx << "Readdir error at " << path << ", err " << XrdSysE2T(-rc) << "."); + break; + } + + TRACE_PURGE(" Readdir [" << fname << "]"); + + if (fname[0] == 0) + { + TRACE_PURGE(" Finished reading dir [" << path << "]. Break loop."); + break; + } + if (fname[0] == '.' && (fname[1] == 0 || (fname[1] == '.' 
&& fname[2] == 0))) + { + TRACE_PURGE(" Skipping here or parent dir [" << fname << "]. Continue loop."); + continue; + } + + if (S_ISDIR(fstat.st_mode)) + { + if (dir_level == 0 && m_protected_top_dirs.find(fname) != m_protected_top_dirs.end()) + { + // Skip protected top-directories. + continue; + } + m_current_dirs.push_back(fname); + } + else + { + size_t fname_len = strlen(fname); + + if (fname_len > info_ext_len && strncmp(&fname[fname_len - info_ext_len], info_ext, info_ext_len) == 0) + { + // truncate ".cinfo" away + fname[fname_len - info_ext_len] = 0; + m_current_files[fname].set_cinfo(fstat); + } + else + { + m_current_files[fname].set_data(fstat); + } + } + } +} diff --git a/src/XrdPfc/XrdPfcFsTraversal.hh b/src/XrdPfc/XrdPfcFsTraversal.hh new file mode 100644 index 00000000000..183a4de376a --- /dev/null +++ b/src/XrdPfc/XrdPfcFsTraversal.hh @@ -0,0 +1,85 @@ +#ifndef __XRDPFC_FSTRAVERSAL_HH__ +#define __XRDPFC_FSTRAVERSAL_HH__ + +#include "XrdOss/XrdOssAt.hh" +#include "XrdOuc/XrdOucEnv.hh" + +#include +#include +#include +#include +#include + +class XrdOss; +class XrdOssDF; + +namespace XrdPfc { + +class DirState; + +class FsTraversal +{ +public: + struct FilePairStat { + struct stat stat_data, stat_cinfo; + bool has_data = false; + bool has_cinfo = false; + + void set_data (const struct stat &s) { stat_data = s; has_data = true; } + void set_cinfo(const struct stat &s) { stat_cinfo = s; has_cinfo = true; } + bool has_both() const { return has_data && has_cinfo; } + }; + +protected: + XrdOss &m_oss; + XrdOssAt m_oss_at; + XrdOucEnv m_env; + + bool m_maintain_dirstate = false; + +public: + DirState *m_root_dir_state = nullptr; + DirState *m_dir_state = nullptr; // current DirState + + int m_rel_dir_level = -1; // dir level relative to root, 0 ~ at root + std::string m_current_path; // Includes trailing '/' -- needed for printouts and PurgeCandidate creation. + + // Hmmh ... need a stack of those ... or not, if doing tail recursion. 
+ // Can not, OpenDirAt descend can not be like that, ie, i will need the old handle. + std::vector m_dir_handle_stack; + + std::vector m_current_dirs; // swap out into local scope before recursion + std::map m_current_files; // clear when done + + std::set m_protected_top_dirs; // directories that will NOT be traversed at relative level 0. + + static const char *m_traceID; + + void slurp_current_dir(); + void slurp_dir_ll(XrdOssDF &dh, int dir_level, const char *path, const char *trc_pfx); + +public: + FsTraversal(XrdOss &oss); + ~FsTraversal(); + + bool begin_traversal(DirState *root, const char *root_path); + bool begin_traversal(const char *root_path); + void end_traversal(); + + bool cd_down(const std::string &dir_name); + void cd_up(); + + int open_at_ro(const char* fname, XrdOssDF *&ossDF) { + return m_oss_at.OpenRO(*m_dir_handle_stack.back(), fname, m_env, ossDF); + } + int unlink_at(const char* fname) { + return m_oss_at.Unlink(*m_dir_handle_stack.back(), fname); + } + int close_delete(XrdOssDF *&ossDF); + + XrdOucEnv& default_env() { return m_env; } +}; + +} + +#endif diff --git a/src/XrdPfc/XrdPfcIO.hh b/src/XrdPfc/XrdPfcIO.hh index 1e5f5885b71..d4a8ac419b5 100644 --- a/src/XrdPfc/XrdPfcIO.hh +++ b/src/XrdPfc/XrdPfcIO.hh @@ -5,8 +5,6 @@ class XrdSysTrace; #include "XrdPfc.hh" #include "XrdOuc/XrdOucCache.hh" -#include "XrdCl/XrdClDefaultEnv.hh" -#include "XrdSys/XrdSysPthread.hh" #include namespace XrdPfc diff --git a/src/XrdPfc/XrdPfcIOFile.cc b/src/XrdPfc/XrdPfcIOFile.cc index d0b3ebef169..77a1160b7f0 100644 --- a/src/XrdPfc/XrdPfcIOFile.cc +++ b/src/XrdPfc/XrdPfcIOFile.cc @@ -16,20 +16,20 @@ // along with XRootD. If not, see . 
//---------------------------------------------------------------------------------- -#include -#include - -#include "XrdSys/XrdSysError.hh" -#include "XrdSfs/XrdSfsInterface.hh" -#include "XrdSys/XrdSysPthread.hh" - #include "XrdPfcIOFile.hh" #include "XrdPfcStats.hh" #include "XrdPfcTrace.hh" +#include "XrdOss/XrdOss.hh" +#include "XrdSfs/XrdSfsInterface.hh" +#include "XrdSys/XrdSysError.hh" + #include "XrdOuc/XrdOucEnv.hh" #include "XrdOuc/XrdOucPgrwUtils.hh" +#include +#include + using namespace XrdPfc; //______________________________________________________________________________ diff --git a/src/XrdPfc/XrdPfcIOFile.hh b/src/XrdPfc/XrdPfcIOFile.hh index 7e771dbbfb2..1aebc2cecdd 100644 --- a/src/XrdPfc/XrdPfcIOFile.hh +++ b/src/XrdPfc/XrdPfcIOFile.hh @@ -20,7 +20,6 @@ #include -#include "XrdSys/XrdSysPthread.hh" #include "XrdPfcIO.hh" #include "XrdPfc.hh" #include "XrdPfcStats.hh" @@ -72,7 +71,7 @@ public: //! \brief Abstract virtual method of XrdPfc::IO //! Called to destruct the IO object after it is no longer used. void DetachFinalize() override; - + int Fstat(struct stat &sbuff) override; long long FSize() override; diff --git a/src/XrdPfc/XrdPfcIOFileBlock.cc b/src/XrdPfc/XrdPfcIOFileBlock.cc index deb7ee2a38c..7b72500c0b5 100644 --- a/src/XrdPfc/XrdPfcIOFileBlock.cc +++ b/src/XrdPfc/XrdPfcIOFileBlock.cc @@ -16,23 +16,24 @@ // along with XRootD. If not, see . 
//---------------------------------------------------------------------------------- -#include -#include -#include -#include -#include -#include - #include "XrdPfcIOFileBlock.hh" #include "XrdPfc.hh" #include "XrdPfcStats.hh" #include "XrdPfcTrace.hh" -#include "XrdSys/XrdSysError.hh" +#include "XrdOss/XrdOss.hh" #include "XrdSfs/XrdSfsInterface.hh" +#include "XrdSys/XrdSysError.hh" #include "XrdOuc/XrdOucEnv.hh" +#include +#include +#include +#include +#include +#include + using namespace XrdPfc; //______________________________________________________________________________ diff --git a/src/XrdPfc/XrdPfcIOFileBlock.hh b/src/XrdPfc/XrdPfcIOFileBlock.hh index 31a3c5e77d6..8b5effc3eae 100644 --- a/src/XrdPfc/XrdPfcIOFileBlock.hh +++ b/src/XrdPfc/XrdPfcIOFileBlock.hh @@ -21,7 +21,6 @@ #include #include "XrdOuc/XrdOucCache.hh" -#include "XrdSys/XrdSysPthread.hh" #include "XrdPfcIO.hh" @@ -56,7 +55,7 @@ public: using XrdOucCacheIO::Read; int Read(char *Buffer, long long Offset, int Length) override; - + int Fstat(struct stat &sbuff) override; long long FSize() override; diff --git a/src/XrdPfc/XrdPfcInfo.cc b/src/XrdPfc/XrdPfcInfo.cc index d063affd33d..218f175ea6d 100644 --- a/src/XrdPfc/XrdPfcInfo.cc +++ b/src/XrdPfc/XrdPfcInfo.cc @@ -66,7 +66,7 @@ struct FpHelper XrdSysTrace* GetTrace() const { return f_trace; } FpHelper(XrdOssDF* fp, off_t off, XrdSysTrace *trace, const char *tid, const TraceHeader &thdr) : - f_fp(fp), f_off(off), f_trace(trace), m_traceID(tid), f_trace_hdr(thdr) + f_fp(fp), f_off(off), f_trace(trace), m_traceID(tid), f_trace_hdr(thdr) {} // Returns true on error diff --git a/src/XrdPfc/XrdPfcInfo.hh b/src/XrdPfc/XrdPfcInfo.hh index 36a5bc37ad0..9d199dbad78 100644 --- a/src/XrdPfc/XrdPfcInfo.hh +++ b/src/XrdPfc/XrdPfcInfo.hh @@ -18,17 +18,13 @@ // along with XRootD. If not, see . 
//---------------------------------------------------------------------------------- +#include "XrdPfcTypes.hh" + #include #include -#include +#include #include -#include "XrdSys/XrdSysPthread.hh" -#include "XrdCl/XrdClConstants.hh" -#include "XrdCl/XrdClDefaultEnv.hh" - -#include "XrdPfcTypes.hh" - class XrdOssDF; class XrdCksCalc; class XrdSysTrace; @@ -369,7 +365,7 @@ inline void Info::SetBitWritten(int i) inline void Info::SetBitPrefetch(int i) { if (!m_buff_prefetch) return; - + const int cn = i/8; assert(cn < GetBitvecSizeInBytes()); diff --git a/src/XrdPfc/XrdPfcPathParseTools.hh b/src/XrdPfc/XrdPfcPathParseTools.hh new file mode 100644 index 00000000000..be33eaabdaa --- /dev/null +++ b/src/XrdPfc/XrdPfcPathParseTools.hh @@ -0,0 +1,138 @@ +#ifndef __XRDPFC_PATHPARSETOOLS_HH__ +#define __XRDPFC_PATHPARSETOOLS_HH__ + +#include +#include + +#include +#include + +namespace XrdPfc { + +struct SplitParser +{ + char *f_str; + const char *f_delim; + char *f_state; + bool f_first; + + SplitParser(const std::string &s, const char *d) : + f_str(strdup(s.c_str())), f_delim(d), f_state(0), f_first(true) + {} + ~SplitParser() { free(f_str); } + + char* get_token() + { + if (f_first) { f_first = false; return strtok_r(f_str, f_delim, &f_state); } + else { return strtok_r(0, f_delim, &f_state); } + } + + char* get_reminder_with_delim() + { + if (f_first) { return f_str; } + else { *(f_state - 1) = f_delim[0]; return f_state - 1; } + } + + char *get_reminder() + { + return f_first ? 
f_str : f_state; + } + + int fill_argv(std::vector &argv) + { + if (!f_first) return 0; + int dcnt = 0; { char *p = f_str; while (*p) { if (*(p++) == f_delim[0]) ++dcnt; } } + argv.reserve(dcnt + 1); + int argc = 0; + char *i = strtok_r(f_str, f_delim, &f_state); + while (i) + { + ++argc; + argv.push_back(i); + // printf(" arg %d : '%s'\n", argc, i); + i = strtok_r(0, f_delim, &f_state); + } + return argc; + } +}; + +struct PathTokenizer : private SplitParser +{ + std::vector m_dirs; + const char *m_reminder; + int m_n_dirs; + + PathTokenizer(const std::string &path, int max_depth, bool parse_as_lfn) : + SplitParser(path, "/"), + m_reminder (0), + m_n_dirs (0) + { + // max_depth - maximum number of directories to extract. If < 0, all path elements + // are extracted (well, up to 4096). The rest is in m_reminder. + // If parse_as_lfn is true store final token into m_reminder, regardless of maxdepth. + // This assumes the last token is a file name (and full path is lfn, including the file name). + + if (max_depth < 0) + max_depth = 4096; + m_dirs.reserve(std::min(8, max_depth)); + + char *t = 0; + for (int i = 0; i < max_depth; ++i) + { + t = get_token(); + if (t == 0) break; + m_dirs.emplace_back(t); + } + if (parse_as_lfn && *get_reminder() == 0 && ! 
m_dirs.empty()) + { + m_reminder = m_dirs.back(); + m_dirs.pop_back(); + } + else + { + m_reminder = get_reminder(); + } + m_n_dirs = (int) m_dirs.size(); + } + + int get_n_dirs() + { + return m_n_dirs; + } + + const char *get_dir(int pos) + { + if (pos >= m_n_dirs) return 0; + return m_dirs[pos]; + } + + std::string make_path() + { + std::string res; + for (std::vector::iterator i = m_dirs.begin(); i != m_dirs.end(); ++i) + { + res += "/"; + res += *i; + } + if (m_reminder != 0) + { + res += "/"; + res += m_reminder; + } + return res; + } + + void print_debug() + { + printf("PathTokenizer::print_debug size=%d\n", m_n_dirs); + for (int i = 0; i < m_n_dirs; ++i) + { + printf(" %2d: %s\n", i, m_dirs[i]); + } + printf(" rem: %s\n", m_reminder); + } +}; + +} + +#endif diff --git a/src/XrdPfc/XrdPfcPurge.cc b/src/XrdPfc/XrdPfcPurge.cc index 8f5fccc046a..cec0d64ab50 100644 --- a/src/XrdPfc/XrdPfcPurge.cc +++ b/src/XrdPfc/XrdPfcPurge.cc @@ -1,972 +1,183 @@ #include "XrdPfc.hh" +#include "XrdPfcDirStateSnapshot.hh" +#include "XrdPfcResourceMonitor.hh" +#include "XrdPfcFPurgeState.hh" +#include "XrdPfcPurgePin.hh" #include "XrdPfcTrace.hh" -#include -#include - -#include "XrdOuc/XrdOucEnv.hh" -#include "XrdOss/XrdOssAt.hh" -#include "XrdSys/XrdSysTrace.hh" - -using namespace XrdPfc; +#include "XrdOss/XrdOss.hh" -namespace XrdPfc -{ +#include -XrdSysTrace* GetTrace() +namespace { - // needed for logging macros - return Cache::GetInstance().GetTrace(); + XrdSysTrace* GetTrace() { return XrdPfc::Cache::GetInstance().GetTrace(); } + const char *m_traceID = "ResourceMonitor"; } -// Temporary, extensive purge tracing -// #define TRACE_PURGE(x) TRACE(Debug, x) -// #define TRACE_PURGE(x) std::cout << "PURGE " << x << "\n" -#define TRACE_PURGE(x) - //============================================================================== -// DirState +// OldStylePurgeDriver //============================================================================== - -class DirState +namespace XrdPfc { - 
DirState *m_parent; - - Stats m_stats; // access stats from client reads in this directory (and subdirs) - - long long m_usage; // collected / measured during purge traversal - long long m_usage_extra; // collected from write events in this directory and subdirs - long long m_usage_purged; // amount of data purged from this directory (and subdirectories for leaf nodes) - - // begin purge traversal usage \_ so we can have a good estimate of what came in during the traversal - // end purge traversal usage / (should be small, presumably) - - // quota info, enabled? - - int m_depth; - int m_max_depth; // XXXX Do we need this? Should it be passed in to find functions? - bool m_stat_report; // not used yet - storing of stats requested - - typedef std::map DsMap_t; - typedef DsMap_t::iterator DsMap_i; - - DsMap_t m_subdirs; - - void init() - { - m_usage = 0; - m_usage_extra = 0; - m_usage_purged = 0; - } - - DirState* create_child(const std::string &dir) - { - std::pair ir = m_subdirs.insert(std::make_pair(dir, DirState(this))); - return & ir.first->second; - } - - DirState* find_path_tok(PathTokenizer &pt, int pos, bool create_subdirs) - { - if (pos == pt.get_n_dirs()) return this; - - DsMap_i i = m_subdirs.find(pt.m_dirs[pos]); - - DirState *ds = 0; - - if (i != m_subdirs.end()) - { - ds = & i->second; - } - if (create_subdirs && m_depth < m_max_depth) - { - ds = create_child(pt.m_dirs[pos]); - } - if (ds) return ds->find_path_tok(pt, pos + 1, create_subdirs); - - return 0; - } - -public: - - DirState(int max_depth) : m_parent(0), m_depth(0), m_max_depth(max_depth) - { - init(); - } - - DirState(DirState *parent) : m_parent(parent), m_depth(m_parent->m_depth + 1), m_max_depth(m_parent->m_max_depth) - { - init(); - } - - DirState* get_parent() { return m_parent; } - - void set_usage(long long u) { m_usage = u; m_usage_extra = 0; } - void add_up_stats(const Stats& stats) { m_stats.AddUp(stats); } - void add_usage_purged(long long up) { m_usage_purged += up; } - - 
DirState* find_path(const std::string &path, int max_depth, bool parse_as_lfn, bool create_subdirs) - { - PathTokenizer pt(path, max_depth, parse_as_lfn); - - return find_path_tok(pt, 0, create_subdirs); - } - - DirState* find_dir(const std::string &dir, bool create_subdirs) - { - DsMap_i i = m_subdirs.find(dir); - - if (i != m_subdirs.end()) return & i->second; - - if (create_subdirs && m_depth < m_max_depth) return create_child(dir); - - return 0; - } - - void reset_stats() - { - m_stats.Reset(); - - for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) - { - i->second.reset_stats(); - } - } - - void upward_propagate_stats() - { - for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) - { - i->second.upward_propagate_stats(); - - m_stats.AddUp(i->second.m_stats); - } - - m_usage_extra += m_stats.m_BytesWritten; - } - long long upward_propagate_usage_purged() - { - for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) - { - m_usage_purged += i->second.upward_propagate_usage_purged(); - } - m_usage -= m_usage_purged; - - long long ret = m_usage_purged; - m_usage_purged = 0; - return ret; - } - - void dump_recursively(const char *name) - { - printf("%*d %s usage=%lld usage_extra=%lld usage_total=%lld num_ios=%d duration=%d b_hit=%lld b_miss=%lld b_byps=%lld b_wrtn=%lld\n", - 2 + 2*m_depth, m_depth, name, m_usage, m_usage_extra, m_usage + m_usage_extra, - m_stats.m_NumIos, m_stats.m_Duration, m_stats.m_BytesHit, m_stats.m_BytesMissed, m_stats.m_BytesBypassed, m_stats.m_BytesWritten); - - for (DsMap_i i = m_subdirs.begin(); i != m_subdirs.end(); ++i) - { - i->second.dump_recursively(i->first.c_str()); - } - } -}; - - -//============================================================================== -// DataFsState -//============================================================================== - -class DataFsState +long long UnlinkPurgeStateFilesInMap(FPurgeState& purgeState, long long bytes_to_remove, const std::string& root_path) { - int 
m_max_depth; - DirState m_root; - time_t m_prev_time; + static const char *trc_pfx = "UnlinkPurgeStateFilesInMap "; -public: - DataFsState() : - m_max_depth ( Cache::Conf().m_dirStatsStoreDepth ), - m_root ( m_max_depth ), - m_prev_time ( time(0) ) - {} + struct stat fstat; + int protected_cnt = 0; + int deleted_file_count = 0; + long long deleted_st_blocks = 0; + long long protected_st_blocks = 0; + long long st_blocks_to_remove = (bytes_to_remove >> 9) + 1ll; + - int get_max_depth() const { return m_max_depth; } + const auto &cache = Cache::TheOne(); + auto &resmon = Cache::ResMon(); + auto &oss = *cache.GetOss(); - DirState* get_root() { return & m_root; } + TRACE(Info, trc_pfx << "Started, root_path = " << root_path << ", bytes_to_remove = " << bytes_to_remove); - DirState* find_dirstate_for_lfn(const std::string& lfn) + // Loop over map and remove files with oldest values of access time. + for (FPurgeState::map_i it = purgeState.refMap().begin(); it != purgeState.refMap().end(); ++it) { - return m_root.find_path(lfn, m_max_depth, true, true); - } - - void reset_stats() { m_root.reset_stats(); } - void upward_propagate_stats() { m_root.upward_propagate_stats(); } - void upward_propagate_usage_purged() { m_root.upward_propagate_usage_purged(); } - - void dump_recursively() - { - time_t now = time(0); - - printf("DataFsState::dump_recursively epoch = %lld delta_t = %lld max_depth = %d\n", - (long long) now, (long long) (now - m_prev_time), m_max_depth); - - m_prev_time = now; - - m_root.dump_recursively("root"); - } -}; - - -//============================================================================== -// FPurgeState -//============================================================================== - -class FPurgeState -{ -public: - struct FS - { - std::string path; - long long nBytes; - time_t time; - DirState *dirState; - - FS(const std::string &dname, const char *fname, long long n, time_t t, DirState *ds) : - path(dname + fname), nBytes(n), time(t), 
dirState(ds) - {} - }; - - typedef std::multimap map_t; - typedef map_t::iterator map_i; - - map_t m_fmap; // map of files that are purge candidates - - typedef std::list list_t; - typedef list_t::iterator list_i; - - list_t m_flist; // list of files to be removed unconditionally - - long long nBytesReq; - long long nBytesAccum; - long long nBytesTotal; - time_t tMinTimeStamp; - time_t tMinUVKeepTimeStamp; - - // XrdOss *m_oss; - XrdOssAt m_oss_at; - - // ------------------------------------ - // Directory handling & stat collection - // ------------------------------------ - - DirState *m_dir_state; - std::string m_current_path; // Includes trailing '/' - int m_dir_level; - const int m_max_dir_level_for_stat_collection; // until we honor globs from pfc.dirstats - - std::vector m_dir_names_stack; - std::vector m_dir_usage_stack; - - const char *m_info_ext; - const size_t m_info_ext_len; - XrdSysTrace *m_trace; - - static const char *m_traceID; - - - void begin_traversal(DirState *root, const char *root_path = "/") - { - m_dir_state = root; - m_dir_level = 0; - m_current_path = std::string(root_path); - m_dir_usage_stack.push_back(0); - - TRACE_PURGE("FPurgeState::begin_traversal cur_path '" << m_current_path << "', usage=" << m_dir_usage_stack.back() << ", level=" << m_dir_level); - } - - void end_traversal() - { - TRACE_PURGE("FPurgeState::end_traversal reporting for '" << m_current_path << "', usage=" << m_dir_usage_stack.back() << ", nBytesTotal=" << nBytesTotal << ", level=" << m_dir_level); - - m_dir_state->set_usage(m_dir_usage_stack.back()); - - m_dir_state = 0; - } - - void cd_down(const std::string& dir_name) - { - ++m_dir_level; - - if (m_dir_level <= m_max_dir_level_for_stat_collection) + // Finish when enough space has been freed but not while age-based purging is in progress. + // Those files are marked with time-stamp = 0. 
+ if (st_blocks_to_remove <= 0 && it->first != 0) { - m_dir_usage_stack.push_back(0); - m_dir_state = m_dir_state->find_dir(dir_name, true); + break; } - m_dir_names_stack.push_back(dir_name); - m_current_path.append(dir_name); - m_current_path.append("/"); - } + std::string &infoPath = it->second.path; + std::string dataPath = infoPath.substr(0, infoPath.size() - Info::s_infoExtensionLen); - void cd_up() - { - if (m_dir_level <= m_max_dir_level_for_stat_collection) + if (cache.IsFileActiveOrPurgeProtected(dataPath)) { - long long tail = m_dir_usage_stack.back(); - m_dir_usage_stack.pop_back(); - - TRACE_PURGE("FPurgeState::cd_up reporting for '" << m_current_path << "', usage=" << tail << ", level=" << m_dir_level); - - m_dir_state->set_usage(tail); - m_dir_state = m_dir_state->get_parent(); - - m_dir_usage_stack.back() += tail; + ++protected_cnt; + protected_st_blocks += it->second.nStBlocks; + TRACE(Debug, trc_pfx << "File is active or purge-protected: " << dataPath << " size: " << 512ll * it->second.nStBlocks); + continue; } - // remove trailing / and last dir but keep the new trailing / in place. 
- m_current_path.erase(m_current_path.find_last_of('/', m_current_path.size() - 2) + 1); - m_dir_names_stack.pop_back(); - - --m_dir_level; - } - - // ------------------------------------------------------------------------ - // ------------------------------------------------------------------------ - - FPurgeState(long long iNBytesReq, XrdOss &oss) : - nBytesReq(iNBytesReq), nBytesAccum(0), nBytesTotal(0), tMinTimeStamp(0), tMinUVKeepTimeStamp(0), - // m_oss(oss), - m_oss_at(oss), - m_dir_state(0), m_dir_level(0), - m_max_dir_level_for_stat_collection(Cache::Conf().m_dirStatsStoreDepth), - m_info_ext(XrdPfc::Info::s_infoExtension), - m_info_ext_len(strlen(XrdPfc::Info::s_infoExtension)), - m_trace(Cache::GetInstance().GetTrace()) - { - m_current_path.reserve(256); - m_dir_names_stack.reserve(32); - m_dir_usage_stack.reserve(m_max_dir_level_for_stat_collection + 1); - } - - // ------------------------------------------------------------------------ - - void setMinTime(time_t min_time) { tMinTimeStamp = min_time; } - time_t getMinTime() const { return tMinTimeStamp; } - void setUVKeepMinTime(time_t min_time) { tMinUVKeepTimeStamp = min_time; } - long long getNBytesTotal() const { return nBytesTotal; } - - void MoveListEntriesToMap() - { - for (list_i i = m_flist.begin(); i != m_flist.end(); ++i) + // remove info file + if (oss.Stat(infoPath.c_str(), &fstat) == XrdOssOK) { - m_fmap.insert(std::make_pair(i->time, *i)); + oss.Unlink(infoPath.c_str()); + TRACE(Dump, trc_pfx << "Removed file: '" << infoPath << "' size: " << 512ll * fstat.st_size); } - m_flist.clear(); - } - - /* - void UnlinkInfoAndData(const char *fname, long long nbytes, XrdOssDF *iOssDF) - { - fname[fname_len - m_info_ext_len] = 0; - if (nbytes > 0) + else { - if ( ! 
Cache.GetInstance().IsFileActiveOrPurgeProtected(dataPath)) - { - m_n_purged++; - m_bytes_purged += nbytes; - } else - { - m_n_purge_protected++; - m_bytes_purge_protected += nbytes; - m_dir_state->add_usage_purged(nbytes); - // XXXX should also tweak other stuff? - fname[fname_len - m_info_ext_len] = '.'; - return; - } + TRACE(Error, trc_pfx << "Can't locate file " << dataPath); } - m_oss_at.Unlink(*iOssDF, fname); - fname[fname_len - m_info_ext_len] = '.'; - m_oss_at.Unlink(*iOssDF, fname); - } - */ - - void CheckFile(const char *fname, Info &info, struct stat &fstat /*, XrdOssDF *iOssDF*/) - { - static const char *trc_pfx = "FPurgeState::CheckFile "; - long long nbytes = info.GetNDownloadedBytes(); - time_t atime; - if ( ! info.GetLatestDetachTime(atime)) + // remove data file + if (oss.Stat(dataPath.c_str(), &fstat) == XrdOssOK) { - // cinfo file does not contain any known accesses, use fstat.mtime instead. - TRACE(Debug, trc_pfx << "could not get access time for " << m_current_path << fname << ", using mtime from stat instead."); - atime = fstat.st_mtime; - } - // TRACE(Dump, trc_pfx << "checking " << fname << " accessTime " << atime); - - nBytesTotal += nbytes; + st_blocks_to_remove -= it->second.nStBlocks; + deleted_st_blocks += it->second.nStBlocks; + ++deleted_file_count; - m_dir_usage_stack.back() += nbytes; + oss.Unlink(dataPath.c_str()); + TRACE(Dump, trc_pfx << "Removed file: '" << dataPath << "' size: " << 512ll * it->second.nStBlocks << ", time: " << it->first); - // XXXX Should remove aged-out files here ... but I have trouble getting - // the DirState and purge report set up consistently. - // Need some serious code reorganization here. - // Biggest problem is maintaining overall state a traversal state consistently. - // Sigh. - - // In first two cases we lie about FS time (set to 0) to get them all removed early. - // The age-based purge atime would also be good as there should be nothing - // before that time in the map anyway. 
- // But we use 0 as a test in purge loop to make sure we continue even if enough - // disk-space has been freed. - - if (tMinTimeStamp > 0 && atime < tMinTimeStamp) - { - m_flist.push_back(FS(m_current_path, fname, nbytes, 0, m_dir_state)); - nBytesAccum += nbytes; - } - else if (tMinUVKeepTimeStamp > 0 && - Cache::Conf().does_cschk_have_missing_bits(info.GetCkSumState()) && - info.GetNoCkSumTimeForUVKeep() < tMinUVKeepTimeStamp) - { - m_flist.push_back(FS(m_current_path, fname, nbytes, 0, m_dir_state)); - nBytesAccum += nbytes; - } - else if (nBytesAccum < nBytesReq || ( ! m_fmap.empty() && atime < m_fmap.rbegin()->first)) - { - m_fmap.insert(std::make_pair(atime, FS(m_current_path, fname, nbytes, atime, m_dir_state))); - nBytesAccum += nbytes; - - // remove newest files from map if necessary - while ( ! m_fmap.empty() && nBytesAccum - m_fmap.rbegin()->second.nBytes >= nBytesReq) - { - nBytesAccum -= m_fmap.rbegin()->second.nBytes; - m_fmap.erase(--(m_fmap.rbegin().base())); - } + resmon.register_file_purge(dataPath, it->second.nStBlocks); } } - - void TraverseNamespace(XrdOssDF *iOssDF) + if (protected_cnt > 0) { - static const char *trc_pfx = "FPurgeState::TraverseNamespace "; - - char fname[256]; - struct stat fstat; - XrdOucEnv env; - - TRACE_PURGE("Starting to read dir [" << m_current_path << "], iOssDF->getFD()=" << iOssDF->getFD() << "."); - - iOssDF->StatRet(&fstat); - - while (true) - { - int rc = iOssDF->Readdir(fname, 256); - - if (rc == -ENOENT) { - TRACE_PURGE(" Skipping ENOENT dir entry [" << fname << "]."); - continue; - } - if (rc != XrdOssOK) { - TRACE(Error, trc_pfx << "Readdir error at " << m_current_path << ", err " << XrdSysE2T(-rc) << "."); - break; - } - - TRACE_PURGE(" Readdir [" << fname << "]"); - - if (fname[0] == 0) { - TRACE_PURGE(" Finished reading dir [" << m_current_path << "]. Break loop."); - break; - } - if (fname[0] == '.' && (fname[1] == 0 || (fname[1] == '.' 
&& fname[2] == 0))) { - TRACE_PURGE(" Skipping here or parent dir [" << fname << "]. Continue loop."); - continue; - } - - size_t fname_len = strlen(fname); - XrdOssDF *dfh = 0; - - if (S_ISDIR(fstat.st_mode)) - { - if (m_oss_at.Opendir(*iOssDF, fname, env, dfh) == XrdOssOK) - { - cd_down(fname); TRACE_PURGE(" cd_down -> [" << m_current_path << "]."); - TraverseNamespace(dfh); - cd_up(); TRACE_PURGE(" cd_up -> [" << m_current_path << "]."); - } - else - TRACE(Warning, trc_pfx << "could not opendir [" << m_current_path << fname << "], " << XrdSysE2T(errno)); - } - else if (fname_len > m_info_ext_len && strncmp(&fname[fname_len - m_info_ext_len], m_info_ext, m_info_ext_len) == 0) - { - // Check if the file is currently opened / purge-protected is done before unlinking of the file. - - Info cinfo(m_trace); - - if (m_oss_at.OpenRO(*iOssDF, fname, env, dfh) == XrdOssOK && cinfo.Read(dfh, m_current_path.c_str(), fname)) - { - CheckFile(fname, cinfo, fstat); - } - else - { - TRACE(Warning, trc_pfx << "can't open or read " << m_current_path << fname << ", err " << XrdSysE2T(errno) << "; purging."); - m_oss_at.Unlink(*iOssDF, fname); - fname[fname_len - m_info_ext_len] = 0; - m_oss_at.Unlink(*iOssDF, fname); - } - } - else // XXXX devel debug only, to be removed - { - TRACE_PURGE(" Ignoring [" << fname << "], not a dir or cinfo."); - } - - delete dfh; - } + TRACE(Info, trc_pfx << "Encountered " << protected_cnt << " protected files, sum of their size: " << 512ll * protected_st_blocks); } -}; - -const char *FPurgeState::m_traceID = "Purge"; - - -//============================================================================== -// ResourceMonitor -//============================================================================== - -// Encapsulates local variables used withing the previous mega-function Purge(). -// -// This will be used within the continuously/periodically ran heart-beat / breath -// function ... 
and then parts of it will be passed to invoked FS scan and purge -// jobs (which will be controlled throught this as well). - -class ResourceMonitor -{ - -}; - - -//============================================================================== -// -//============================================================================== -namespace -{ - -class ScanAndPurgeJob : public XrdJob -{ -public: - ScanAndPurgeJob(const char *desc = "") : XrdJob(desc) {} - - void DoIt() {} // { Cache::GetInstance().ScanAndPurge(); } -}; + TRACE(Info, trc_pfx << "Finished, removed " << deleted_file_count << " data files, removed total size " << 512ll * deleted_st_blocks) + return deleted_st_blocks; } -//============================================================================== -// Cache methods -//============================================================================== +// ------------------------------------------------------------------------------------- -void Cache::copy_out_active_stats_and_update_data_fs_state() +void OldStylePurgeDriver(DataFsPurgeshot &ps) { - static const char *trc_pfx = "copy_out_active_stats_and_update_data_fs_state() "; - - StatsMMap_t updates; - { - XrdSysCondVarHelper lock(&m_active_cond); - - // Slurp in stats from files closed since last cycle. 
- updates.swap( m_closed_files_stats ); - - for (ActiveMap_i i = m_active.begin(); i != m_active.end(); ++i) - { - if (i->second != 0) - { - updates.insert(std::make_pair(i->first, i->second->DeltaStatsFromLastCall())); + static const char *trc_pfx = "OldStylePurgeDriver "; + const auto &cache = Cache::TheOne(); + const auto &conf = Cache::Conf(); + auto &oss = *cache.GetOss(); + + time_t purge_start = time(0); + + ///////////////////////////////////////////////////////////// + /// PurgePin + ///////////////////////////////////////////////////////////// + PurgePin *purge_pin = cache.GetPurgePin(); + long long std_blocks_removed_by_pin = 0; + if (purge_pin) + { + // set dir stat for each path and calculate nBytes to recover for each path + // return total bytes to recover within the plugin + long long clearVal = purge_pin->GetBytesToRecover(ps); + if (clearVal) + { + TRACE(Debug, "PurgePin remove total " << clearVal << " bytes"); + PurgePin::list_t &dpl = purge_pin->refDirInfos(); + // iterate through the plugin paths + for (PurgePin::list_i ppit = dpl.begin(); ppit != dpl.end(); ++ppit) + { + TRACE(Debug, trc_pfx << "PurgePin scanning dir " << ppit->path.c_str() << " to remove " << ppit->nBytesToRecover << " bytes"); + + FPurgeState fps(ppit->nBytesToRecover, oss); + bool scan_ok = fps.TraverseNamespace(ppit->path.c_str()); + if ( ! 
scan_ok) { + TRACE(Warning, trc_pfx << "purge-pin scan of directory failed for " << ppit->path); + continue; + } + + fps.MoveListEntriesToMap(); + std_blocks_removed_by_pin += UnlinkPurgeStateFilesInMap(fps, ppit->nBytesToRecover, ppit->path); } } } - m_fs_state->reset_stats(); // XXXX-CKSUM rethink how to do this if we keep some purge entries for next time + ///////////////////////////////////////////////////////////// + /// Default purge + ///////////////////////////////////////////////////////////// - for (StatsMMap_i i = updates.begin(); i != updates.end(); ++i) + // check if the default purge is still needed after purge pin + long long pin_removed_bytes = std_blocks_removed_by_pin * 512ll; + long long default_purge_blocks_removed = 0; + if (ps.m_bytes_to_remove > pin_removed_bytes) { - DirState *ds = m_fs_state->find_dirstate_for_lfn(i->first); + // init default purge + long long bytes_to_remove = ps.m_bytes_to_remove - pin_removed_bytes; + FPurgeState purgeState(2 * bytes_to_remove, oss); // prepare twice more volume than required - if (ds == 0) + if (ps.m_age_based_purge) { - TRACE(Error, trc_pfx << "Failed finding DirState for file '" << i->first << "'."); - continue; - } - - ds->add_up_stats(i->second); - } - - m_fs_state->upward_propagate_stats(); -} - - -//============================================================================== - -void Cache::ResourceMonitorHeartBeat() -{ - // static const char *trc_pfx = "ResourceMonitorHeartBeat() "; - - // Pause before initial run - sleep(1); - - // XXXX Setup initial / constant stats (total RAM, total disk, ???) - - XrdOucCacheStats &S = Statistics; - XrdOucCacheStats::CacheStats &X = Statistics.X; - - S.Lock(); - - X.DiskSize = m_configuration.m_diskTotalSpace; - - X.MemSize = m_configuration.m_RamAbsAvailable; - - S.UnLock(); - - // XXXX Schedule initial disk scan, time it! 
- // - // TRACE(Info, trc_pfx << "scheduling intial disk scan."); - // schedP->Schedule( new ScanAndPurgeJob("XrdPfc::ScanAndPurge") ); - // - // bool scan_and_purge_running = true; - - // XXXX Could we really hold last-usage for all files in memory? - - // XXXX Think how to handle disk-full, scan/purge not finishing: - // - start dropping things out of write queue, but only when RAM gets near full; - // - monitoring this then becomes a high-priority job, inner loop with sleep of, - // say, 5 or 10 seconds. - - while (true) - { - time_t heartbeat_start = time(0); - - // TRACE(Info, trc_pfx << "HeartBeat starting ..."); - - // if sumary monitoring configured, pupulate OucCacheStats: - S.Lock(); - - // - available / used disk space (files usage calculated elsewhere (maybe)) - - // - RAM usage - { XrdSysMutexHelper lck(&m_RAM_mutex); - X.MemUsed = m_RAM_used; - X.MemWriteQ = m_RAM_write_queue; + purgeState.setMinTime(time(0) - conf.m_purgeColdFilesAge); } - // - files opened / closed etc - - // do estimate of available space - S.UnLock(); - - // if needed, schedule purge in a different thread. - // purge is: - // - deep scan + gather FSPurgeState - // - actual purge - // - // this thread can continue running and, if needed, stop writing to disk - // if purge is taking too long. - - // think how data is passed / synchronized between this and purge thread - - // !!!! think how stat collection is done and propgated upwards; - // until now it was done once per purge-interval. - // now stats will be added up more often, but purge will be done - // only occasionally. - // also, do we report cumulative values or deltas? cumulative should - // be easier and consistent with summary data. - // still, some are state - like disk usage, num of files. - - // Do we take care of directories that need to be newly added into DirState hierarchy? - // I.e., when user creates new directories and these are covered by either full - // spec or by root + depth declaration. 
- - int heartbeat_duration = time(0) - heartbeat_start; - - // TRACE(Info, trc_pfx << "HeartBeat finished, heartbeat_duration " << heartbeat_duration); - - // int sleep_time = m_configuration.m_purgeInterval - heartbeat_duration; - int sleep_time = 60 - heartbeat_duration; - if (sleep_time > 0) + if (conf.is_uvkeep_purge_in_effect()) { - sleep(sleep_time); + purgeState.setUVKeepMinTime(time(0) - conf.m_cs_UVKeep); } - } -} - -//============================================================================== - -void Cache::Purge() -{ - static const char *trc_pfx = "Purge() "; - - XrdOucEnv env; - long long disk_usage; - long long estimated_file_usage = m_configuration.m_diskUsageHWM; - - // Pause before initial run - sleep(1); - - m_fs_state = new DataFsState; - - // { PathTokenizer p("/a/b/c/f.root", 2, true); p.deboog(); } - // { PathTokenizer p("/a/b/f.root", 2, true); p.deboog(); } - // { PathTokenizer p("/a/f.root", 2, true); p.deboog(); } - // { PathTokenizer p("/f.root", 2, true); p.deboog(); } - - int age_based_purge_countdown = 0; // enforce on first purge loop entry. - bool is_first = true; - - while (true) - { - time_t purge_start = time(0); + // Make a map of file paths, sorted by access time. + bool scan_ok = purgeState.TraverseNamespace("/"); + if (!scan_ok) { - XrdSysCondVarHelper lock(&m_active_cond); - - m_in_purge = true; + TRACE(Error, trc_pfx << "default purge namespace traversal failed at top-directory, this should not happen."); + return; } - TRACE(Info, trc_pfx << "Started."); - - // Bytes to remove based on total disk usage (d) and file usage (f). 
- long long bytesToRemove_d = 0, bytesToRemove_f = 0; + TRACE(Debug, trc_pfx << "default purge usage measured from cinfo files " << purgeState.getNBytesTotal() << " bytes."); - // get amount of space to potentially erase based on total disk usage - XrdOssVSInfo sP; // Make sure we start when a clean slate in each loop - if (m_oss->StatVS(&sP, m_configuration.m_data_space.c_str(), 1) < 0) - { - TRACE(Error, trc_pfx << "can't get StatVS for oss space " << m_configuration.m_data_space); - continue; - } - else - { - disk_usage = sP.Total - sP.Free; - TRACE(Debug, trc_pfx << "used disk space " << disk_usage << " bytes."); - - if (disk_usage > m_configuration.m_diskUsageHWM) - { - bytesToRemove_d = disk_usage - m_configuration.m_diskUsageLWM; - } - } - - // estimate amount of space to erase based on file usage - if (m_configuration.are_file_usage_limits_set()) - { - long long estimated_writes_since_last_purge; - { - XrdSysCondVarHelper lock(&m_writeQ.condVar); - - estimated_writes_since_last_purge = m_writeQ.writes_between_purges; - m_writeQ.writes_between_purges = 0; - } - estimated_file_usage += estimated_writes_since_last_purge; - - TRACE(Debug, trc_pfx << "estimated usage by files " << estimated_file_usage << " bytes."); - - bytesToRemove_f = std::max(estimated_file_usage - m_configuration.m_fileUsageNominal, 0ll); - - // Here we estimate fractional usages -- to decide if full scan is necessary before actual purge. - double frac_du = 0, frac_fu = 0; - m_configuration.calculate_fractional_usages(disk_usage, estimated_file_usage, frac_du, frac_fu); - - if (frac_fu > 1.0 - frac_du) - { - bytesToRemove_f = std::max(bytesToRemove_f, disk_usage - m_configuration.m_diskUsageLWM); - } - } - - long long bytesToRemove = std::max(bytesToRemove_d, bytesToRemove_f); - - bool enforce_age_based_purge = false; - if (m_configuration.is_age_based_purge_in_effect() || m_configuration.is_uvkeep_purge_in_effect()) - { - // XXXX ... I could collect those guys in larger vectors (maps?) 
and do traversal when - // they are empty. - if (--age_based_purge_countdown <= 0) - { - enforce_age_based_purge = true; - age_based_purge_countdown = m_configuration.m_purgeAgeBasedPeriod; - } - } - - bool enforce_traversal_for_usage_collection = is_first; - // XXX Other conditions? Periodic checks? - - copy_out_active_stats_and_update_data_fs_state(); - - TRACE(Debug, trc_pfx << "Precheck:"); - TRACE(Debug, "\tbytes_to_remove_disk = " << bytesToRemove_d << " B"); - TRACE(Debug, "\tbytes_to remove_files = " << bytesToRemove_f << " B (" << (is_first ? "max possible for initial run" : "estimated") << ")"); - TRACE(Debug, "\tbytes_to_remove = " << bytesToRemove << " B"); - TRACE(Debug, "\tenforce_age_based_purge = " << enforce_age_based_purge); - is_first = false; - - long long bytesToRemove_at_start = 0; // set after file scan - int deleted_file_count = 0; - - bool purge_required = (bytesToRemove > 0 || enforce_age_based_purge); - - // XXXX-PurgeOpt Need to retain this state between purges so I can avoid doing - // the traversal more often than really needed. - FPurgeState purgeState(2 * bytesToRemove, *m_oss); // prepare twice more volume than required - - if (purge_required || enforce_traversal_for_usage_collection) - { - // Make a sorted map of file paths sorted by access time. 
- - if (m_configuration.is_age_based_purge_in_effect()) - { - purgeState.setMinTime(time(0) - m_configuration.m_purgeColdFilesAge); - } - if (m_configuration.is_uvkeep_purge_in_effect()) - { - purgeState.setUVKeepMinTime(time(0) - m_configuration.m_cs_UVKeep); - } - - XrdOssDF* dh = m_oss->newDir(m_configuration.m_username.c_str()); - if (dh->Opendir("/", env) == XrdOssOK) - { - purgeState.begin_traversal(m_fs_state->get_root()); - - purgeState.TraverseNamespace(dh); - - purgeState.end_traversal(); - - dh->Close(); - } - delete dh; dh = 0; - - estimated_file_usage = purgeState.getNBytesTotal(); - - TRACE(Debug, trc_pfx << "actual usage by files " << estimated_file_usage << " bytes."); - - // Adjust bytesToRemove_f and then bytesToRemove based on actual file usage, - // possibly retreating below nominal file usage (but not below baseline file usage). - if (m_configuration.are_file_usage_limits_set()) - { - bytesToRemove_f = std::max(estimated_file_usage - m_configuration.m_fileUsageNominal, 0ll); - - double frac_du = 0, frac_fu = 0; - m_configuration.calculate_fractional_usages(disk_usage, estimated_file_usage, frac_du, frac_fu); - - if (frac_fu > 1.0 - frac_du) - { - bytesToRemove = std::max(bytesToRemove_f, disk_usage - m_configuration.m_diskUsageLWM); - bytesToRemove = std::min(bytesToRemove, estimated_file_usage - m_configuration.m_fileUsageBaseline); - } - else - { - bytesToRemove = std::max(bytesToRemove_d, bytesToRemove_f); - } - } - else - { - bytesToRemove = std::max(bytesToRemove_d, bytesToRemove_f); - } - bytesToRemove_at_start = bytesToRemove; - - TRACE(Debug, trc_pfx << "After scan:"); - TRACE(Debug, "\tbytes_to_remove_disk = " << bytesToRemove_d << " B"); - TRACE(Debug, "\tbytes_to remove_files = " << bytesToRemove_f << " B (measured)"); - TRACE(Debug, "\tbytes_to_remove = " << bytesToRemove << " B"); - TRACE(Debug, "\tenforce_age_based_purge = " << enforce_age_based_purge); - TRACE(Debug, "\tmin_time = " << purgeState.getMinTime()); - - if 
(enforce_age_based_purge) - { - purgeState.MoveListEntriesToMap(); - } - } - - // Dump statistcs before actual purging so maximum usage values get recorded. - // Should really go to gstream --- and should really go from Heartbeat. - if (m_configuration.is_dir_stat_reporting_on()) - { - m_fs_state->dump_recursively(); - } - - if (purge_required) - { - // Loop over map and remove files with oldest values of access time. - struct stat fstat; - size_t info_ext_len = strlen(Info::s_infoExtension); - int protected_cnt = 0; - long long protected_sum = 0; - for (FPurgeState::map_i it = purgeState.m_fmap.begin(); it != purgeState.m_fmap.end(); ++it) - { - // Finish when enough space has been freed but not while age-based purging is in progress. - // Those files are marked with time-stamp = 0. - if (bytesToRemove <= 0 && ! (enforce_age_based_purge && it->first == 0)) - { - break; - } - - std::string &infoPath = it->second.path; - std::string dataPath = infoPath.substr(0, infoPath.size() - info_ext_len); - - if (IsFileActiveOrPurgeProtected(dataPath)) - { - ++protected_cnt; - protected_sum += it->second.nBytes; - TRACE(Debug, trc_pfx << "File is active or purge-protected: " << dataPath << " size: " << it->second.nBytes); - continue; - } - - // remove info file - if (m_oss->Stat(infoPath.c_str(), &fstat) == XrdOssOK) - { - // cinfo file can be on another oss.space, do not subtract for now. - // Could be relevant for very small block sizes. 
- // bytesToRemove -= fstat.st_size; - // estimated_file_usage -= fstat.st_size; - // ++deleted_file_count; - - m_oss->Unlink(infoPath.c_str()); - TRACE(Dump, trc_pfx << "Removed file: '" << infoPath << "' size: " << fstat.st_size); - } - - // remove data file - if (m_oss->Stat(dataPath.c_str(), &fstat) == XrdOssOK) - { - bytesToRemove -= it->second.nBytes; - estimated_file_usage -= it->second.nBytes; - ++deleted_file_count; - - m_oss->Unlink(dataPath.c_str()); - TRACE(Dump, trc_pfx << "Removed file: '" << dataPath << "' size: " << it->second.nBytes << ", time: " << it->first); - - if (it->second.dirState != 0) // XXXX This should now always be true. - it->second.dirState->add_usage_purged(it->second.nBytes); - else - TRACE(Error, trc_pfx << "DirState not set for file '" << dataPath << "'."); - } - } - if (protected_cnt > 0) - { - TRACE(Info, trc_pfx << "Encountered " << protected_cnt << " protected files, sum of their size: " << protected_sum); - } - - m_fs_state->upward_propagate_usage_purged(); - } - - { - XrdSysCondVarHelper lock(&m_active_cond); - - m_purge_delay_set.clear(); - m_in_purge = false; - } - - int purge_duration = time(0) - purge_start; - - TRACE(Info, trc_pfx << "Finished, removed " << deleted_file_count << " data files, total size " << - bytesToRemove_at_start - bytesToRemove << ", bytes to remove at end " << bytesToRemove << ", purge duration " << purge_duration); - - int sleep_time = m_configuration.m_purgeInterval - purge_duration; - if (sleep_time > 0) - { - sleep(sleep_time); - } + purgeState.MoveListEntriesToMap(); + default_purge_blocks_removed = UnlinkPurgeStateFilesInMap(purgeState, bytes_to_remove, "/"); } + + // print the total summary + ///////////////////////////////////////////////// + int purge_duration = time(0) - purge_start; + long long total_bytes_removed = (default_purge_blocks_removed + std_blocks_removed_by_pin) * 512ll; + TRACE(Info, trc_pfx << "Finished, removed total size " << total_bytes_removed << ", purge duration " << 
purge_duration); } -} // end XrdPfc namespace +} // end namespace XrdPfc diff --git a/src/XrdPfc/XrdPfcPurgePin.hh b/src/XrdPfc/XrdPfcPurgePin.hh new file mode 100644 index 00000000000..63e440b5197 --- /dev/null +++ b/src/XrdPfc/XrdPfcPurgePin.hh @@ -0,0 +1,79 @@ +#ifndef __XRDPFC_PURGEPLG_HH__ +#define __XRDPFC_PURGEPLG_HH__ + +#include +#include + +namespace XrdPfc +{ +class DataFsPurgeshot; +class DirUsage; + +//---------------------------------------------------------------------------- +//! Base class for requesting directory space to obtain. +//---------------------------------------------------------------------------- +class PurgePin +{ +public: + struct DirInfo + { + std::string path; + long long nBytesQuota{0}; + long long nBytesToRecover{0}; + + // internal use by the Cache purge thread. to be revisited, maybe an access token is more appropriate. + const DirUsage* dirUsage{nullptr}; + }; + + typedef std::vector list_t; + typedef list_t::iterator list_i; + +protected: + list_t m_list; + +public: + virtual ~PurgePin() {} + + + //--------------------------------------------------------------------- + //! + //! + //! @return bool; this base implementation always returns true + //--------------------------------------------------------------------- + virtual bool CallPeriodically() { return true; }; + + + //--------------------------------------------------------------------- + //! Provide erase information from directory statistics + //! + //! @param & XrdPfc::DirState vector, exported from the tree version. + // To be revisited -- can have a multi-step approach where + // cache periodically sends updates. + //! + //! @return total number of bytes + //--------------------------------------------------------------------- + virtual long long GetBytesToRecover(const DataFsPurgeshot&) = 0; + + //------------------------------------------------------------------------------ + //! Parse configuration arguments. + //! + //! @param params configuration parameters + //! + //! 
@return status of configuration + //------------------------------------------------------------------------------ + virtual bool ConfigPurgePin(const char* params) // ?? AMT should this be abstract + { + (void) params; + return true; + } + + //----------------------------------------------- + //! + //! Get quotas for the given paths. Used in the XrdPfc:Cache::Purge() thread. + //! + //------------------------------------------------------------------------------ + list_t &refDirInfos() { return m_list; } +}; +} + +#endif diff --git a/src/XrdPfc/XrdPfcPurgeQuota.cc b/src/XrdPfc/XrdPfcPurgeQuota.cc new file mode 100644 index 00000000000..cb74d84ccc9 --- /dev/null +++ b/src/XrdPfc/XrdPfcPurgeQuota.cc @@ -0,0 +1,137 @@ +#include "XrdPfc.hh" +#include "XrdPfcPurgePin.hh" +#include "XrdPfcDirStateSnapshot.hh" + +#include "XrdOuc/XrdOucEnv.hh" +#include "XrdOuc/XrdOucUtils.hh" +#include "XrdOuc/XrdOucStream.hh" +#include "XrdOuc/XrdOuca2x.hh" + +#include + +class XrdPfcPurgeQuota : public XrdPfc::PurgePin +{ + XrdSysError *log; +public: + XrdPfcPurgeQuota() : log(XrdPfc::Cache::GetInstance().GetLog()) {} + + //---------------------------------------------------------------------------- + //! Set directory statistics + //---------------------------------------------------------------------------- + void InitDirStatesForLocalPaths(const XrdPfc::DataFsPurgeshot &purge_shot) + { + for (list_i it = m_list.begin(); it != m_list.end(); ++it) + { + it->dirUsage = purge_shot.find_dir_usage_for_dir_path(it->path); + } + } + + //---------------------------------------------------------------------------- + //! 
Provide bytes to erase from dir quota listed in a text file + //---------------------------------------------------------------------------- + long long GetBytesToRecover(const XrdPfc::DataFsPurgeshot &purge_shot) override + { + // setup diskusage for each dir path + InitDirStatesForLocalPaths(purge_shot); + + long long totalToRemove = 0; + // get bytes to remove + for (list_i it = m_list.begin(); it != m_list.end(); ++it) + { + if (it->dirUsage == nullptr) + { + log->Emsg("PurgeQuotaPin--GetBytesToRecover", "directory not found:", it->path.c_str()); + continue; + } + long long cv = 512ll * it->dirUsage->m_StBlocks - it->nBytesQuota; + if (cv > 0) + it->nBytesToRecover = cv; + else + it->nBytesToRecover = 0; + + totalToRemove += it->nBytesToRecover; + } + + return totalToRemove; + } + + //---------------------------------------------------------------------------- + //! Provide bytes to erase from dir quota listed in a text file + //---------------------------------------------------------------------------- + bool ConfigPurgePin(const char *parms) override + { + // retrive configuration file name + if (!parms || !parms[0] || (strlen(parms) == 0)) + { + log->Emsg("ConfigPurgePin", "Quota file not specified."); + return false; + } + log->Emsg("ConfigPurgePin", "Using directory list", parms); + + // parse the file to get directory quotas + const char *config_filename = parms; + const char *theINS = getenv("XRDINSTANCE"); + XrdOucEnv myEnv; + XrdOucStream Config(log, theINS, &myEnv, "=====> PurgeQuota "); + + int fd; + if ((fd = open(config_filename, O_RDONLY, 0)) < 0) + { + log->Emsg("ConfigPurgePin() can't open configuration file ", config_filename); + } + + Config.Attach(fd); + static const char *cvec[] = {"*** pfc purge plugin :", 0}; + Config.Capture(cvec); + + char *var; + while ((var = Config.GetMyFirstWord())) + { + std::string dirpath = var; + const char *val; + + if (!(val = Config.GetWord())) + { + log->Emsg("PurgeQuota plugin", "quota not specified"); + 
continue; + } + + std::string tmpc = val; + long long quota = 0; + if (::isalpha(*(tmpc.rbegin()))) + { + if (XrdOuca2x::a2sz(*log, "Error getting quota", tmpc.c_str(), "a)) + { + continue; + } + } + else + { + if (XrdOuca2x::a2ll(*log, "Error getting quota", tmpc.c_str(), "a)) + { + continue; + } + } + + DirInfo d; + d.path = dirpath; + d.nBytesQuota = quota; + m_list.push_back(d); + } + + return true; + } +}; + +/******************************************************************************/ +/* XrdPfcGetPurgePin */ +/******************************************************************************/ + +// Return a purge object to use. +extern "C" +{ + XrdPfc::PurgePin *XrdPfcGetPurgePin(XrdSysError &) + { + return new XrdPfcPurgeQuota(); + } +} diff --git a/src/XrdPfc/XrdPfcResourceMonitor.cc b/src/XrdPfc/XrdPfcResourceMonitor.cc new file mode 100644 index 00000000000..69e0b223adf --- /dev/null +++ b/src/XrdPfc/XrdPfcResourceMonitor.cc @@ -0,0 +1,940 @@ +#include "XrdPfcResourceMonitor.hh" +#include "XrdPfc.hh" +#include "XrdPfcPathParseTools.hh" +#include "XrdPfcFsTraversal.hh" +#include "XrdPfcDirState.hh" +#include "XrdPfcDirStateSnapshot.hh" +#include "XrdPfcTrace.hh" +#include "XrdPfcPurgePin.hh" + +#include "XrdOss/XrdOss.hh" + +#include + +// #define RM_DEBUG +#ifdef RM_DEBUG +#define dprintf(...) printf(__VA_ARGS__) +#else +#define dprintf(...) 
(void(0)) +#endif + +using namespace XrdPfc; + +namespace +{ + XrdSysTrace* GetTrace() { return Cache::GetInstance().GetTrace(); } + const char *m_traceID = "ResourceMonitor"; +} + +//------------------------------------------------------------------------------ + +ResourceMonitor::ResourceMonitor(XrdOss& oss) : + m_fs_state(* new DataFsState), + m_oss(oss) +{} + +ResourceMonitor::~ResourceMonitor() +{ + delete &m_fs_state; +} + +//------------------------------------------------------------------------------ +// Initial scan +//------------------------------------------------------------------------------ + +void ResourceMonitor::CrossCheckIfScanIsInProgress(const std::string &lfn, XrdSysCondVar &cond) +{ + m_dir_scan_mutex.Lock(); + if (m_dir_scan_in_progress) { + m_dir_scan_open_requests.push_back({lfn, cond}); + LfnCondRecord &lcr = m_dir_scan_open_requests.back(); + cond.Lock(); + m_dir_scan_mutex.UnLock(); + while ( ! lcr.f_checked) + cond.Wait(); + cond.UnLock(); + } else { + m_dir_scan_mutex.UnLock(); + } +} + +void ResourceMonitor::process_inter_dir_scan_open_requests(FsTraversal &fst) +{ + m_dir_scan_mutex.Lock(); + while ( ! m_dir_scan_open_requests.empty()) + { + LfnCondRecord &lcr = m_dir_scan_open_requests.front(); + m_dir_scan_mutex.UnLock(); + + cross_check_or_process_oob_lfn(lcr.f_lfn, fst); + lcr.f_cond.Lock(); + lcr.f_checked = true; + lcr.f_cond.Signal(); + lcr.f_cond.UnLock(); + + m_dir_scan_mutex.Lock(); + m_dir_scan_open_requests.pop_front(); + } + m_dir_scan_mutex.UnLock(); +} + +void ResourceMonitor::cross_check_or_process_oob_lfn(const std::string &lfn, FsTraversal &fst) +{ + // Check if lfn has already been processed ... or process it now and mark + // the DirState accordingly (partially processed oob). 
+ static const char *trc_pfx = "cross_check_or_process_oob_lfn() "; + + DirState *last_existing_ds = nullptr; + DirState *ds = m_fs_state.find_dirstate_for_lfn(lfn, &last_existing_ds); + if (ds->m_scanned) + return; + + size_t pos = lfn.find_last_of("/"); + std::string dir = (pos == std::string::npos) ? "" : lfn.substr(0, pos); + + XrdOssDF *dhp = m_oss.newDir(trc_pfx); + if (dhp->Opendir(dir.c_str(), fst.default_env()) == XrdOssOK) + { + fst.slurp_dir_ll(*dhp, ds->m_depth, dir.c_str(), trc_pfx); + + // XXXX clone of function below .... move somewhere? Esp. removal of non-paired files? + DirUsage &here = ds->m_here_usage; + for (auto it = fst.m_current_files.begin(); it != fst.m_current_files.end(); ++it) + { + if (it->second.has_data && it->second.has_cinfo) { + here.m_StBlocks += it->second.stat_data.st_blocks; + here.m_NFiles += 1; + } + } + } + delete dhp; + ds->m_scanned = true; +} + +void ResourceMonitor::scan_dir_and_recurse(FsTraversal &fst) +{ + dprintf("In scan_dir_and_recurse for '%s', size of dir_vec = %d, file_stat_map = %d\n", + fst.m_current_path.c_str(), + (int)fst.m_current_dirs.size(), (int)fst.m_current_files.size()); + + // Breadth first, accumulate into "here", unless it was already scanned via an + // OOB open file request. + if ( ! fst.m_dir_state->m_scanned) + { + DirUsage &here = fst.m_dir_state->m_here_usage; + for (auto it = fst.m_current_files.begin(); it != fst.m_current_files.end(); ++it) + { + dprintf("would be doing something with %s ... has_data=%d, has_cinfo=%d\n", + it->first.c_str(), it->second.has_data, it->second.has_cinfo); + + // XXX Make some of these optional? + // Remove files that do not have both cinfo and data? + // Remove empty directories before even descending? + // Leave this for some consistency pass? + // Note that FsTraversal supports ignored paths ... some details (config, N2N) to be clarified. 
+ + if (it->second.has_data && it->second.has_cinfo) { + here.m_StBlocks += it->second.stat_data.st_blocks; + here.m_NFiles += 1; + } + } + fst.m_dir_state->m_scanned = true; + } + + // Swap-out directories as inter_dir_scan can use the FsTraversal. + std::vector dirs; + dirs.swap(fst.m_current_dirs); + + if (++m_dir_scan_check_counter >= 100) + { + process_inter_dir_scan_open_requests(fst); + m_dir_scan_check_counter = 0; + } + + // Descend into sub-dirs, do not accumulate into recursive_subdir_usage yet. This is done + // in a separate pass to allow for proper accounting of files being opened during the initial scan. + for (auto &dname : dirs) + { + if (fst.cd_down(dname)) + { + scan_dir_and_recurse(fst); + fst.cd_up(); + } + // XXX else try to remove it? + } +} + +bool ResourceMonitor::perform_initial_scan() +{ + // Called after PFC configuration is complete, but before full startup of the daemon. + // Base line usages are accumulated as part of the file-system, traversal. + + update_vs_and_file_usage_info(); + + DirState *root_ds = m_fs_state.get_root(); + FsTraversal fst(m_oss); + fst.m_protected_top_dirs.insert("pfc-stats"); // XXXX This should come from config. Also: N2N? + + if ( ! fst.begin_traversal(root_ds, "/")) + return false; + + { + XrdSysMutexHelper _lock(m_dir_scan_mutex); + m_dir_scan_in_progress = true; + m_dir_scan_check_counter = 0; // recheck oob file-open requests periodically. + } + + scan_dir_and_recurse(fst); + + fst.end_traversal(); + + // We have all directories scanned, available in DirState tree, let all remaining files go + // and then we shall do the upward propagation of usages. + { + XrdSysMutexHelper _lock(m_dir_scan_mutex); + m_dir_scan_in_progress = false; + m_dir_scan_check_counter = 0; + + while ( ! 
m_dir_scan_open_requests.empty()) + { + LfnCondRecord &lcr = m_dir_scan_open_requests.front(); + lcr.f_cond.Lock(); + lcr.f_checked = true; + lcr.f_cond.Signal(); + lcr.f_cond.UnLock(); + + m_dir_scan_open_requests.pop_front(); + } + } + + // Do upward propagation of usages. + root_ds->upward_propagate_initial_scan_usages(); + m_current_usage_in_st_blocks = root_ds->m_here_usage.m_StBlocks + + root_ds->m_recursive_subdir_usage.m_StBlocks; + update_vs_and_file_usage_info(); + + return true; +} + +//------------------------------------------------------------------------------ +// Processing of queues +//------------------------------------------------------------------------------ + +int ResourceMonitor::process_queues() +{ + static const char *trc_pfx = "process_queues() "; + + // Assure that we pick up only entries that are present now. + // We really want all open records to be processed before file-stats updates + // and all those before the close records. + // Purges are sort of tangential as they really just modify bytes / number + // of files in a directory and do not deal with any persistent file id tokens. + + int n_records = 0; + { + XrdSysMutexHelper _lock(&m_queue_mutex); + n_records += m_file_open_q.swap_queues(); + n_records += m_file_update_stats_q.swap_queues(); + n_records += m_file_close_q.swap_queues(); + n_records += m_file_purge_q1.swap_queues(); + n_records += m_file_purge_q2.swap_queues(); + n_records += m_file_purge_q3.swap_queues(); + ++m_queue_swap_u1; + } + + for (auto &i : m_file_open_q.read_queue()) + { + // i.id: LFN, i.record: OpenRecord + AccessToken &at = token(i.id); + dprintf("process file open for token %d, time %ld -- %s\n", + i.id, i.record.m_open_time, at.m_filename.c_str()); + + // Resolve fname into DirState. + // We could clear the filename after this ... or keep it, should we need it later on. + // For now it is just used for debug printouts. 
+ DirState *last_existing_ds = nullptr; + DirState *ds = m_fs_state.find_dirstate_for_lfn(at.m_filename, &last_existing_ds); + at.m_dir_state = ds; + ds->m_here_stats.m_NFilesOpened += 1; + + // If this is a new file figure out how many new parent dirs got created along the way. + if ( ! i.record.m_existing_file) { + ds->m_here_stats.m_NFilesCreated += 1; + DirState *pp = ds; + while (pp != last_existing_ds) { + pp = pp->get_parent(); + pp->m_here_stats.m_NDirectoriesCreated += 1; + } + } + + ds->m_here_usage.m_LastOpenTime = i.record.m_open_time; + } + + for (auto &i : m_file_update_stats_q.read_queue()) + { + // i.id: token, i.record: Stats + AccessToken &at = token(i.id); + // Stats + DirState *ds = at.m_dir_state; + dprintf("process file update for token %d, %p -- %s\n", + i.id, ds, at.m_filename.c_str()); + + ds->m_here_stats.AddUp(i.record); + m_current_usage_in_st_blocks += i.record.m_StBlocksAdded; + } + + for (auto &i : m_file_close_q.read_queue()) + { + // i.id: token, i.record: CloseRecord + AccessToken &at = token(i.id); + dprintf("process file close for token %d, time %ld -- %s\n", + i.id, i.record.m_close_time, at.m_filename.c_str()); + + DirState *ds = at.m_dir_state; + ds->m_here_stats.m_NFilesClosed += 1; + ds->m_here_usage.m_LastCloseTime = i.record.m_close_time; + + at.clear(); + } + { // Release the AccessToken slots under lock. 
+ XrdSysMutexHelper _lock(&m_queue_mutex); + for (auto &i : m_file_close_q.read_queue()) + m_access_tokens_free_slots.push_back(i.id); + } + + for (auto &i : m_file_purge_q1.read_queue()) + { + // i.id: DirState*, i.record: PurgeRecord + DirState *ds = i.id; + ds->m_here_stats.m_StBlocksRemoved += i.record.m_size_in_st_blocks; + ds->m_here_stats.m_NFilesRemoved += i.record.m_n_files; + m_current_usage_in_st_blocks -= i.record.m_size_in_st_blocks; + } + for (auto &i : m_file_purge_q2.read_queue()) + { + // i.id: directory-path, i.record: PurgeRecord + DirState *ds = m_fs_state.get_root()->find_path(i.id, -1, false, false); + if ( ! ds) { + TRACE(Error, trc_pfx << "DirState not found for directory path '" << i.id << "'."); + // find_path can return the last dir found ... but this clearly isn't a valid purge record. + continue; + } + ds->m_here_stats.m_StBlocksRemoved += i.record.m_size_in_st_blocks; + ds->m_here_stats.m_NFilesRemoved += i.record.m_n_files; + m_current_usage_in_st_blocks -= i.record.m_size_in_st_blocks; + } + for (auto &i : m_file_purge_q3.read_queue()) + { + // i.id: LFN, i.record: size of file in st_blocks + DirState *ds = m_fs_state.get_root()->find_path(i.id, -1, true, false); + if ( ! ds) { + TRACE(Error, trc_pfx << "DirState not found for LFN path '" << i.id << "'."); + continue; + } + ds->m_here_stats.m_StBlocksRemoved += i.record; + ds->m_here_stats.m_NFilesRemoved += 1; + m_current_usage_in_st_blocks -= i.record; + } + + // Read queues / vectors are cleared at swap time. + // We might consider reducing their capacity by half if, say, their usage is below 25%. 
+ + return n_records; +} + +//------------------------------------------------------------------------------ +// Heart beat +//------------------------------------------------------------------------------ + +void ResourceMonitor::heart_beat() +{ + static const char *tpfx = "heart_beat() "; + + const Configuration &conf = Cache::Conf(); + + const int s_queue_proc_interval = 10; + // const s_stats_up_prop_interval = 60; -- for when we have dedicated purge / stat report structs + const int s_sshot_report_interval = 60; // to be bumped (300s?) or made configurable. + const int s_purge_check_interval = 60; + const int s_purge_report_interval = conf.m_purgeInterval; + const int s_purge_cold_files_interval = conf.m_purgeInterval * conf.m_purgeAgeBasedPeriod; + + // initial scan performed as part of config + + time_t now = time(0); + time_t next_queue_proc_time = now + s_queue_proc_interval; + time_t next_sshot_report_time = now + s_sshot_report_interval; + time_t next_purge_check_time = now + s_purge_check_interval; + time_t next_purge_report_time = now + s_purge_report_interval; + time_t next_purge_cold_files_time = now + s_purge_cold_files_interval; + + // XXXXX On initial entry should reclaim space from queues as they might have grown + // very large during the initial scan. + + while (true) + { + time_t start = time(0); + time_t next_event = std::min({ next_queue_proc_time, next_sshot_report_time, + next_purge_check_time, next_purge_report_time, next_purge_cold_files_time }); + + if (next_event > start) + { + unsigned int t_sleep = next_event - start; + TRACE(Debug, tpfx << "sleeping for " << t_sleep << " seconds until the next beat."); + sleep(t_sleep); + } + + // Check if purge has been running and has completed yet. + // For now this is only used to prevent removal of empty leaf directories + // during stat propagation so we do not need to wait for the condition in + // the above sleep. 
+ if (m_purge_task_active) { + MutexHolder _lck(m_purge_task_cond); + if (m_purge_task_complete) { + m_purge_task_active = m_purge_task_complete = false; + } + } + + // Always process the queues. + int n_processed = process_queues(); + next_queue_proc_time += s_queue_proc_interval; + TRACE(Debug, tpfx << "process_queues -- n_records=" << n_processed); + + // Always update basic info on m_fs_state (space, usage, file_usage). + update_vs_and_file_usage_info(); + + now = time(0); + if (next_sshot_report_time <= now) + { + next_sshot_report_time += s_sshot_report_interval; + + // XXXX pass in m_purge_task_active as control over "should empty dirs be purged"; + // Or should this be separate pass or variant in purge? + m_fs_state.upward_propagate_stats_and_times(); + + m_fs_state.apply_stats_to_usages(); + + // Dump statistics before actual purging so maximum usage values get recorded. + // This should dump out binary snapshot into /pfc-stats/, if so configured. + // Also, optionally, json. + // Could also go to gstream but this easily gets too large. + if (conf.is_dir_stat_reporting_on()) + { + const int store_depth = conf.m_dirStatsStoreDepth; + #ifdef RM_DEBUG + const DirState &root_ds = *m_fs_state.get_root(); + dprintf("Snapshot n_dirs=%d, total n_dirs=%d\n", root_ds.count_dirs_to_level(store_depth), + root_ds.m_here_usage.m_NDirectories + root_ds.m_recursive_subdir_usage.m_NDirectories + 1); + #endif + m_fs_state.dump_recursively(store_depth); + + /* + // json dump to std::out for debug purpose + DataFsSnapshot ss(m_fs_state); + ss.m_dir_states.reserve(n_sshot_dirs); + + ss.m_dir_states.emplace_back( DirStateElement(root_ds, -1) ); + fill_sshot_vec_children(root_ds, 0, ss.m_dir_states, store_depth); + + // This should really be export to a file (preferably binary, but then bin->json command is needed, too). 
+ ss.dump(); + */ + } + + m_fs_state.reset_stats(); + + now = time(0); + } + + bool do_purge_check = next_purge_check_time <= now; + bool do_purge_report = next_purge_report_time <= now; + bool do_purge_cold_files = next_purge_cold_files_time <= now; + if (do_purge_check || do_purge_report || do_purge_cold_files) + { + perform_purge_check(do_purge_cold_files, do_purge_report ? TRACE_Info : TRACE_Debug); + + next_purge_check_time = now + s_purge_check_interval; + if (do_purge_report) next_purge_report_time = now + s_purge_report_interval; + if (do_purge_cold_files) next_purge_cold_files_time = now + s_purge_cold_files_interval; + } + + } // end while forever +} + +//------------------------------------------------------------------------------ +// DirState export helpers +//------------------------------------------------------------------------------ + +void ResourceMonitor::fill_sshot_vec_children(const DirState &parent_ds, + int parent_idx, + std::vector &vec, + int max_depth) +{ + int pos = vec.size(); + int n_children = parent_ds.m_subdirs.size(); + + for (auto const & [name, child] : parent_ds.m_subdirs) + { + vec.emplace_back( DirStateElement(child, parent_idx) ); + } + + if (parent_ds.m_depth < max_depth) + { + DirStateElement &parent_dse = vec[parent_idx]; + parent_dse.m_daughters_begin = pos; + parent_dse.m_daughters_end = pos + n_children; + + for (auto const & [name, child] : parent_ds.m_subdirs) + { + if (n_children > 0) + fill_sshot_vec_children(child, pos, vec, max_depth); + ++pos; + } + } +} + +void ResourceMonitor::fill_pshot_vec_children(const DirState &parent_ds, + int parent_idx, + std::vector &vec, + int max_depth) +{ + int pos = vec.size(); + int n_children = parent_ds.m_subdirs.size(); + + for (auto const & [name, child] : parent_ds.m_subdirs) + { + vec.emplace_back( DirPurgeElement(child, parent_idx) ); + } + + if (parent_ds.m_depth < max_depth) + { + DirPurgeElement &parent_dpe = vec[parent_idx]; + parent_dpe.m_daughters_begin = pos; + 
parent_dpe.m_daughters_end = pos + n_children; + + for (auto const & [name, child] : parent_ds.m_subdirs) + { + if (n_children > 0) + fill_pshot_vec_children(child, pos, vec, max_depth); + ++pos; + } + } +} + +//------------------------------------------------------------------------------ +// Purge helpers, drivers, etc. +//------------------------------------------------------------------------------ + +void ResourceMonitor::update_vs_and_file_usage_info() +{ + static const char *trc_pfx = "update_vs_and_file_usage_info() "; + + const auto &conf = Cache::Conf(); + XrdOssVSInfo vsi; + + // StatVS error (after it succeeded in config) implies a memory corruption (according to Mr. H). + if (m_oss.StatVS(&vsi, conf.m_data_space.c_str(), 1) < 0) { + TRACE(Error, trc_pfx << "can't get StatVS for oss space '" << conf.m_data_space << "'. This is a fatal error."); + _exit(1); + } + m_fs_state.m_disk_total = vsi.Total; + m_fs_state.m_disk_used = vsi.Total - vsi.Free; + m_fs_state.m_file_usage = 512ll * m_current_usage_in_st_blocks; + if (m_oss.StatVS(&vsi, conf.m_meta_space.c_str(), 1) < 0) { + TRACE(Error, trc_pfx << "can't get StatVS for oss space '" << conf.m_meta_space << "'. 
This is a fatal error."); + _exit(1); + } + m_fs_state.m_meta_total = vsi.Total; + m_fs_state.m_meta_used = vsi.Total - vsi.Free; +} + +long long ResourceMonitor::get_file_usage_bytes_to_remove(const DataFsPurgeshot &ps, long long write_estimate, int tl) +{ + // short names from config values + const Configuration &conf = Cache::Conf(); + long long f0 = conf.m_fileUsageBaseline; + long long f1 = conf.m_fileUsageNominal; + long long f2 = conf.m_fileUsageMax; + long long w1 = conf.m_diskUsageLWM; + long long w2 = conf.m_diskUsageHWM; + + // get usage from purge snapshot + long long T = ps.m_disk_total; + long long x = ps.m_file_usage; + long long u = ps.m_disk_used; + + // get file usage increase from the previous time interval check + long long delta = write_estimate; + TRACE_INT(tl, "file usage increased since the previous purge interval in bytes: " << delta ); + + long long bytes_to_remove = 0; + + // helper lambda function + auto clamp = [&x, &bytes_to_remove](long long lowval, long long highval) + { + long long val = x; + long long newval = val - bytes_to_remove; + + // removed too much + if (newval < lowval) + { + return lowval - val; + } + + // removed too little + if (newval > highval) + { + return highval - val; + } + // keep the original value + return bytes_to_remove; + }; + + // under file quota, nothing to do + if (x < f0) + return 0; + + // total disk usage exceeds highWatermark + if (u >= w2) + { + TRACE_INT(tl, "Disk usage: " << ps.m_disk_used << " exceed highWatermark " << conf.m_diskUsageHWM); + float frac_u = static_cast(u - w2) / (T - w2); + float frac_x = static_cast(x - f0) / (f1 - f0); + + if (w2 == T) + { + bytes_to_remove = u -w1; + } + else + { + if (frac_x > frac_u) + { + // the cache is the reason for going out of w2 range + bytes_to_remove = (frac_x - frac_u) * (f1 - f0); + bytes_to_remove += delta; + bytes_to_remove = clamp(f0, f1); + } + else + { + // someone else is filling disk space, go to f1 + bytes_to_remove = clamp(f0, f2); + } + 
return bytes_to_remove; + } + } + + // file quota and total disk usage is within normal range, check if this space usage is + // proportinal to disk usage and correct it + if (u > w1 && x > f1) + { + float frac_u = static_cast(u - w1) / (w2 - w1); + float frac_x = static_cast(x - f1) / (f2 - f1); + if (frac_x > frac_u) + { + TRACE_INT(tl, "Disproportional file quota usage comapared to disc usage (frac_x/frac_u) = " << frac_x << "/"<< frac_u); + bytes_to_remove = (frac_x - frac_u) * (f2 - f1); + bytes_to_remove += delta; + } + + // check the new x val will not be below f0 + bytes_to_remove = clamp(f0, f2); + return bytes_to_remove; + } + + // final check: disk useage is lower that w1, check if exceed the max file usage f2 + if (x > f2) + { + // drop usage to f2 + // compare with global disk usage in the previous purge cycle (default 300s) + // check delta is not overflowing f2, else set numver of bytes to remove according remove to f0 + + TRACE_INT(tl, "File usage exceeds maxim file usage. Total disk usage is under lowWatermark. Clearing to low file usage."); + long long f2delta = std::max(f2 - delta, f0); + bytes_to_remove = clamp(f0, f2delta); + return bytes_to_remove; + } + + return bytes_to_remove; +} + +void ResourceMonitor::perform_purge_check(bool purge_cold_files, int tl) +{ + static const char *trc_pfx = "perform_purge_check() "; + const Configuration &conf = Cache::Conf(); + + std::unique_ptr psp( new DataFsPurgeshot(m_fs_state) ); + DataFsPurgeshot &ps = *psp; + + ps.m_file_usage = 512ll * m_current_usage_in_st_blocks; + // These are potentially wrong as cache might be writing over preallocated byte ranges. + ps.m_estimated_writes_from_writeq = Cache::GetInstance().WritesSinceLastCall(); + // Can have another estimate based on eiter writes or st-blocks from purge-stats, once we have them. 
+ + TRACE_INT(tl, trc_pfx << "Purge check:"); + + ps.m_bytes_to_remove = 0; + if (conf.are_file_usage_limits_set()) + { + ps.m_bytes_to_remove = get_file_usage_bytes_to_remove(ps, ps.m_estimated_writes_from_writeq, tl); + } + else + { + if (ps.m_disk_used > conf.m_diskUsageHWM) + { + TRACE_INT(tl, "Disk usage: " << ps.m_disk_used << " exceed highWatermark."); + ps.m_bytes_to_remove = ps.m_disk_used - conf.m_diskUsageLWM; + } + } + + ps.m_space_based_purge = ps.m_bytes_to_remove ? 1 : 0; + + // Purge precheck -- check if age-based purge is required + // We ignore uvkeep time, it requires reading of cinfo files and it is enforced in File::Open() anyway. + + if (purge_cold_files && conf.is_age_based_purge_in_effect()) // || conf.is_uvkeep_purge_in_effect()) + { + ps.m_age_based_purge = true; + } + + TRACE_INT(tl, "\tbytes_to_remove = " << ps.m_bytes_to_remove << " B"); + TRACE_INT(tl, "\tspace_based_purge = " << ps.m_space_based_purge); + TRACE_INT(tl, "\tage_based_purge = " << ps.m_age_based_purge); + + bool periodic = Cache::GetInstance().GetPurgePin() ? + Cache::GetInstance().GetPurgePin()->CallPeriodically() : false; + + if ( ! ps.m_space_based_purge && ! ps.m_age_based_purge && !periodic ) { + TRACE(Info, trc_pfx << "purge not required."); + Cache::GetInstance().ClearPurgeProtectedSet(); + return; + } + if (m_purge_task_active) { + TRACE(Warning, trc_pfx << "purge required but previous purge task is still active!"); + return; + } + + TRACE(Info, trc_pfx << "scheduling purge task."); + + // At this point we have all the information: report, decide on action. + // There is still some missing infrastructure, especially as regards to purge-plugin: + // - at what point do we start bugging the pu-pin to start coughing up purge lists? + // - have a new parameter or just do it "one cycle before full"? + // - what if it doesn't -- when do we do the old-stlye scan & purge? + // - how do we do age-based purge and uvkeep purge? 
+ // - they are really quite different -- and could run separately, registering + // files into a purge-candidate list. This has to be rechecked before the actual + // deletion -- eg, by comparing stat time of cinfo + doing the is-active / is-purge-protected. + + const DirState &root_ds = *m_fs_state.get_root(); + const int n_calc_dirs = 1 + root_ds.m_here_usage.m_NDirectories + root_ds.m_recursive_subdir_usage.m_NDirectories; +#ifdef RM_DEBUG + const int n_pshot_dirs = root_ds.count_dirs_to_level(9999); + dprintf("purge dir count recursive=%d vs from_usage=%d\n", n_pshot_dirs, n_calc_dirs); +#endif + ps.m_dir_vec.reserve(n_calc_dirs); + ps.m_dir_vec.emplace_back( DirPurgeElement(root_ds, -1) ); + fill_pshot_vec_children(root_ds, 0, ps.m_dir_vec, 9999); + + m_purge_task_active = true; + + struct PurgeDriverJob : public XrdJob + { + DataFsPurgeshot *m_purge_shot_ptr; + + PurgeDriverJob(DataFsPurgeshot *psp) : + XrdJob("XrdPfc::ResourceMonitor::PurgeDriver"), + m_purge_shot_ptr(psp) + {} + + void DoIt() override + { + Cache::ResMon().perform_purge_task(*m_purge_shot_ptr); + Cache::ResMon().perform_purge_task_cleanup(); + + delete m_purge_shot_ptr; + delete this; + } + }; + + Cache::schedP->Schedule( new PurgeDriverJob(psp.release()) ); +} + +namespace XrdPfc +{ + void OldStylePurgeDriver(DataFsPurgeshot &ps); +} + +void ResourceMonitor::perform_purge_task(DataFsPurgeshot &ps) +{ + // BEWARE: Runs in a dedicated thread - is only to communicate back to the + // hear_beat() / data structs via the purge queues and condition variable. + + // const char *tpfx = "perform_purge_task "; + + { + MutexHolder _lck(m_purge_task_cond); + m_purge_task_start = time(0); + } + + // For now, fall back to the old purge ... to be improved with: + // - new scan, following the DataFsPurgeshot; + // - usage of cinfo stat mtime for time of last access (touch already done at output); + // - use DirState* to report back purged files. 
+ // Already changed to report back purged files --- but using the string / path variant. + OldStylePurgeDriver(ps); // In XrdPfcPurge.cc +} + +void ResourceMonitor::perform_purge_task_cleanup() +{ + // Separated out so the purge_task can exit without post-checks. + + { + MutexHolder _lck(m_purge_task_cond); + m_purge_task_end = time(0); + m_purge_task_complete = true; + m_purge_task_cond.Signal(); + } + Cache::GetInstance().ClearPurgeProtectedSet(); +} + +//============================================================================== +// Main thread function, do initial test, then enter heart_beat(). +//============================================================================== + +void ResourceMonitor::init_before_main() +{ + // setup for in-scan -- this is called from initial setup. + MutexHolder _lck(m_dir_scan_mutex); + m_dir_scan_in_progress = true; +} + +void ResourceMonitor::main_thread_function() +{ + const char *tpfx = "main_thread_function "; + { + time_t is_start = time(0); + TRACE(Info, tpfx << "Stating initial directory scan."); + + if ( ! perform_initial_scan()) { + TRACE(Error, tpfx << "Initial directory scan has failed. This is a terminal error, aborting.") + _exit(1); + } + // Reset of m_dir_scan_in_progress is done in perform_initial_scan() + + time_t is_duration = time(0) - is_start; + TRACE(Info, tpfx << "Initial directory scan complete, duration=" << is_duration <<"s"); + + // run first process queues + int n_proc_is = process_queues(); + TRACE(Info, tpfx << "First process_queues finished, n_records=" << n_proc_is); + + // shrink queues if scan time was longer than 30s. 
+ if (is_duration > 30 || n_proc_is > 3000) + { + m_file_open_q.shrink_read_queue(); + m_file_update_stats_q.shrink_read_queue(); + m_file_close_q.shrink_read_queue(); + m_file_purge_q1.shrink_read_queue(); + m_file_purge_q2.shrink_read_queue(); + m_file_purge_q3.shrink_read_queue(); + } + } + heart_beat(); +} + +//============================================================================== +// Old prototype from Cache / Purge, now to go into heart_beat() here, above. +//============================================================================== + +void Proto_ResourceMonitorHeartBeat() +{ + // static const char *trc_pfx = "ResourceMonitorHeartBeat() "; + + // Pause before initial run + sleep(1); + + // XXXX Setup initial / constant stats (total RAM, total disk, ???) + + XrdOucCacheStats &S = Cache::GetInstance().Statistics; + XrdOucCacheStats::CacheStats &X = S.X; + + S.Lock(); + + X.DiskSize = Cache::Conf().m_diskTotalSpace; + + X.MemSize = Cache::Conf().m_RamAbsAvailable; + + S.UnLock(); + + // XXXX Schedule initial disk scan, time it! + // + // TRACE(Info, trc_pfx << "scheduling intial disk scan."); + // schedP->Schedule( new ScanAndPurgeJob("XrdPfc::ScanAndPurge") ); + // + // bool scan_and_purge_running = true; + + // XXXX Could we really hold last-usage for all files in memory? + + // XXXX Think how to handle disk-full, scan/purge not finishing: + // - start dropping things out of write queue, but only when RAM gets near full; + // - monitoring this then becomes a high-priority job, inner loop with sleep of, + // say, 5 or 10 seconds. 
+ + while (true) + { + time_t heartbeat_start = time(0); + + // TRACE(Info, trc_pfx << "HeartBeat starting ..."); + + // if summary monitoring configured, populate OucCacheStats: + S.Lock(); + + // - available / used disk space (files usage calculated elsewhere (maybe)) + + // - RAM usage + /* XXXX From Cache + { XrdSysMutexHelper lck(&m_RAM_mutex); + X.MemUsed = m_RAM_used; + X.MemWriteQ = m_RAM_write_queue; + } + */ + + // - files opened / closed etc + + // do estimate of available space + S.UnLock(); + + // if needed, schedule purge in a different thread. + // purge is: + // - deep scan + gather FSPurgeState + // - actual purge + // + // this thread can continue running and, if needed, stop writing to disk + // if purge is taking too long. + + // think how data is passed / synchronized between this and purge thread + + // !!!! think how stat collection is done and propagated upwards; + // until now it was done once per purge-interval. + // now stats will be added up more often, but purge will be done + // only occasionally. + // also, do we report cumulative values or deltas? cumulative should + // be easier and consistent with summary data. + // still, some are state - like disk usage, num of files. + + // Do we take care of directories that need to be newly added into DirState hierarchy? + // I.e., when user creates new directories and these are covered by either full + // spec or by root + depth declaration.
+ + int heartbeat_duration = time(0) - heartbeat_start; + + // TRACE(Info, trc_pfx << "HeartBeat finished, heartbeat_duration " << heartbeat_duration); + + // int sleep_time = m_fs_state..m_purgeInterval - heartbeat_duration; + int sleep_time = 60 - heartbeat_duration; + if (sleep_time > 0) + { + sleep(sleep_time); + } + } +} diff --git a/src/XrdPfc/XrdPfcResourceMonitor.hh b/src/XrdPfc/XrdPfcResourceMonitor.hh new file mode 100644 index 00000000000..770c7ed7b15 --- /dev/null +++ b/src/XrdPfc/XrdPfcResourceMonitor.hh @@ -0,0 +1,264 @@ +#ifndef __XRDPFC_RESOURCEMONITOR_HH__ +#define __XRDPFC_RESOURCEMONITOR_HH__ + +#include "XrdPfcStats.hh" + +#include "XrdSys/XrdSysPthread.hh" + +#include +#include +#include + +class XrdOss; + +namespace XrdPfc { + +class DataFsState; +class DirState; +class DirStateElement; +class DataFsSnapshot; +class DirPurgeElement; +class DataFsPurgeshot; +class FsTraversal; + +//============================================================================== +// ResourceMonitor +//============================================================================== + +// Encapsulates local variables used within the previous mega-function Purge(). +// +// This will be used within the continuously/periodically run heart-beat / breath +// function ... and then parts of it will be passed to invoked FS scan and purge +// jobs (which will be controlled through this as well). + +// Andy: XRDADMINPATH Is the directory for administrative files (i.e.
all.adminpath) +// Also: XrdOucEnv::Export("XRDLOGDIR", logParms.logfn); (in XrdOucLogging::configLog) + +class ResourceMonitor +{ + template + class Queue { + public: + struct Entry { + ID id; + RECORD record; + }; + using queue_type = std::vector; + using iterator = typename queue_type::iterator; + + Queue() = default; + + int write_queue_size() const { return m_write_queue.size(); } + bool read_queue_empty() const { return m_read_queue.empty(); } + int read_queue_size() const { return m_read_queue.size(); } + + // Writer / producer access + void push(ID id, RECORD stat) { m_write_queue.push_back({ id, stat }); } + // Existing entry access for updating Stats + RECORD& write_record(int pos) { return m_write_queue[pos].record; } + + // Reader / consumer access + int swap_queues() { m_read_queue.clear(); m_write_queue.swap(m_read_queue); return read_queue_size(); } + const queue_type& read_queue() const { return m_read_queue; } + iterator begin() const { return m_read_queue.begin(); } + iterator end() const { return m_read_queue.end(); } + + // Shrinkage of overgrown queues + void shrink_read_queue() { m_read_queue.clear(); m_read_queue.shrink_to_fit(); } + + private: + queue_type m_write_queue, m_read_queue; + }; + + struct AccessToken { + std::string m_filename; + unsigned int m_last_queue_swap_u1 = 0xffffffff; + int m_last_write_queue_pos = -1; + DirState *m_dir_state = nullptr; + + void clear() { + m_filename.clear(); + m_last_queue_swap_u1 = 0xffffffff; + m_last_write_queue_pos = -1; + m_dir_state = nullptr; + } + }; + std::vector m_access_tokens; + std::vector m_access_tokens_free_slots; + + struct OpenRecord { + time_t m_open_time; + bool m_existing_file; + }; + + struct CloseRecord { + time_t m_close_time; + Stats m_full_stats; + }; + + struct PurgeRecord { + long long m_size_in_st_blocks; + int m_n_files; + }; + + Queue m_file_open_q; + Queue m_file_update_stats_q; + Queue m_file_close_q; + Queue m_file_purge_q1; + Queue m_file_purge_q2; + Queue 
m_file_purge_q3; + // DirPurge queue -- not needed? But we do need last-change timestamp in DirState. + + long long m_current_usage_in_st_blocks = 0; // aggregate disk usage by files + + XrdSysMutex m_queue_mutex; // mutex shared between queues + unsigned int m_queue_swap_u1 = 0u; // identifier of current swap cycle + + DataFsState &m_fs_state; + XrdOss &m_oss; + + // Requests for File opens during name-space scans. Such LFNs are processed + // with some priority + struct LfnCondRecord + { + const std::string &f_lfn; + XrdSysCondVar &f_cond; + bool f_checked = false; + }; + + XrdSysMutex m_dir_scan_mutex; + std::list m_dir_scan_open_requests; + int m_dir_scan_check_counter; + bool m_dir_scan_in_progress = false; + + void process_inter_dir_scan_open_requests(FsTraversal &fst); + void cross_check_or_process_oob_lfn(const std::string &lfn, FsTraversal &fst); + long long get_file_usage_bytes_to_remove(const DataFsPurgeshot &ps, long long previous_file_usage, int logLeve); + +public: + ResourceMonitor(XrdOss& oss); + ~ResourceMonitor(); + + // --- Initial scan, building of DirState tree + + void scan_dir_and_recurse(FsTraversal &fst); + bool perform_initial_scan(); + + // --- Event registration + + int register_file_open(const std::string& filename, time_t open_timestamp, bool existing_file) { + // Simply return a token, we will resolve it in the actual processing of the queue. + XrdSysMutexHelper _lock(&m_queue_mutex); + int token_id; + if ( ! 
m_access_tokens_free_slots.empty()) { + token_id = m_access_tokens_free_slots.back(); + m_access_tokens_free_slots.pop_back(); + m_access_tokens[token_id].m_filename = filename; + m_access_tokens[token_id].m_last_queue_swap_u1 = m_queue_swap_u1 - 1; + } else { + token_id = (int) m_access_tokens.size(); + m_access_tokens.push_back({filename, m_queue_swap_u1 - 1}); + } + + m_file_open_q.push(token_id, {open_timestamp, existing_file}); + return token_id; + } + + void register_file_update_stats(int token_id, const Stats& stats) { + XrdSysMutexHelper _lock(&m_queue_mutex); + AccessToken &at = token(token_id); + // Check if this is the first update within this queue swap cycle. + if (at.m_last_queue_swap_u1 != m_queue_swap_u1) { + m_file_update_stats_q.push(token_id, stats); + at.m_last_queue_swap_u1 = m_queue_swap_u1; + at.m_last_write_queue_pos = m_file_update_stats_q.write_queue_size() - 1; + } else { + Stats &existing_stats = m_file_update_stats_q.write_record(at.m_last_write_queue_pos); + existing_stats.AddUp(stats); + } + // Optionally, one could return "scaler" to moodify stat-reporting + // frequency in the file ... if it comes too often or too rarely. + // See also the logic for determining reporting interval (in N_bytes_read) + // in File::Open(). + } + + void register_file_close(int token_id, time_t close_timestamp, const Stats& full_stats) { + XrdSysMutexHelper _lock(&m_queue_mutex); + m_file_close_q.push(token_id, {close_timestamp, full_stats}); + } + + // deletions can come from purge and from direct requests (Cache::UnlinkFile), the latter + // also covering the emergency shutdown of a file. 
+ void register_file_purge(DirState* target, long long size_in_st_blocks) { + XrdSysMutexHelper _lock(&m_queue_mutex); + m_file_purge_q1.push(target, {size_in_st_blocks, 1}); + } + void register_multi_file_purge(DirState* target, long long size_in_st_blocks, int n_files) { + XrdSysMutexHelper _lock(&m_queue_mutex); + m_file_purge_q1.push(target, {size_in_st_blocks, n_files}); + } + void register_multi_file_purge(const std::string& target, long long size_in_st_blocks, int n_files) { + XrdSysMutexHelper _lock(&m_queue_mutex); + m_file_purge_q2.push(target, {size_in_st_blocks, n_files}); + } + void register_file_purge(const std::string& filename, long long size_in_st_blocks) { + XrdSysMutexHelper _lock(&m_queue_mutex); + m_file_purge_q3.push(filename, size_in_st_blocks); + } + + // void register_dir_purge(DirState* target); + // target assumed to be empty at this point, triggered by a file_purge removing the last file in it. + // hmmh, this is actually tricky ... who will purge the dirs? we should now at export-to-vector time + // and can prune leaf directories. This might fail if a file has been created in there in the meantime, which is ok. + // However, is there a race condition between rmdir and creation of a new file in that dir? Ask Andy. + + // --- Helpers for event processing and actions + + AccessToken& token(int i) { return m_access_tokens[i]; } + + // --- Actions + + int process_queues(); + + void heart_beat(); + + // --- Helpers for export of DirState vector snapshot. + + void fill_sshot_vec_children(const DirState &parent_ds, + int parent_idx, + std::vector &vec, + int max_depth); + + void fill_pshot_vec_children(const DirState &parent_ds, + int parent_idx, + std::vector &vec, + int max_depth); + + // Interface to other part of XCache -- note the CamelCase() notation. + void CrossCheckIfScanIsInProgress(const std::string &lfn, XrdSysCondVar &cond); + + // main function, steers startup then enters heart_beat. does not die. 
+ void init_before_main(); // called from startup thread / configuration processing + void main_thread_function(); // run in dedicated thread + + XrdSysCondVar m_purge_task_cond {0}; + // The following variables are set under the above lock, purge task signals to heart_beat. + time_t m_purge_task_start {0}; + time_t m_purge_task_end {0}; + bool m_purge_task_active {false}; // from the perspective of heart-beat, set only in heartbeat + bool m_purge_task_complete {false}; // from the perspective of the task, reset in heartbeat, set in task + // When m_purge_task_active == true, DirState entries are not removed from the tree to + // allow purge thread to report cleared files directly via DirState ptr. + // Note, DirState removal happens during stat propagation traversal. + + // Purge helpers etc. + void update_vs_and_file_usage_info(); + void perform_purge_check(bool purge_cold_files, int tl); + + void perform_purge_task(DataFsPurgeshot &ps); + void perform_purge_task_cleanup(); +}; + +} + +#endif diff --git a/src/XrdPfc/XrdPfcStats.hh b/src/XrdPfc/XrdPfcStats.hh index ae06f194dd2..1a94426bf63 100644 --- a/src/XrdPfc/XrdPfcStats.hh +++ b/src/XrdPfc/XrdPfcStats.hh @@ -19,47 +19,53 @@ // along with XRootD. If not, see . //---------------------------------------------------------------------------------- -#include "XrdOuc/XrdOucCache.hh" -#include "XrdSys/XrdSysPthread.hh" - namespace XrdPfc { + //---------------------------------------------------------------------------- //! Statistics of cache utilisation by a File object. +// Used both as aggregation of usage by a single file as well as for +// collecting per-directory statistics on time-interval basis. In this second +// case they are used as "deltas" ... differences in respect to a previous +// reference value. +// For running averages / deltas, one might need a version with doubles, so +// it might make sense to template this. And add some timestamp. 
//---------------------------------------------------------------------------- class Stats { public: - int m_NumIos; //!< number of IO objects attached during this access - int m_Duration; //!< total duration of all IOs attached - long long m_BytesHit; //!< number of bytes served from disk - long long m_BytesMissed; //!< number of bytes served from remote and cached - long long m_BytesBypassed; //!< number of bytes served directly through XrdCl - long long m_BytesWritten; //!< number of bytes written to disk - int m_NCksumErrors; //!< number of checksum errors while getting data from remote + int m_NumIos = 0; //!< number of IO objects attached during this access + int m_Duration = 0; //!< total duration of all IOs attached + long long m_BytesHit = 0; //!< number of bytes served from disk + long long m_BytesMissed = 0; //!< number of bytes served from remote and cached + long long m_BytesBypassed = 0; //!< number of bytes served directly through XrdCl + long long m_BytesWritten = 0; //!< number of bytes written to disk + long long m_StBlocksAdded = 0; //!< number of 512-byte blocks the file has grown by + int m_NCksumErrors = 0; //!< number of checksum errors while getting data from remote //---------------------------------------------------------------------- - Stats() : - m_NumIos (0), m_Duration(0), - m_BytesHit(0), m_BytesMissed(0), m_BytesBypassed(0), - m_BytesWritten(0), m_NCksumErrors(0) - {} + Stats() = default; - Stats(const Stats& s) : - m_NumIos (s.m_NumIos), m_Duration(s.m_Duration), - m_BytesHit(s.m_BytesHit), m_BytesMissed(s.m_BytesMissed), m_BytesBypassed(s.m_BytesBypassed), - m_BytesWritten(s.m_BytesWritten), m_NCksumErrors(s.m_NCksumErrors) - {} + Stats(const Stats& s) = default; Stats& operator=(const Stats&) = default; + Stats(const Stats& a, const Stats& b) : + m_NumIos (a.m_NumIos + b.m_NumIos), + m_Duration (a.m_Duration + b.m_Duration), + m_BytesHit (a.m_BytesHit + b.m_BytesHit), + m_BytesMissed (a.m_BytesMissed + b.m_BytesMissed), + 
m_BytesBypassed (a.m_BytesBypassed + b.m_BytesBypassed), + m_BytesWritten (a.m_BytesWritten + b.m_BytesWritten), + m_StBlocksAdded (a.m_StBlocksAdded + b.m_StBlocksAdded), + m_NCksumErrors (a.m_NCksumErrors + b.m_NCksumErrors) + {} + //---------------------------------------------------------------------- void AddReadStats(const Stats &s) { - XrdSysMutexHelper _lock(&m_Mutex); - m_BytesHit += s.m_BytesHit; m_BytesMissed += s.m_BytesMissed; m_BytesBypassed += s.m_BytesBypassed; @@ -67,82 +73,143 @@ public: void AddBytesHit(long long bh) { - XrdSysMutexHelper _lock(&m_Mutex); - m_BytesHit += bh; } void AddWriteStats(long long bytes_written, int n_cks_errs) { - XrdSysMutexHelper _lock(&m_Mutex); - m_BytesWritten += bytes_written; m_NCksumErrors += n_cks_errs; } void IoAttach() { - XrdSysMutexHelper _lock(&m_Mutex); - ++m_NumIos; } void IoDetach(int duration) { - XrdSysMutexHelper _lock(&m_Mutex); - m_Duration += duration; } - Stats Clone() - { - XrdSysMutexHelper _lock(&m_Mutex); + //---------------------------------------------------------------------- - return Stats(*this); + long long BytesRead() const + { + return m_BytesHit + m_BytesMissed + m_BytesBypassed; } - //---------------------------------------------------------------------- - void DeltaToReference(const Stats& ref) { - // Not locked, only used from Cache / Purge thread. m_NumIos = ref.m_NumIos - m_NumIos; m_Duration = ref.m_Duration - m_Duration; m_BytesHit = ref.m_BytesHit - m_BytesHit; m_BytesMissed = ref.m_BytesMissed - m_BytesMissed; m_BytesBypassed = ref.m_BytesBypassed - m_BytesBypassed; m_BytesWritten = ref.m_BytesWritten - m_BytesWritten; + m_StBlocksAdded = ref.m_StBlocksAdded - m_StBlocksAdded; m_NCksumErrors = ref.m_NCksumErrors - m_NCksumErrors; } void AddUp(const Stats& s) { - // Not locked, only used from Cache / Purge thread. 
m_NumIos += s.m_NumIos; m_Duration += s.m_Duration; m_BytesHit += s.m_BytesHit; m_BytesMissed += s.m_BytesMissed; m_BytesBypassed += s.m_BytesBypassed; m_BytesWritten += s.m_BytesWritten; + m_StBlocksAdded += s.m_StBlocksAdded; m_NCksumErrors += s.m_NCksumErrors; } void Reset() { - // Not locked, only used from Cache / Purge thread. m_NumIos = 0; m_Duration = 0; m_BytesHit = 0; m_BytesMissed = 0; m_BytesBypassed = 0; m_BytesWritten = 0; + m_StBlocksAdded = 0; m_NCksumErrors = 0; } +}; + +//============================================================================== + +class DirStats : public Stats +{ +public: + long long m_StBlocksRemoved = 0; // number of 512-byte blocks removed from the directory + int m_NFilesOpened = 0; + int m_NFilesClosed = 0; + int m_NFilesCreated = 0; + int m_NFilesRemoved = 0; // purged or otherwise (error, direct requests) + int m_NDirectoriesCreated = 0; + int m_NDirectoriesRemoved = 0; -private: - XrdSysMutex m_Mutex; + //---------------------------------------------------------------------- + + DirStats() = default; + + DirStats(const DirStats& s) = default; + + DirStats& operator=(const DirStats&) = default; + + DirStats(const DirStats& a, const DirStats& b) : + Stats(a, b), + m_StBlocksRemoved (a.m_StBlocksRemoved + b.m_StBlocksRemoved), + m_NFilesOpened (a.m_NFilesOpened + b.m_NFilesOpened), + m_NFilesClosed (a.m_NFilesClosed + b.m_NFilesClosed), + m_NFilesCreated (a.m_NFilesCreated + b.m_NFilesCreated), + m_NFilesRemoved (a.m_NFilesRemoved + b.m_NFilesRemoved), + m_NDirectoriesCreated (a.m_NDirectoriesCreated + b.m_NDirectoriesCreated), + m_NDirectoriesRemoved (a.m_NDirectoriesRemoved + b.m_NDirectoriesRemoved) + {} + + //---------------------------------------------------------------------- + + using Stats::DeltaToReference; // activate overload based on arg + void DeltaToReference(const DirStats& ref) + { + Stats::DeltaToReference(ref); + m_StBlocksRemoved = ref.m_StBlocksRemoved - m_StBlocksRemoved; + m_NFilesOpened = 
ref.m_NFilesOpened - m_NFilesOpened; + m_NFilesClosed = ref.m_NFilesClosed - m_NFilesClosed; + m_NFilesCreated = ref.m_NFilesCreated - m_NFilesCreated; + m_NFilesRemoved = ref.m_NFilesRemoved - m_NFilesRemoved; + m_NDirectoriesCreated = ref.m_NDirectoriesCreated - m_NDirectoriesCreated; + m_NDirectoriesRemoved = ref.m_NDirectoriesRemoved - m_NDirectoriesRemoved; + } + + using Stats::AddUp; // activate overload based on arg + void AddUp(const DirStats& s) + { + Stats::AddUp(s); + m_StBlocksRemoved += s.m_StBlocksRemoved; + m_NFilesOpened += s.m_NFilesOpened; + m_NFilesClosed += s.m_NFilesClosed; + m_NFilesCreated += s.m_NFilesCreated; + m_NFilesRemoved += s.m_NFilesRemoved; + m_NDirectoriesCreated += s.m_NDirectoriesCreated; + m_NDirectoriesRemoved += s.m_NDirectoriesRemoved; + } + + using Stats::Reset; // activate overload based on arg + void Reset() + { + Stats::Reset(); + m_StBlocksRemoved = 0; + m_NFilesOpened = 0; + m_NFilesClosed = 0; + m_NFilesCreated = 0; + m_NFilesRemoved = 0; + m_NDirectoriesCreated = 0; + m_NDirectoriesRemoved = 0; + } }; + } #endif - diff --git a/src/XrdPfc/XrdPfcTypes.hh b/src/XrdPfc/XrdPfcTypes.hh index 5a9a45e62ff..a5c632c8f07 100644 --- a/src/XrdPfc/XrdPfcTypes.hh +++ b/src/XrdPfc/XrdPfcTypes.hh @@ -1,5 +1,6 @@ #ifndef __XRDPFC_TYPES_HH__ #define __XRDPFC_TYPES_HH__ + //---------------------------------------------------------------------------------- // Copyright (c) 2014 by Board of Trustees of the Leland Stanford, Jr., University // Author: Alja Mrak-Tadel, Matevz Tadel, Brian Bockelman @@ -18,6 +19,9 @@ // along with XRootD. If not, see . 
//---------------------------------------------------------------------------------- +#include +#include + namespace XrdPfc { enum CkSumCheck_e { CSChk_Unknown = -1, CSChk_None = 0, CSChk_Cache = 1, CSChk_Net = 2, CSChk_Both = 3, diff --git a/src/XrdVersionPlugin.hh b/src/XrdVersionPlugin.hh index 121949e6826..1643cf0dbfe 100644 --- a/src/XrdVersionPlugin.hh +++ b/src/XrdVersionPlugin.hh @@ -223,7 +223,8 @@ XrdVERSIONPLUGIN_Mapd(ofs.cmslib, XrdCmsGetClient )\ XrdVERSIONPLUGIN_Mapd(cms.vnid, XrdCmsgetVnId )\ XrdVERSIONPLUGIN_Mapd(cms.perf, XrdCmsPerfMonitor )\ - XrdVERSIONPLUGIN_Mapd(pfc.decisionlib, XrdPfcGetDecision )\ + XrdVERSIONPLUGIN_Mapd(pfc.decisionlib, XrdPfcGetDecision )\ + XrdVERSIONPLUGIN_Mapd(pfc.purgelib, XrdPfcGetPurgePin )\ XrdVERSIONPLUGIN_Mapd(xrd.protocol, XrdgetProtocol )\ XrdVERSIONPLUGIN_Mapd(http.secxtractor, XrdHttpGetSecXtractor )\ XrdVERSIONPLUGIN_Mapd(http.exthandler, XrdHttpGetExtHandler )\