Skip to content

Commit

Permalink
[Pfc] Reduce verbosity of errors originating from remote block reads.
Browse files Browse the repository at this point in the history
Report each error only once for each IO object, then report total counts for
each error type once the IO object is detached.

Previous repeating errors have been downgraded to Debug level.
  • Loading branch information
osschar committed Nov 3, 2024
1 parent ae3391f commit c609ebc
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 14 deletions.
17 changes: 12 additions & 5 deletions src/XrdPfc/XrdPfcFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,8 @@ int File::ReadOpusCoalescere(IO *io, const XrdOucIOVec *readV, int readVnum,
if (read_req)
{
read_req->m_bytes_read += bytes_read;
read_req->update_error_cond(error_cond);
if (error_cond)
read_req->update_error_cond(error_cond);
read_req->m_stats.m_BytesHit += bytes_read;
read_req->m_sync_done = true;

Expand Down Expand Up @@ -1217,7 +1218,7 @@ void File::ProcessBlockError(Block *b, ReadRequest *rreq)
// Does not manage m_read_req.
// Will not complete the request.

TRACEF(Error, "ProcessBlockError() io " << b->m_io << ", block "<< b->m_offset/m_block_size <<
TRACEF(Debug, "ProcessBlockError() io " << b->m_io << ", block "<< b->m_offset/m_block_size <<
" finished with error " << -b->get_error() << " " << XrdSysE2T(-b->get_error()));

rreq->update_error_cond(b->get_error());
Expand Down Expand Up @@ -1353,9 +1354,15 @@ void File::ProcessBlockResponse(Block *b, int res)
else
{
if (res < 0) {
TRACEF(Error, tpfx << "block " << b << ", idx=" << b->m_offset/m_block_size << ", off=" << b->m_offset << " error=" << res);
bool new_error = b->get_io()->register_block_error(res);
int tlvl = new_error ? TRACE_Error : TRACE_Debug;
TRACEF_INT(tlvl, tpfx << "block " << b << ", idx=" << b->m_offset/m_block_size << ", off=" << b->m_offset
<< ", io=" << b->get_io() << ", error=" << res);
} else {
TRACEF(Error, tpfx << "block " << b << ", idx=" << b->m_offset/m_block_size << ", off=" << b->m_offset << " incomplete, got " << res << " expected " << b->get_size());
bool first_p = b->get_io()->register_incomplete_read();
int tlvl = first_p ? TRACE_Error : TRACE_Debug;
TRACEF_INT(tlvl, tpfx << "block " << b << ", idx=" << b->m_offset/m_block_size << ", off=" << b->m_offset
<< ", io=" << b->get_io() << " incomplete, got " << res << " expected " << b->get_size());
#if defined(__APPLE__) || defined(__GNU__) || (defined(__FreeBSD_kernel__) && defined(__GLIBC__)) || defined(__FreeBSD__)
res = -EIO;
#else
Expand Down Expand Up @@ -1393,7 +1400,7 @@ void File::ProcessBlockResponse(Block *b, int res)
{
ReadRequest *rreq = creqs_to_keep.front().m_read_req;

TRACEF(Info, "ProcessBlockResponse() requested block " << (void*)b << " failed with another io " <<
TRACEF(Debug, "ProcessBlockResponse() requested block " << (void*)b << " failed with another io " <<
b->get_io() << " - reissuing request with my io " << rreq->m_io);

b->reset_error_and_set_io(rreq->m_io, rreq);
Expand Down
5 changes: 3 additions & 2 deletions src/XrdPfc/XrdPfcFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ struct ReadRequest

long long m_bytes_read = 0;
int m_error_cond = 0; // to be set to -errno
int m_error_count = 0;
Stats m_stats;

int m_n_chunk_reqs = 0;
Expand All @@ -88,7 +89,7 @@ struct ReadRequest
m_io(io), m_rh(rh)
{}

void update_error_cond(int ec) { if (m_error_cond == 0 ) m_error_cond = ec; }
void update_error_cond(int ec) { ++m_error_count; if (m_error_cond == 0 ) m_error_cond = ec; }

bool is_complete() const { return m_n_chunk_reqs == 0 && m_sync_done && m_direct_done; }
int return_value() const { return m_error_cond ? m_error_cond : m_bytes_read; }
Expand Down Expand Up @@ -311,7 +312,7 @@ private:
static const char *m_traceID;

int m_ref_cnt; //!< number of references from IO or sync

XrdOssDF *m_data_file; //!< file handle for data file on disk
XrdOssDF *m_info_file; //!< file handle for data-info file on disk
Info m_cfi; //!< download status of file blocks and access statistics
Expand Down
10 changes: 10 additions & 0 deletions src/XrdPfc/XrdPfcIO.hh
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ private:

void SetInput(XrdOucCacheIO*);

protected:
// Variables used by File to store IO-relates state. Managed under
// File::m_state_cond mutex.
friend class File;
Expand All @@ -85,6 +86,15 @@ private:
int m_active_prefetches {0};
bool m_allow_prefetching {true};
bool m_in_detach {false};

int m_incomplete_count {0};
std::map<int, int> m_error_counts;
bool register_incomplete_read() {
return m_incomplete_count++ == 0;
}
bool register_block_error(int res) {
return m_error_counts[res]++ == 0;
}
};
}

Expand Down
29 changes: 24 additions & 5 deletions src/XrdPfc/XrdPfcIOFile.cc
Original file line number Diff line number Diff line change
Expand Up @@ -153,11 +153,30 @@ void IOFile::DetachFinalize()
{
// Effectively a destructor.

TRACE(Info, "DetachFinalize() " << this);
TRACE(Debug, "DetachFinalize() " << this);

m_file->RequestSyncOfDetachStats();
Cache::GetInstance().ReleaseFile(m_file, this);

#ifndef NODEBUG
if (( ! m_error_counts.empty() || m_incomplete_count > 0) && XRD_TRACE What >= TRACE_Error) {
char info[1024];
int pos = 0, cap = 1024;
bool truncated = false;
for (auto [err, cnt] : m_error_counts) {
int len = snprintf(&info[pos], cap, " ( %d : %d)", err, cnt);
if (len >= cap) {
truncated = true;
break;
}
pos += len;
cap -= len;
}
TRACE(Error, "DetachFinalize() " << this << " n_incomplete_reads=" << m_incomplete_count
<< ", block (error : count) report:" << info << (truncated ? " - truncated" : ""));
}
#endif

delete this;
}

Expand Down Expand Up @@ -273,9 +292,9 @@ int IOFile::ReadEnd(int retval, ReadReqRH *rh)
TRACEIO(Dump, "ReadEnd() " << (rh->m_iocb ? "a" : "") << "sync " << this << " sid: " << Xrd::hex1 << rh->m_seq_id << " retval: " << retval << " expected_size: " << rh->m_expected_size);

if (retval < 0) {
TRACEIO(Warning, "ReadEnd() error in File::Read(), exit status=" << retval << ", error=" << XrdSysE2T(-retval) << " sid: " << Xrd::hex1 << rh->m_seq_id);
TRACEIO(Debug, "ReadEnd() error in File::Read(), exit status=" << retval << ", error=" << XrdSysE2T(-retval) << " sid: " << Xrd::hex1 << rh->m_seq_id);
} else if (retval < rh->m_expected_size) {
TRACEIO(Warning, "ReadEnd() bytes missed " << rh->m_expected_size - retval << " sid: " << Xrd::hex1 << rh->m_seq_id);
TRACEIO(Debug, "ReadEnd() bytes missed " << rh->m_expected_size - retval << " sid: " << Xrd::hex1 << rh->m_seq_id);
}
if (rh->m_iocb)
rh->m_iocb->Done(retval);
Expand Down Expand Up @@ -362,9 +381,9 @@ int IOFile::ReadVEnd(int retval, ReadReqRH *rh)
" retval: " << retval << " n_chunks: " << rh->m_n_chunks << " expected_size: " << rh->m_expected_size);

if (retval < 0) {
TRACEIO(Warning, "ReadVEnd() error in File::ReadV(), exit status=" << retval << ", error=" << XrdSysE2T(-retval));
TRACEIO(Debug, "ReadVEnd() error in File::ReadV(), exit status=" << retval << ", error=" << XrdSysE2T(-retval));
} else if (retval < rh->m_expected_size) {
TRACEIO(Warning, "ReadVEnd() bytes missed " << rh->m_expected_size - retval);
TRACEIO(Debug, "ReadVEnd() bytes missed " << rh->m_expected_size - retval);
}
if (rh->m_iocb)
rh->m_iocb->Done(retval);
Expand Down
2 changes: 0 additions & 2 deletions src/XrdPfc/XrdPfcIOFile.hh
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ private:

struct stat *m_localStat;
int initCachedStat();


};

}
Expand Down
6 changes: 6 additions & 0 deletions src/XrdPfc/XrdPfcTrace.hh
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,19 @@
if (XRD_TRACE What >= TRACE_ ## act) SYSTRACE(XRD_TRACE, 0, m_traceID, 0, \
TRACE_STR_ ## act << x << " " << GetLocalPath())

#define TRACEF_INT(act, x) \
if (XRD_TRACE What >= act) \
{static const char* t_what[]={"","error ","warning ","info ","debug ","dump "};\
SYSTRACE(XRD_TRACE, 0, m_traceID, 0, t_what[act] << x << " " << GetLocalPath())}

#else

#define ERRNO_AND_ERRSTR(err_code)
#define TRACE(act,x)
#define TRACE_PC(act, pre_code, x)
#define TRACEIO(act, x)
#define TRACEF(act, x)
#define TRACEF_INT(act, x)

#endif

Expand Down

0 comments on commit c609ebc

Please sign in to comment.