Skip to content

Commit

Permalink
Fix TSlay and THarakiri in ReadOnly mode (#13162)
Browse files Browse the repository at this point in the history
  • Loading branch information
SammyVimes authored Jan 6, 2025
1 parent 63b0998 commit 2876ba4
Show file tree
Hide file tree
Showing 8 changed files with 345 additions and 96 deletions.
61 changes: 59 additions & 2 deletions ydb/apps/dstool/lib/dstool_cmd_cluster_workload_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ def add_options(p):
p.add_argument('--enable-kill-tablets', action='store_true', help='Enable tablet killer')
p.add_argument('--enable-kill-blob-depot', action='store_true', help='Enable BlobDepot killer')
p.add_argument('--enable-restart-pdisks', action='store_true', help='Enable PDisk restarter')
p.add_argument('--enable-readonly-pdisks', action='store_true', help='Enable SetPDiskReadOnly requests')
p.add_argument('--kill-signal', type=str, default='KILL', help='Kill signal to send to restart node')
p.add_argument('--sleep-before-rounds', type=float, default=1, help='Seconds to sleep before rounds')
p.add_argument('--no-fail-model-check', action='store_true', help='Do not check VDisk states before taking action')
Expand Down Expand Up @@ -104,6 +105,12 @@ def do(args):
if vslot.ReadOnly
}

pdisk_readonly = {
(pdisk.NodeId, pdisk.PDiskId)
for pdisk in base_config.PDisk
if pdisk.ReadOnly
}

if (len(pdisk_keys) == 0):
# initialize pdisk_keys and pdisk_key_versions
for node_id in {pdisk.NodeId for pdisk in base_config.PDisk}:
Expand Down Expand Up @@ -144,6 +151,25 @@ def match(x):
return False
return True

def can_act_on_pdisk(node_id, pdisk_id):
def match(x):
return node_id == x[0] and pdisk_id == x[1]

for group in base_config.Group:
if any(map(match, map(common.get_vslot_id, group.VSlotId))):
if not common.is_dynamic_group(group.GroupId):
return False

content = {
common.get_vdisk_id_short(vslot): not match(vslot_id) and vslot.Ready and vdisk_status[vslot_id + common.get_vdisk_id(vslot)]
for vslot_id in map(common.get_vslot_id, group.VSlotId)
for vslot in [vslot_map[vslot_id]]
}
common.print_if_verbose(args, content, file=sys.stderr)
if not grouptool.check_fail_model(content, group.ErasureSpecies):
return False
return True

def do_restart(node_id):
host = node_fqdn_map[node_id]
if args.enable_pdisk_encryption_keys_changes:
Expand All @@ -166,6 +192,20 @@ def do_restart_pdisk(node_id, pdisk_id):
if not response.Success:
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)

def do_readonly_pdisk(node_id, pdisk_id, readonly):
assert can_act_on_vslot(node_id, pdisk_id)
request = common.kikimr_bsconfig.TConfigRequest(IgnoreDegradedGroupsChecks=True)
cmd = request.Command.add().SetPDiskReadOnly
cmd.TargetPDiskId.NodeId = node_id
cmd.TargetPDiskId.PDiskId = pdisk_id
cmd.Value = readonly
try:
response = common.invoke_bsc_request(request)
except Exception as e:
raise Exception('failed to perform SetPDiskReadOnly request: %s' % e)
if not response.Success:
raise Exception('Unexpected error from BSC: %s' % response.ErrorDescription)

def do_evict(vslot_id):
assert can_act_on_vslot(*vslot_id)
try:
Expand Down Expand Up @@ -253,15 +293,16 @@ def do_kill_blob_depot():
readonlies = []
unreadonlies = []
pdisk_restarts = []
make_pdisks_readonly = []
make_pdisks_not_readonly = []

for vslot in base_config.VSlot:
if common.is_dynamic_group(vslot.GroupId):
vslot_id = common.get_vslot_id(vslot.VSlotId)
node_id, pdisk_id = vslot_id[:2]
vdisk_id = '[%08x:%d:%d:%d]' % (vslot.GroupId, vslot.FailRealmIdx, vslot.FailDomainIdx, vslot.VDiskIdx)
if vslot_id in vslot_readonly and not args.disable_readonly:
unreadonlies.append(('un-readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, False)))
if can_act_on_vslot(*vslot_id[:2]) and args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % vslot_id[:2], (do_restart_pdisk, *vslot_id[:2])))
if can_act_on_vslot(*vslot_id) and (recent_restarts or args.disable_restarts):
if not args.disable_evicts:
evicts.append(('evict vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_evict, vslot_id)))
Expand All @@ -270,6 +311,18 @@ def do_kill_blob_depot():
if not args.disable_readonly:
readonlies.append(('readonly vslot id: %s, vdisk id: %s' % (vslot_id, vdisk_id), (do_readonly, vslot, True)))

for pdisk in base_config.PDisk:
node_id, pdisk_id = pdisk.NodeId, pdisk.PDiskId

if can_act_on_pdisk(node_id, pdisk_id):
if args.enable_restart_pdisks:
pdisk_restarts.append(('restart pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_restart_pdisk, node_id, pdisk_id)))
if args.enable_readonly_pdisks:
make_pdisks_readonly.append(('readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, True)))

if (node_id, pdisk_id) in pdisk_readonly and args.enable_readonly_pdisks:
make_pdisks_not_readonly.append(('un-readonly pdisk node_id: %d, pdisk_id: %d' % (node_id, pdisk_id), (do_readonly_pdisk, node_id, pdisk_id, False)))

def pick(v):
action_name, action = random.choice(v)
print(action_name)
Expand All @@ -285,6 +338,10 @@ def pick(v):
possible_actions.append(('un-readonly', (pick, unreadonlies)))
if pdisk_restarts:
possible_actions.append(('restart-pdisk', (pick, pdisk_restarts)))
if make_pdisks_readonly:
possible_actions.append(('make-pdisks-readonly', (pick, make_pdisks_readonly)))
if make_pdisks_not_readonly:
possible_actions.append(('make-pdisks-not-readonly', (pick, make_pdisks_not_readonly)))

restarts = []

Expand Down
17 changes: 8 additions & 9 deletions ydb/core/blobstorage/nodewarden/node_warden_pdisk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,21 +410,20 @@ namespace NKikimr::NStorage {

const TPDiskKey key(pdisk);

if (pdisk.HasReadOnly()) {
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
auto& record = it->second;
auto localPdiskIt = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()});
if (localPdiskIt != LocalPDisks.end()) {
auto& record = localPdiskIt->second;

if (!record.Record.HasReadOnly() || record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
if (record.Record.GetReadOnly() != pdisk.GetReadOnly()) {
// Changing read-only flag requires restart.
entityStatus = NKikimrBlobStorage::RESTART;
}
}

switch (entityStatus) {
case NKikimrBlobStorage::RESTART:
if (auto it = LocalPDisks.find({pdisk.GetNodeID(), pdisk.GetPDiskID()}); it != LocalPDisks.end()) {
it->second.Record = pdisk;
if (localPdiskIt != LocalPDisks.end()) {
localPdiskIt->second.Record = pdisk;
}
DoRestartLocalPDisk(pdisk);
[[fallthrough]];
Expand Down
24 changes: 17 additions & 7 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3788,8 +3788,6 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
case ERequestType::RequestChunkReadPiece:
case ERequestType::RequestYardInit:
case ERequestType::RequestCheckSpace:
case ERequestType::RequestHarakiri:
case ERequestType::RequestYardSlay:
case ERequestType::RequestYardControl:
case ERequestType::RequestWhiteboartReport:
case ERequestType::RequestHttpInfo:
Expand All @@ -3805,16 +3803,16 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {

// Can't be processed in read-only mode.
case ERequestType::RequestLogWrite: {
TLogWrite &ev = *static_cast<TLogWrite*>(request);
TLogWrite &req = *static_cast<TLogWrite*>(request);
NPDisk::TEvLogResult* result = new NPDisk::TEvLogResult(NKikimrProto::CORRUPTED, 0, errorReason);
result->Results.push_back(NPDisk::TEvLogResult::TRecord(ev.Lsn, ev.Cookie));
result->Results.push_back(NPDisk::TEvLogResult::TRecord(req.Lsn, req.Cookie));
PCtx->ActorSystem->Send(sender, result);
ev.Replied = true;
req.Replied = true;
return true;
}
case ERequestType::RequestChunkWrite: {
TChunkWrite &ev = *static_cast<TChunkWrite*>(request);
SendChunkWriteError(ev, errorReason, NKikimrProto::CORRUPTED);
TChunkWrite &req = *static_cast<TChunkWrite*>(request);
SendChunkWriteError(req, errorReason, NKikimrProto::CORRUPTED);
return true;
}
case ERequestType::RequestChunkReserve:
Expand All @@ -3829,6 +3827,18 @@ bool TPDisk::HandleReadOnlyIfWrite(TRequestBase *request) {
case ERequestType::RequestChunkForget:
PCtx->ActorSystem->Send(sender, new NPDisk::TEvChunkForgetResult(NKikimrProto::CORRUPTED, 0, errorReason));
return true;
case ERequestType::RequestHarakiri:
PCtx->ActorSystem->Send(sender, new NPDisk::TEvHarakiriResult(NKikimrProto::CORRUPTED, 0, errorReason));
return true;
case ERequestType::RequestYardSlay: {
TSlay &req = *static_cast<TSlay*>(request);
// We send NOTREADY, since BSController can't handle CORRUPTED or ERROR.
// If for some reason the disk will become *not* read-only, the request will be retried and VDisk will be slain.
// If not, we will be retrying the request until the disk is replaced during maintenance.
PCtx->ActorSystem->Send(sender, new NPDisk::TEvSlayResult(NKikimrProto::NOTREADY, 0,
req.VDiskId, req.SlayOwnerRound, req.PDiskId, req.VSlotId, errorReason));
return true;
}

case ERequestType::RequestWriteMetadata:
case ERequestType::RequestWriteMetadataResult:
Expand Down
90 changes: 65 additions & 25 deletions ydb/core/blobstorage/pdisk/blobstorage_pdisk_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1359,34 +1359,74 @@ Y_UNIT_TEST_SUITE(ReadOnlyPDisk) {

vdisk.Init(); // Should start ok.
vdisk.ReadLog(); // Should be able to read log.
{
// Should fail on writing log.
auto evLog = MakeHolder<NPDisk::TEvLog>(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0, TRcBuf(PrepareData(1)),
TLsnSeg(), nullptr);
auto res = testCtx.TestResponse<NPDisk::TEvLogResult>(evLog.Release(), NKikimrProto::CORRUPTED);

UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
}
{
// Should fail on reserving chunk.
auto res = testCtx.TestResponse<NPDisk::TEvChunkReserveResult>(
new NPDisk::TEvChunkReserve(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 1),
NKikimrProto::CORRUPTED);
}

UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
}
{
// Should fail on writing chunk.
TString chunkWriteData = PrepareData(1);
auto counter = MakeIntrusive<::NMonitoring::TCounterForPtr>();
TMemoryConsumer consumer(counter);
TTrackableBuffer buffer(std::move(consumer), chunkWriteData.data(), chunkWriteData.size());
auto res = testCtx.TestResponse<NPDisk::TEvChunkWriteResult>(
new NPDisk::TEvChunkWrite(vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
0, 0, new NPDisk::TEvChunkWrite::TBufBackedUpParts(std::move(buffer)), nullptr, false, 0),
NKikimrProto::CORRUPTED);
template <class Request, class Response, NKikimrProto::EReplyStatus ExpectedStatus = NKikimrProto::CORRUPTED, class... Args>
auto CheckReadOnlyRequest(Args&&... args) {
return [args = std::make_tuple(std::forward<Args>(args)...)](TActorTestContext& testCtx) {
Request* req = std::apply([](auto&&... unpackedArgs) {
return new Request(std::forward<decltype(unpackedArgs)>(unpackedArgs)...);
}, args);

THolder<Response> res = testCtx.TestResponse<Response>(req);

UNIT_ASSERT_VALUES_EQUAL(res->Status, ExpectedStatus);
UNIT_ASSERT_STRING_CONTAINS(res->ErrorReason, "PDisk is in read-only mode");
};
}

Y_UNIT_TEST(ReadOnlyPDiskEvents) {
TActorTestContext testCtx{{}};
TVDiskMock vdisk(&testCtx);
vdisk.InitFull();
vdisk.SendEvLogSync();

auto cfg = testCtx.GetPDiskConfig();
cfg->ReadOnly = true;
testCtx.UpdateConfigRecreatePDisk(cfg);

vdisk.Init();
vdisk.ReadLog();

std::vector<std::function<void(TActorTestContext&)>> eventSenders = {
// Should fail on writing log. (ERequestType::RequestLogWrite)
CheckReadOnlyRequest<NPDisk::TEvLog, NPDisk::TEvLogResult>(
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 0,
TRcBuf(PrepareData(1)), TLsnSeg(), nullptr
),
// Should fail on writing chunk. (ERequestType::RequestChunkWrite)
CheckReadOnlyRequest<NPDisk::TEvChunkWrite, NPDisk::TEvChunkWriteResult>(
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound,
0, 0, nullptr, nullptr, false, 0
),
// Should fail on reserving chunk. (ERequestType::RequestChunkReserve)
CheckReadOnlyRequest<NPDisk::TEvChunkReserve, NPDisk::TEvChunkReserveResult>(
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound, 1
),
// Should fail on chunk lock. (ERequestType::RequestChunkLock)
CheckReadOnlyRequest<NPDisk::TEvChunkLock, NPDisk::TEvChunkLockResult>(
NPDisk::TEvChunkLock::ELockFrom::LOG, 0, NKikimrBlobStorage::TPDiskSpaceColor::YELLOW
),
// Should fail on chunk unlock. (ERequestType::RequestChunkUnlock)
CheckReadOnlyRequest<NPDisk::TEvChunkUnlock, NPDisk::TEvChunkUnlockResult>(
NPDisk::TEvChunkLock::ELockFrom::LOG
),
// Should fail on chunk forget. (ERequestType::RequestChunkForget)
CheckReadOnlyRequest<NPDisk::TEvChunkForget, NPDisk::TEvChunkForgetResult>(
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound + 1
),
// Should fail on harakiri. (ERequestType::RequestHarakiri)
CheckReadOnlyRequest<NPDisk::TEvHarakiri, NPDisk::TEvHarakiriResult>(
vdisk.PDiskParams->Owner, vdisk.PDiskParams->OwnerRound + 1
),
// Should fail on slaying vdisk. (ERequestType::RequestYardSlay)
CheckReadOnlyRequest<NPDisk::TEvSlay, NPDisk::TEvSlayResult, NKikimrProto::NOTREADY>(
vdisk.VDiskID, vdisk.PDiskParams->OwnerRound + 1, 0, 0
)
};

for (auto sender : eventSenders) {
sender(testCtx);
}
}
}
Expand Down
9 changes: 9 additions & 0 deletions ydb/core/blobstorage/pdisk/mock/pdisk_mock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,8 +447,17 @@ class TPDiskMockActor : public TActorBootstrapped<TPDiskMockActor> {
void Handle(NPDisk::TEvSlay::TPtr ev) {
auto *msg = ev->Get();
PDISK_MOCK_LOG(INFO, PDM17, "received TEvSlay", (Msg, msg->ToString()));

auto res = std::make_unique<NPDisk::TEvSlayResult>(NKikimrProto::OK, GetStatusFlags(), msg->VDiskId,
msg->SlayOwnerRound, msg->PDiskId, msg->VSlotId, TString());

if (Impl.IsDiskReadOnly) {
res->Status = NKikimrProto::NOTREADY;
res->ErrorReason = "PDisk is in read-only mode";
Send(ev->Sender, res.release());
return;
}

bool found = false;
for (auto& [ownerId, owner] : Impl.Owners) {
if (!owner.VDiskId.SameExceptGeneration(msg->VDiskId)) {
Expand Down
Loading

0 comments on commit 2876ba4

Please sign in to comment.