Skip to content

Commit

Permalink
filestore-client findgarbage paging fix + some other improvements and…
Browse files Browse the repository at this point in the history
… cleanup (#2225)

* filestore-client findgarbage paging fix

* filestore-client: output garbage file sizes in the findgarbage command output; tablet cleanup: the MaxResponseBytes config param was unused, so ListNodes now takes it into account

* filestore-client: output garbage file sizes in the findgarbage command output; tablet cleanup: the MaxResponseBytes config param was unused, so ListNodes now takes it into account; re-canonize the test output
  • Loading branch information
qkrorlqr authored Oct 6, 2024
1 parent 741e3e7 commit fc33948
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 15 deletions.
53 changes: 47 additions & 6 deletions cloud/filestore/apps/client/lib/find_garbage.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

#include <cloud/filestore/public/api/protos/fs.pb.h>

#include <cloud/storage/core/libs/common/format.h>

#include <sys/stat.h>

namespace NCloud::NFileStore::NClient {
Expand All @@ -25,13 +27,18 @@ class TFindGarbageCommand final
{
private:
TVector<TString> Shards;
ui32 PageSize = 0;

public:
// Registers the command-specific CLI options:
//   --shard STR      value is appended to Shards
//   --page-size NUM  value is stored into PageSize
TFindGarbageCommand()
{
    auto& shardOpt = Opts.AddLongOption("shard");
    shardOpt.RequiredArgument("STR");
    shardOpt.AppendTo(&Shards);

    auto& pageSizeOpt = Opts.AddLongOption("page-size");
    pageSizeOpt.RequiredArgument("NUM");
    pageSizeOpt.StoreResult(&PageSize);
}

NProto::TListNodesResponse ListAll(const TString& fsId, ui64 parentId)
Expand All @@ -43,7 +50,10 @@ class TFindGarbageCommand final
request->SetFileSystemId(fsId);
request->SetNodeId(parentId);
request->MutableHeaders()->SetDisableMultiTabletForwarding(true);
// TODO: traverse all pages
if (PageSize) {
request->SetMaxBytes(PageSize);
}
request->SetCookie(cookie);
// TODO: async listing

auto response = WaitFor(Client->ListNodes(
Expand Down Expand Up @@ -95,7 +105,10 @@ class TFindGarbageCommand final
}
}

bool Exists(const TString& fsId, ui64 parentId, const TString& name)
TMaybe<NProto::TNodeAttr> Stat(
const TString& fsId,
ui64 parentId,
const TString& name)
{
auto request = CreateRequest<NProto::TGetNodeAttrRequest>();
request->SetFileSystemId(fsId);
Expand All @@ -107,15 +120,15 @@ class TFindGarbageCommand final
std::move(request)));

if (response.GetError().GetCode() == E_FS_NOENT) {
return false;
return {};
}

Y_ENSURE_EX(
!HasError(response.GetError()),
yexception() << "GetNodeAttr error: "
<< FormatError(response.GetError()));

return true;
return std::move(*response.MutableNode());
}

bool Execute() override
Expand All @@ -138,16 +151,44 @@ class TFindGarbageCommand final
followerNames.insert(node.FollowerNodeName);
}

// One garbage entry found in a shard: the shard id, the node name and the
// node size in bytes.
struct TResult
{
    TString Shard;
    TString Name;
    ui64 Size = 0;

    // Ordering used for the final report: by shard ascending, then by size
    // descending (largest entries first within a shard), then by name
    // ascending.
    bool operator<(const TResult& rhs) const
    {
        if (Shard != rhs.Shard) {
            return Shard < rhs.Shard;
        }

        if (Size != rhs.Size) {
            // bigger entries sort earlier
            return Size > rhs.Size;
        }

        return Name < rhs.Name;
    }
};

TVector<TResult> results;

for (const auto& [shard, nodes]: shard2Nodes) {
for (const auto& node: nodes) {
if (!followerNames.contains(node.Name)) {
if (Exists(shard, RootNodeId, node.Name)) {
Cout << shard << "\t" << node.Name << "\n";
auto stat = Stat(shard, RootNodeId, node.Name);

if (stat) {
results.push_back({shard, node.Name, stat->GetSize()});
}
}
}
}

Sort(results.begin(), results.end());
for (const auto& result: results) {
Cout << result.Shard
<< "\t" << result.Name
<< "\t" << FormatByteSize(result.Size)
<< " (" << result.Size << ")"
<< "\n";
}

return true;
}
};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,9 @@ void TIndexTabletActor::HandleListNodes(

AddTransaction<TEvService::TListNodesMethod>(*requestInfo);

auto maxBytes = Config->GetMaxResponseEntries() * MaxName;
auto maxBytes = Min(
Config->GetMaxResponseEntries() * MaxName,
Config->GetMaxResponseBytes());
if (auto bytes = msg->Record.GetMaxBytes()) {
maxBytes = Min(bytes, maxBytes);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{}
{}
{}
fs0-shard1 garbage1_1
fs0-shard2 garbage2_1
fs0-shard2 garbage2_2
fs0-shard1 garbage1_1 9 B (9)
fs0-shard2 garbage2_3 17.92 KiB (18346)
fs0-shard2 garbage2_1 9 B (9)
fs0-shard2 garbage2_2 9 B (9)
14 changes: 10 additions & 4 deletions cloud/filestore/tests/client/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,11 @@ def test_multitablet_findgarbage():
with open(data_file, "w") as f:
f.write("some data")

big_data_file = os.path.join(common.output_path(), "big_data.txt")
with open(big_data_file, "w") as f:
for i in range(1024):
f.write("some big data %s\n" % i)

fs_id = "fs0"
shard1_id = fs_id + "-shard1"
shard2_id = fs_id + "-shard2"
Expand Down Expand Up @@ -416,14 +421,15 @@ def test_multitablet_findgarbage():
"FollowerFileSystemIds": [shard1_id, shard2_id],
})

client.write(fs_id, "/xxx", "--data", data_file)
client.write(fs_id, "/xxx1", "--data", data_file)
client.write(fs_id, "/xxx2", "--data", data_file)
# let's generate multiple "pages" for listing
for i in range(100):
client.write(fs_id, "/xxx%s" % i, "--data", data_file)
client.write(shard1_id, "/garbage1_1", "--data", data_file)
client.write(shard2_id, "/garbage2_1", "--data", data_file)
client.write(shard2_id, "/garbage2_2", "--data", data_file)
client.write(shard2_id, "/garbage2_3", "--data", big_data_file)
# TODO: teach the client to fetch shard list by itself
out += client.find_garbage(fs_id, [shard1_id, shard2_id])
out += client.find_garbage(fs_id, [shard1_id, shard2_id], page_size=1024)

client.destroy(fs_id)
client.destroy(shard1_id)
Expand Down
3 changes: 2 additions & 1 deletion cloud/filestore/tests/python/lib/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,13 +210,14 @@ def stat(self, fs, path):

return common.execute(cmd, env=self.__env, check_exit_code=self.__check_exit_code).stdout

def find_garbage(self, fs, shards):
def find_garbage(self, fs, shards, page_size=None):
    """Run the client's ``findgarbage`` command and return its stdout.

    fs        -- id of the filesystem, passed as ``--filesystem``.
    shards    -- iterable of shard ids; each one is passed as a separate
                 ``--shard`` argument.
    page_size -- optional value for ``--page-size``. When omitted (None) the
                 flag is not passed at all, so callers written against the
                 previous two-argument signature keep working.
    """
    shard_params = []
    for shard in shards:
        shard_params += ["--shard", shard]

    # Only an explicitly supplied page size is forwarded; 0 is still passed
    # through as-is to keep the behaviour for existing callers unchanged.
    page_params = []
    if page_size is not None:
        page_params = ["--page-size", str(page_size)]

    cmd = [
        self.__binary_path, "findgarbage",
        "--filesystem", fs,
    ] + page_params + shard_params + self.__cmd_opts()

    return common.execute(cmd, env=self.__env, check_exit_code=self.__check_exit_code).stdout
Expand Down

0 comments on commit fc33948

Please sign in to comment.