diff --git a/cloud/filestore/apps/client/lib/find_garbage.cpp b/cloud/filestore/apps/client/lib/find_garbage.cpp index 44d966b7ce0..4edf3f4188f 100644 --- a/cloud/filestore/apps/client/lib/find_garbage.cpp +++ b/cloud/filestore/apps/client/lib/find_garbage.cpp @@ -2,6 +2,8 @@ #include +#include + #include namespace NCloud::NFileStore::NClient { @@ -25,6 +27,7 @@ class TFindGarbageCommand final { private: TVector Shards; + ui32 PageSize = 0; public: TFindGarbageCommand() @@ -32,6 +35,10 @@ class TFindGarbageCommand final Opts.AddLongOption("shard") .RequiredArgument("STR") .AppendTo(&Shards); + + Opts.AddLongOption("page-size") + .RequiredArgument("NUM") + .StoreResult(&PageSize); } NProto::TListNodesResponse ListAll(const TString& fsId, ui64 parentId) @@ -43,7 +50,10 @@ class TFindGarbageCommand final request->SetFileSystemId(fsId); request->SetNodeId(parentId); request->MutableHeaders()->SetDisableMultiTabletForwarding(true); - // TODO: traverse all pages + if (PageSize) { + request->SetMaxBytes(PageSize); + } + request->SetCookie(cookie); // TODO: async listing auto response = WaitFor(Client->ListNodes( @@ -95,7 +105,10 @@ class TFindGarbageCommand final } } - bool Exists(const TString& fsId, ui64 parentId, const TString& name) + TMaybe Stat( + const TString& fsId, + ui64 parentId, + const TString& name) { auto request = CreateRequest(); request->SetFileSystemId(fsId); @@ -107,7 +120,7 @@ class TFindGarbageCommand final std::move(request))); if (response.GetError().GetCode() == E_FS_NOENT) { - return false; + return {}; } Y_ENSURE_EX( @@ -115,7 +128,7 @@ class TFindGarbageCommand final yexception() << "GetNodeAttr error: " << FormatError(response.GetError())); - return true; + return std::move(*response.MutableNode()); } bool Execute() override @@ -138,16 +151,44 @@ class TFindGarbageCommand final followerNames.insert(node.FollowerNodeName); } + struct TResult + { + TString Shard; + TString Name; + ui64 Size = 0; + + bool operator<(const TResult& rhs) const + { + const auto s = Max() - Size; + const auto rs = Max() - rhs.Size; + return std::tie(Shard, s, Name) + < std::tie(rhs.Shard, rs, rhs.Name); + } + }; + + TVector results; + for (const auto& [shard, nodes]: shard2Nodes) { for (const auto& node: nodes) { if (!followerNames.contains(node.Name)) { - if (Exists(shard, RootNodeId, node.Name)) { - Cout << shard << "\t" << node.Name << "\n"; + auto stat = Stat(shard, RootNodeId, node.Name); + + if (stat) { + results.push_back({shard, node.Name, stat->GetSize()}); } } } } + Sort(results.begin(), results.end()); + for (const auto& result: results) { + Cout << result.Shard + << "\t" << result.Name + << "\t" << FormatByteSize(result.Size) + << " (" << result.Size << ")" + << "\n"; + } + return true; } }; diff --git a/cloud/filestore/libs/storage/tablet/tablet_actor_listnodes.cpp b/cloud/filestore/libs/storage/tablet/tablet_actor_listnodes.cpp index be446904fbd..c3d79d35abf 100644 --- a/cloud/filestore/libs/storage/tablet/tablet_actor_listnodes.cpp +++ b/cloud/filestore/libs/storage/tablet/tablet_actor_listnodes.cpp @@ -64,7 +64,9 @@ void TIndexTabletActor::HandleListNodes( AddTransaction(*requestInfo); - auto maxBytes = Config->GetMaxResponseEntries() * MaxName; + auto maxBytes = Min( + Config->GetMaxResponseEntries() * MaxName, + Config->GetMaxResponseBytes()); if (auto bytes = msg->Record.GetMaxBytes()) { maxBytes = Min(bytes, maxBytes); } diff --git a/cloud/filestore/tests/client/canondata/test.test_multitablet_findgarbage/results.txt b/cloud/filestore/tests/client/canondata/test.test_multitablet_findgarbage/results.txt index b407c121da3..c55edfbcb8d 100644 --- a/cloud/filestore/tests/client/canondata/test.test_multitablet_findgarbage/results.txt +++ b/cloud/filestore/tests/client/canondata/test.test_multitablet_findgarbage/results.txt @@ -1,6 +1,7 @@ {} {} {} -fs0-shard1 garbage1_1 -fs0-shard2 garbage2_1 -fs0-shard2 garbage2_2 +fs0-shard1 garbage1_1 9 B (9) +fs0-shard2 garbage2_3 17.92 KiB (18346) +fs0-shard2 garbage2_1 9 B (9) +fs0-shard2 garbage2_2 9 B (9) diff --git a/cloud/filestore/tests/client/test.py b/cloud/filestore/tests/client/test.py index d745af69705..a1a2b9ac57e 100644 --- a/cloud/filestore/tests/client/test.py +++ b/cloud/filestore/tests/client/test.py @@ -376,6 +376,11 @@ def test_multitablet_findgarbage(): with open(data_file, "w") as f: f.write("some data") + big_data_file = os.path.join(common.output_path(), "big_data.txt") + with open(big_data_file, "w") as f: + for i in range(1024): + f.write("some big data %s\n" % i) + fs_id = "fs0" shard1_id = fs_id + "-shard1" shard2_id = fs_id + "-shard2" @@ -416,14 +421,15 @@ def test_multitablet_findgarbage(): "FollowerFileSystemIds": [shard1_id, shard2_id], }) - client.write(fs_id, "/xxx", "--data", data_file) - client.write(fs_id, "/xxx1", "--data", data_file) - client.write(fs_id, "/xxx2", "--data", data_file) + # let's generate multiple "pages" for listing + for i in range(100): + client.write(fs_id, "/xxx%s" % i, "--data", data_file) client.write(shard1_id, "/garbage1_1", "--data", data_file) client.write(shard2_id, "/garbage2_1", "--data", data_file) client.write(shard2_id, "/garbage2_2", "--data", data_file) + client.write(shard2_id, "/garbage2_3", "--data", big_data_file) # TODO: teach the client to fetch shard list by itself - out += client.find_garbage(fs_id, [shard1_id, shard2_id]) + out += client.find_garbage(fs_id, [shard1_id, shard2_id], page_size=1024) client.destroy(fs_id) client.destroy(shard1_id) diff --git a/cloud/filestore/tests/python/lib/client.py b/cloud/filestore/tests/python/lib/client.py index e178cc235b6..8bf390071fb 100644 --- a/cloud/filestore/tests/python/lib/client.py +++ b/cloud/filestore/tests/python/lib/client.py @@ -210,13 +210,14 @@ def stat(self, fs, path): return common.execute(cmd, env=self.__env, check_exit_code=self.__check_exit_code).stdout - def find_garbage(self, fs, shards): + def find_garbage(self, fs, shards, page_size): shard_params = [] for shard in shards: shard_params += ["--shard", shard] cmd = [ self.__binary_path, "findgarbage", "--filesystem", fs, + "--page-size", str(page_size), ] + shard_params + self.__cmd_opts() return common.execute(cmd, env=self.__env, check_exit_code=self.__check_exit_code).stdout