From 1ddd52ff8d6c67e70abeec56fece198bc0eadfc2 Mon Sep 17 00:00:00 2001 From: Angshuman Parashar Date: Tue, 19 Sep 2023 19:30:34 -0400 Subject: [PATCH] Fix post-processing bug that was destroying network stats (incl. distributed stats) for outermost masked levels. --- src/loop-analysis/tiling.cpp | 46 ++++++++++++++++++++++++++++++++---- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/src/loop-analysis/tiling.cpp b/src/loop-analysis/tiling.cpp index 8a5b7973..05ed9f30 100644 --- a/src/loop-analysis/tiling.cpp +++ b/src/loop-analysis/tiling.cpp @@ -179,6 +179,12 @@ void MaskTiles(std::vector& tile_nest, std::bitset& tile_nest, std::bit // Work on all outermost masked levels until we find an unmasked level. if (!mask[cur]) { - // Blow up *all* stats (including network stats). + // Blow up all stats (including network stats), *except* distributed + // network stats (because distributed network traffic is transferred + // on behalf of the child level. + auto saved_distributed_stats = tile_nest[cur].distributed_access_stats; tile_nest[cur].Reset(); + tile_nest[cur].distributed_access_stats = saved_distributed_stats; } else { @@ -845,6 +855,33 @@ ComputeNest CollapseComputeNest(analysis::CompoundComputeNest& tiles, int num_ti return solution; } +void PrintTiles(std::vector& tile_nest) +{ + int num_tiling_levels = tile_nest.size(); + + for (int cur = 0; cur < num_tiling_levels; cur++) + { + auto& tile = tile_nest[cur]; + + std::cout << "Level " << cur << std::endl; + std::cout << " size = " << tile.size << std::endl; + std::cout << " ca = " << tile.content_accesses << std::endl; + std::cout << " access[" << tile.access_stats.stats.size() << "]:\n"; + for (auto& x: tile.access_stats.stats) + { + if (x.second.accesses > 0) + std::cout << " [" << x.first.first << "," << x.first.second << "]: accesses = " + << x.second.accesses << " hops = " << x.second.hops << std::endl; + } + std::cout << " dist access[" << tile.access_stats.stats.size() << "]:\n"; + for (auto& x: tile.distributed_access_stats.stats) + { + if (x.second.accesses > 0) + std::cout << " [" << x.first.first << "," << x.first.second << "]: accesses = " + << x.second.accesses << " hops = " << x.second.hops << std::endl; + } + } +} // Collapse tiles into a given number of levels. // Input and output are both arrays of tile nests, @@ -945,6 +982,9 @@ CompoundDataMovementNest CollapseDataMovementNest(analysis::CompoundDataMovement // Calculate share of parent accesses (used to compute fills). ComputeParentAccessShare(solution[pv]); + // Perform distributed-multicast if supported. + DistributeTiles(solution[pv], distribution_supported[pv]); + // Mask each solution according to the provided bit mask. MaskTiles(solution[pv], tile_mask[pv]); @@ -954,9 +994,6 @@ CompoundDataMovementNest CollapseDataMovementNest(analysis::CompoundDataMovement // Set backing storage fill to zero place holder ResetBackingStorageFillsPlaceHolder(solution[pv]); - // Perform distributed-multicast if supported. - DistributeTiles(solution[pv], distribution_supported[pv]); - // Calculate the extra accesses and fills due to link transfers ComputePeerAccesses(solution[pv]); @@ -969,7 +1006,6 @@ CompoundDataMovementNest CollapseDataMovementNest(analysis::CompoundDataMovement // Find the parent and child levels for later compression/decompression logic SetParentLevel(solution[pv]); SetChildLevel(solution[pv]); - } // flip the workload tensor set flag if necessary