Skip to content

Commit

Permalink
Updates from main (pre-refactor) merged to refactored version.
Browse files Browse the repository at this point in the history
  • Loading branch information
kssheridan committed Aug 27, 2024
1 parent 002b194 commit 452f089
Show file tree
Hide file tree
Showing 7 changed files with 204 additions and 127 deletions.
14 changes: 5 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
# Description
Memory analysis tool for finding gather / scatter (gs) accesses from DynamoRio & NVBit traces.
gs_patterns discovers gather/scatters from analyzing access patterns in memory traces (doesn't just look for gs instructions). gs_patterns writes the "subtraces" to a binary trace and spatter yaml format.
The source lines of the top aggressors are reported.

For CPU applications use the provided pin client in pin_tracing folder (or DynamoRio). Pin tends to be more reliable for larger applications.
Memory analysis tool for finding nontrivial gather / scatter (g/s) accesses in DynamoRio and NVBit traces. gs_patterns writes the subtraces to binary trace files and to a Spatter YAML-formatted file, and reports the source lines of the top aggressors. For CPU applications, use the provided Pin clients in the pin_tracing folder or use DynamoRio; Pin tends to be more reliable for larger applications.

For CUDA kernels use the provided nvbit client in the nvbit_tracing folder.

Expand Down Expand Up @@ -31,16 +27,16 @@ gs_pattern <pin_trace.gz> <binary>
gs_pattern <nvbit_trace.gz> -nv
```

Trace file should be gzipped. For Pin or DynamoRio, Binary file should be compiled with symbols turned on (-g).
The trace file should be gzipped (not tar + gz). For Pin or DynamoRio, the binary file should be compiled with symbols turned on (-g).

For NVBit tracing the kernel must be compiled with line numbers (--generate-line-info). Please see nvbit_tracing/README.md for detailed information on how to extract traces for CUDA kernels which are compatible with gs_patterns.

# How gs_patterns works
g/s accesses are found by looking at repeated instruction addresses (loops) that are memory instructions (scalar and vector).
The first pass finds the top g/s's. The second pass focuses on the top g/s accesses and records the normalized address distances to a binary file and spatter yaml.
Gather / scatter (g/s) accesses are found by looking for repeated instruction addresses (loops) that are memory instructions (scalar and vector). The first pass finds the top g/s instructions and filters out those with trivial access patterns. The second pass focuses on the top g/s accesses and records the normalized address array indices to a binary file and a Spatter YAML file.

# License
BSD-3 License. See [the LICENSE file](https://github.com/lanl/gs_patterns/blob/main/LICENSE).

# Author
# Authors
Kevin Sheridan, <kss@lanl.gov>
Christopher Scott, <christopher.scott@gatech.edu>
38 changes: 25 additions & 13 deletions gs_patterns.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,37 @@
#include <cstring>
#include <vector>

#define MAX(X, Y) (((X) < (Y)) ? Y : X)
#define MIN(X, Y) (((X) > (Y)) ? Y : X)
#define ABS(X) (((X) < 0) ? (-1) * (X) : X)
// Symbol lookup options.
// SYMBOLS_ONLY defaults to 1 (filter out instructions that have no symbol);
// a definition supplied before this point takes precedence over the default.
#ifndef SYMBOLS_ONLY
#  define SYMBOLS_ONLY 1
#endif

//triggers
#define SAMPLE 0
#define PERSAMPLE 10000000
//#define PERSAMPLE 1000

//info
#define CLSIZE (64)
#define NBUFS (1LL<<10)
#define IWINDOW (1024)
#define NGS (8096)
#define CLSIZE (64) //cacheline bytes
#define NBUFS (1LL<<10) //trace reading buffer size
#define IWINDOW (1024) //number of iaddrs per window
#define NGS (8096) //max number for gathers and scatters
#define OBOUNDS (512) //histogram positive max
#define OBOUNDS_ALLOC (2*OBOUNDS + 3)

//patterns
#define USTRIDES 1024 //Threshold for number of accesses
#define NSTRIDES 15 //Threshold for number of unique distances
#define OUTTHRESH (0.5) //Threshold for percentage of distances at boundaries of histogram
#define NTOP (10)
#define NTOP (10) //Final gather / scatters to keep
#define INITIAL_PSIZE (1<<15)
#define MAX_PSIZE (1<<30)
#define MAX_PSIZE (1<<30) //Max number of indices recorded per gather/scatter

#define MAX_LINE_LENGTH 1024

// Chunk width in bits (VBITS) and the same width in bytes (VBYTES).
// VBITS may be overridden at compile time (e.g. -DVBITS=256). VBYTES is
// guarded separately so that overriding VBITS alone still produces a VBYTES
// definition derived from it; previously VBYTES was left undefined in that case.
#if !defined(VBITS)
#  define VBITS (512L)
#endif
#if !defined(VBYTES)
#  define VBYTES ((VBITS)/8)   // parenthesised so an expression-valued VBITS divides as a whole
#endif

namespace gs_patterns
{
typedef uintptr_t addr_t;
Expand Down Expand Up @@ -160,12 +166,18 @@ namespace gs_patterns
// Upper-case label for the access type held in _mType:
// "GATHER" when _mType is zero, "SCATTER" otherwise.
std::string type_as_string()
{
    if (_mType)
        return "SCATTER";
    return "GATHER";
}
// Capitalised label for the access type held in _mType:
// "Gather" when _mType is zero, "Scatter" otherwise.
std::string getName()
{
    if (_mType)
        return "Scatter";
    return "Gather";
}
// One-letter upper-case tag for the access type held in _mType:
// "G" when _mType is zero, "S" otherwise.
std::string getShortName()
{
    if (_mType)
        return "S";
    return "G";
}
// One-letter lower-case tag for the access type held in _mType:
// "g" when _mType is zero, "s" otherwise.
std::string getShortNameLower()
{
    if (_mType)
        return "s";
    return "g";
}

// Returns the srcline entry selected by this object's access type (_mType).
// The return type is deduced; srcline is declared outside this view.
auto get_srcline() { return srcline[_mType]; }

int ntop = 0;
int64_t iaddrs_nosym = 0;
int64_t indices_nosym = 0;
int64_t iaddrs_sym = 0;
int64_t indices_sym = 0;
double cnt = 0.0;
int offset[NTOP] = {0};
int size[NTOP] = {0};

addr_t tot[NTOP] = {0};
addr_t top[NTOP] = {0};
Expand Down Expand Up @@ -201,8 +213,8 @@ namespace gs_patterns

private:
addr_t (*iaddrs)[NGS] = new addr_t[2][NGS];
int64_t (*icnt)[NGS] = new int64_t[2][NGS];
int64_t (*occ)[NGS] = new int64_t[2][NGS];
int64_t (*icnt)[NGS] = new int64_t[2][NGS]; //vector instances
int64_t (*occ)[NGS] = new int64_t[2][NGS]; //load/store instances

mem_access_type _mType;
};
Expand Down
147 changes: 90 additions & 57 deletions gs_patterns_core.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,11 @@ namespace gs_patterns_core

//Create stride histogram and create spatter
int sidx;
int firstgs = 1;
int unique_strides;
int64_t n_stride[1027];
// double outbounds;
int64_t hbin = 0;
int64_t n_stride[OBOUNDS_ALLOC];
double outbounds;

if (file_prefix.empty()) throw GSFileError ("Empty file prefix provided.");

Expand All @@ -60,73 +62,99 @@ namespace gs_patterns_core
printf("***************************************************************************************\n");

unique_strides = 0;
for (j = 0; j < 1027; j++)
for (j = 0; j < OBOUNDS_ALLOC; j++)
n_stride[j] = 0;

for (j = 1; j < target_metrics.offset[i]; j++) {
sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + 513;
sidx = target_metrics.patterns[i][j] - target_metrics.patterns[i][j - 1] + OBOUNDS + 1;
sidx = (sidx < 1) ? 0 : sidx;
sidx = (sidx > 1025) ? 1026 : sidx;
sidx = (sidx > OBOUNDS_ALLOC - 1) ? OBOUNDS_ALLOC - 1 : sidx;
n_stride[sidx]++;
}

for (j = 0; j < 1027; j++) {
for (j = 0; j < OBOUNDS_ALLOC; j++) {
if (n_stride[j] > 0) {
unique_strides++;
}
}

//outbounds = (double) (n_stride[0] + n_stride[1026]) / (double) target_metrics.offset[i];
outbounds = (double) (n_stride[0] + n_stride[OBOUNDS_ALLOC-1]) / (double) target_metrics.offset[i];

//if ( ( (unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) ) && (gather_offset[i] > USTRIDES ) ){
if (true) {
if (((unique_strides > NSTRIDES) || (outbounds > OUTTHRESH) && (target_metrics.offset[i] > USTRIDES ) )) {
//if (true) {

if (firstgs) {
firstgs = 0;
printf("***************************************************************************************\n");
printf("%sS\n", target_metrics.type_as_string().c_str());
}
printf("***************************************************************************************\n");
//create a binary file
FILE *fp_bin;
std::string bin_name = file_prefix + ".sbin";
printf("%s\n", bin_name.c_str());
fp_bin = fopen(bin_name.c_str(), "w");
if (NULL == fp_bin) {
throw GSFileError("Could not open " + std::string(bin_name) + "!");
}
FILE * fp_bin;

char bin_name[1024];
sprintf(bin_name, "%s.%s.%03d.%02dB.sbin", file_prefix.c_str(), target_metrics.getShortNameLower().c_str(), \
i, target_metrics.size[i]);
printf("%s\n", bin_name);
//std::string bin_name = \
// file_prefix + "." + target_metrics.getShortNameLower().c_str() + "." + std::to_string(i) + "." + \
// std::to_string(target_metrics.size[i]) + "B.sbin";

fp_bin = fopen(bin_name, "w");
if (NULL == fp_bin)
throw GSFileError("Could not open " + std::string(bin_name) + "!");

printf("%sIADDR -- %p\n", target_metrics.getShortName().c_str(), (void*) target_metrics.top[i]);
printf("SRCLINE -- %s\n", target_metrics.get_srcline()[target_metrics.top_idx[i]]);
printf("%s %c -- %6.3f%c (512-bit chunks)\n", target_metrics.type_as_string().c_str(),
'%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%');
printf("NDISTS -- %ld\n", (long int)target_metrics.offset[i]);

printf("GATHER %c -- %6.3f%c (%4ld-bit chunks)\n",
'%', 100.0 * (double) target_metrics.tot[i] / target_metrics.cnt, '%', VBITS);
printf("DTYPE -- %d bytes\n", target_metrics.size[i]);
printf("NINDICES -- %d\n", target_metrics.offset[i]);
printf("INDICES:\n");

int64_t nlcnt = 0;
for (j = 0; j < target_metrics.offset[i]; j++) {

if (j < 39) {
printf("%10ld ", target_metrics.patterns[i][j]);
fflush(stdout);
if (0 == (++nlcnt % 13))
printf("\n");

} else if (j >= (target_metrics.offset[i] - 39)) {
printf("%10ld ", target_metrics.patterns[i][j]);
fflush(stdout);
if (0 == (++nlcnt % 13))
printf("\n");

} else if (39 == j)
printf("...\n");
if (j <= 49) {
printf("%10ld ", target_metrics.patterns[i][j]);
fflush(stdout);
if (( ++nlcnt % 10) == 0)
printf("\n");
} else if (j >= (target_metrics.offset[i] - 50)) {
printf("%10ld ", target_metrics.patterns[i][j]);
fflush(stdout);
if (( ++nlcnt % 10) == 0)
printf("\n");
} else if (j == 50)
printf("...\n");
}
printf("\n");
printf("DIST HISTOGRAM --\n");

for (j = 0; j < 1027; j++) {
if (n_stride[j] > 0) {
if (0 == j)
printf("%6s: %ld\n", "< -512", n_stride[j]);
else if (1026 == j)
printf("%6s: %ld\n", "> 512", n_stride[j]);
else
printf("%6d: %ld\n", j - 513, n_stride[j]);
}
}
hbin = 0;
for(j=0; j<OBOUNDS_ALLOC; j++) {

if (j == 0) {
printf("( -inf, %5ld]: %ld\n", (int64_t)(-(VBITS+1)), n_stride[j]);
hbin = 0;

} else if (j == OBOUNDS +1) {
printf("[%5ld, 0): %ld\n", (int64_t)-VBITS, hbin);
hbin = 0;

} else if (j == (OBOUNDS_ALLOC-2) ) {
printf("[ 0, %5ld]: %ld\n", VBITS, hbin);
hbin = 0;

} else if (j == (OBOUNDS_ALLOC-1)) {
printf("[%5ld, inf): %ld\n", VBITS+1, n_stride[j]);

} else {
hbin += n_stride[j];
}
}

if (first_spatter) {
first_spatter = false;
Expand All @@ -143,10 +171,13 @@ namespace gs_patterns_core
fprintf(fp, "%ld", target_metrics.patterns[i][target_metrics.offset[i] - 1]);
fprintf(fp, "], \"count\":1}");

fprintf(fp2, "%s,%s,%ld,%6.3f\n",
target_metrics.get_srcline()[target_metrics.top_idx[i]], target_metrics.getShortName().c_str(),
(long int)target_metrics.offset[i],
100.0 * (double) target_metrics.tot[i] / target_metrics.cnt);
fprintf(fp2, "0x%lx,%s,%d,%s,%d,%6.3f\n",
target_metrics.top[i],
target_metrics.get_srcline()[target_metrics.top_idx[i]],
target_metrics.size[i],
target_metrics.getShortName().c_str(),
target_metrics.offset[i],
100.0 * (double) target_metrics.tot[i] / target_metrics.cnt);
}
printf("***************************************************************************************\n\n");
}
Expand All @@ -159,17 +190,15 @@ namespace gs_patterns_core
for (int i = 0; i < target_metrics.ntop; i++) {

//Find smallest
smallest = 0;
for (int j = 0; j < target_metrics.offset[i]; j++) {
smallest = 0x7FFFFFFFFFFFFFFFL;
for (int j = 0; j < target_metrics.offset[i]; j++) {
if (target_metrics.patterns[i][j] < smallest)
smallest = target_metrics.patterns[i][j];
}

smallest *= -1;

//Normalize
for (int j = 0; j < target_metrics.offset[i]; j++) {
target_metrics.patterns[i][j] += smallest;
target_metrics.patterns[i][j] -= smallest;
}
}
}
Expand Down Expand Up @@ -255,9 +284,6 @@ namespace gs_patterns_core
}

if ((++mcnt % PERSAMPLE) == 0) {
#if SAMPLE
break;
#endif
printf(".");
fflush(stdout);
}
Expand All @@ -270,6 +296,9 @@ namespace gs_patterns_core
//found it
if (iaddr == gather_metrics.top[i])
{

gather_metrics.size[i] = ia.get_size();

if (gather_base[i] == 0)
gather_base[i] = maddr;

Expand All @@ -278,6 +307,7 @@ namespace gs_patterns_core
if (!gather_metrics.grow(i)) {
printf("WARNING: Unable to increase PSIZE. Truncating trace...\n");
breakout = true;
break;
}
}
gather_metrics.patterns[i][gather_metrics.offset[i]++] = (int64_t) (maddr - gather_base[i]);
Expand All @@ -293,6 +323,8 @@ namespace gs_patterns_core
//found it
if (iaddr == scatter_metrics.top[i])
{
scatter_metrics.size[i] = ia.get_size();

//set base
if (scatter_base[i] == 0)
scatter_base[i] = maddr;
Expand All @@ -302,6 +334,7 @@ namespace gs_patterns_core
if (!scatter_metrics.grow(i)) {
printf("WARNING: Unable to increase PSIZE. Truncating trace...\n");
breakout = true;
break;
}
}
scatter_metrics.patterns[i][scatter_metrics.offset[i]++] = (int64_t) (maddr - scatter_base[i]);
Expand Down
Loading

0 comments on commit 452f089

Please sign in to comment.