From f1120f06a7ea7fb6cf897df0a71329b3e54c3030 Mon Sep 17 00:00:00 2001 From: Anthony Date: Tue, 29 Oct 2024 20:23:51 -0400 Subject: [PATCH 1/2] CAT: New instruction benchmarks for FMA and Int. --- src/counter_analysis_toolkit/Makefile | 2 +- src/counter_analysis_toolkit/instructions.c | 343 ++++++++++++++++++++ 2 files changed, 344 insertions(+), 1 deletion(-) diff --git a/src/counter_analysis_toolkit/Makefile b/src/counter_analysis_toolkit/Makefile index 608c33a03..ed65548af 100644 --- a/src/counter_analysis_toolkit/Makefile +++ b/src/counter_analysis_toolkit/Makefile @@ -103,7 +103,7 @@ icache_seq_kernel_0.o: icache_seq.c icache_seq.h rm icache_seq_kernel_0.o instr: instructions.c instr.h - -$(CC) -c $(CFLAGS) $(OPT2) -ftree-vectorize $(INSTR) $(INCFLAGS) instructions.c -o instructions.o + -$(CC) -c $(CFLAGS) $(OPT2) -ftree-vectorize $(FLOP) $(INSTR) $(INCFLAGS) instructions.c -o instructions.o weak_symbols.o: weak_symbols.c vec.h -$(CC) -c $(CFLAGS) weak_symbols.c diff --git a/src/counter_analysis_toolkit/instructions.c b/src/counter_analysis_toolkit/instructions.c index 80edf0df8..7a021b198 100644 --- a/src/counter_analysis_toolkit/instructions.c +++ b/src/counter_analysis_toolkit/instructions.c @@ -115,6 +115,234 @@ void test_int_add(int p, int M, int N, int EventSet, FILE *fp){ } +void test_int_add_max(int p, int M, int N, int EventSet, FILE *fp){ + int ret; + long long int ev_values[2]; + int i32_00, i32_01, i32_02, i32_03, i32_04, i32_05, i32_06, i32_07; + int i32_08, i32_09, i32_10, i32_11; + int i32_100, i32_101, i32_102; + + /* Initialize the variables with values that the compiler cannot guess. */ + i32_00 = 2*p; + i32_01 = -p/3; + i32_02 = p/4; + i32_03 = -p/5; + i32_04 = p/6; + i32_05 = -p/7; + i32_06 = p/8; + i32_07 = -p/9; + i32_08 = 1+p/2; + i32_09 = 1-p/2; + i32_10 = 1+p/3; + i32_11 = 1-p/3; + + i32_100 = 17; + i32_101 = -18; + i32_102 = 12; + + // Start the counters. 
+ ret = PAPI_start(EventSet); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to run the kernel. + goto clean_up; + } + + if( p == 12345678 ){ + p /= 2; + i32_100 *= 13; + i32_101 *= 12; + i32_102 *= 11; + }else{ + // Almost certainly this is what will execute and all variables will + // end up with the value zero, but the compiler doesn't know that. + i32_100 /= i32_00+16; + i32_101 /= i32_00+17; + i32_102 /= i32_00+11; + } + +#define I32_ADDS(_X) {i32_00 += _X; i32_01 += _X; i32_02 += _X; i32_03 += _X; i32_04 += _X; i32_05 += _X; i32_06 += _X; i32_07 += _X; i32_08 += _X; i32_09 += _X; i32_10 += _X; i32_11 += _X;} + + for(int i=0; i 100 ){ + I32_ADDS(i32_07); + } + } + } + + ret = PAPI_stop(EventSet, ev_values); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to print anything. + goto clean_up; + } + fprintf(fp, "%d %lld # INT_ADD_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + + sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; + sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; + +clean_up: + + return; +} + + +void test_int_mul_max(int p, int M, int N, int EventSet, FILE *fp){ + int ret; + long long int ev_values[2]; + int i32_00, i32_01, i32_02, i32_03, i32_04, i32_05, i32_06, i32_07; + int i32_08, i32_09, i32_10, i32_11; + int i32_100, i32_101, i32_102; + + /* Initialize the variables with values that the compiler cannot guess. */ + i32_00 = 2*p; + i32_01 = -p/3; + i32_02 = p/4; + i32_03 = -p/5; + i32_04 = p/6; + i32_05 = -p/7; + i32_06 = p/8; + i32_07 = 1/p; + i32_08 = 1+p/2; + i32_09 = 1-p/2; + i32_10 = 1+p/3; + i32_11 = 1-p/3; + + i32_100 = 17; + i32_101 = -18; + i32_102 = 12; + + // Start the counters. 
+ ret = PAPI_start(EventSet); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to run the kernel. + goto clean_up; + } + + if( p == 12345678 ){ + p /= 2; + i32_100 *= 13; + i32_101 *= 12; + i32_102 *= 11; + }else{ + // Almost certainly this is what will execute and all variables will + // end up with the value one, but the compiler doesn't know that. + i32_100 = 1 + i32_100 / (i32_00+16); + i32_101 = 1 + i32_101 / (i32_00+17); + i32_102 = 1 + i32_102 / (i32_00+11); + } + +#define I32_MULS(_X) {i32_00 *= _X; i32_01 *= _X; i32_02 *= _X; i32_03 *= _X; i32_04 *= _X; i32_05 *= _X; i32_06 *= _X; i32_07 *= _X; i32_08 *= _X; i32_09 *= _X; i32_10 *= _X; i32_11 *= _X;} + + for(int i=0; i 100 ){ + I32_MULS(i32_07); + } + } + } + + ret = PAPI_stop(EventSet, ev_values); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to print anything. + goto clean_up; + } + fprintf(fp, "%d %lld # INT_MUL_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + + sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; + sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; + +clean_up: + + return; +} + + +void test_int_div_max(int p, int M, int N, int EventSet, FILE *fp){ + int ret; + long long int ev_values[2]; + int i32_00, i32_01, i32_02, i32_03, i32_04, i32_05, i32_06, i32_07; + int i32_08, i32_09, i32_10, i32_11; + int i32_100, i32_101, i32_102; + + /* Initialize the variables with values that the compiler cannot guess. */ + i32_00 = 2*p; + i32_01 = -p/3; + i32_02 = p/4; + i32_03 = -p/5; + i32_04 = p/6; + i32_05 = -p/7; + i32_06 = p/8; + i32_07 = 1+1/p; + i32_08 = 1+p/2; + i32_09 = 1-p/2; + i32_10 = 1+p/3; + i32_11 = 1-p/3; + + i32_100 = 17; + i32_101 = -18; + i32_102 = 12; + + // Start the counters. 
+ ret = PAPI_start(EventSet); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to run the kernel. + goto clean_up; + } + + if( p == 12345678 ){ + p /= 2; + i32_100 *= 13; + i32_101 *= 12; + i32_102 *= 11; + }else{ + // Almost certainly this is what will execute and all variables will + // end up with the value one, but the compiler doesn't know that. + i32_100 = 1 + i32_100 / (i32_00+16); + i32_101 = 1 + i32_101 / (i32_00+17); + i32_102 = 1 + i32_102 / (i32_00+11); + } + +#define I32_DIVS(_X) {i32_00 /= _X; i32_01 /= _X; i32_02 /= _X; i32_03 /= _X; i32_04 /= _X; i32_05 /= _X; i32_06 /= _X; i32_07 /= _X; i32_08 /= _X; i32_09 /= _X; i32_10 /= _X; i32_11 /= _X;} + + for(int i=0; i 100 ){ + I32_DIVS(i32_07); + } + } + } + + ret = PAPI_stop(EventSet, ev_values); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_stop() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to print anything. + goto clean_up; + } + fprintf(fp, "%d %lld # INT_DIV_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + + sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; + sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; + +clean_up: + + return; +} + + //////////////////////////////////////////////////////////////////////////////// // f64 ADDITION @@ -889,6 +1117,77 @@ void test_f64_div_max(int p, int M, int N, int EventSet, FILE *fp){ return; } + +void test_f64_fma_max(int p, int M, int N, int EventSet, FILE *fp){ + int ret; + long long int ev_values[2]; + double f64_00, f64_01, f64_02, f64_03, f64_04, f64_05, f64_06, f64_07; + double f64_08, f64_09, f64_10, f64_11; + double f64_100, f64_101, f64_102; + double f64_B; + + /* Initialize the variables with values that the compiler cannot guess. 
*/ + f64_00 = p/431.2; + f64_01 = -p/431.3; + f64_02 = p/431.4; + f64_03 = -p/431.5; + f64_04 = p/431.6; + f64_05 = -p/431.7; + f64_06 = p/431.8; + f64_07 = -p/431.9; + f64_08 = p/432.0; + f64_09 = -p/432.1; + f64_10 = p/432.2; + f64_11 = -p/432.3; + + f64_100 = 1.00001; + f64_101 = -1.00002; + f64_102 = 1.00003; + + // Start the counters. + ret = PAPI_start(EventSet); + if ( PAPI_OK != ret ) { + fprintf(stderr, "PAPI_start() error: %s\n", PAPI_strerror(ret)); + // If we can't measure events, no need to run the kernel. + goto clean_up; + } + + if( p != 12345678 ){ + f64_100 /= 1.000045; + f64_101 /= 1.000054; + f64_102 /= 1.000067; + } + f64_B = f64_100/34567.8; + +#define F64_FMAS(_A,_B) {f64_00 = _A*f64_00+_B; f64_01 = _A*f64_01+_B; f64_02 = _A*f64_02+_B; f64_03 = _A*f64_03+_B; f64_04 = _A*f64_04+_B; f64_05 = _A*f64_05+_B; f64_06 = _A*f64_06+_B; f64_07 = _A*f64_07+_B; f64_08 = _A*f64_08+_B; f64_09 = _A*f64_09+_B; f64_10 = _A*f64_10+_B; f64_11 = _A*f64_11+_B;} + + for(int i=0; i Date: Wed, 30 Oct 2024 17:28:42 -0400 Subject: [PATCH 2/2] CAT: More instruction tests and cleaner output. 
--- src/counter_analysis_toolkit/Makefile | 2 +- src/counter_analysis_toolkit/instructions.c | 1419 ++++++++++++++----- 2 files changed, 1052 insertions(+), 369 deletions(-) diff --git a/src/counter_analysis_toolkit/Makefile b/src/counter_analysis_toolkit/Makefile index ed65548af..45ec4ae58 100644 --- a/src/counter_analysis_toolkit/Makefile +++ b/src/counter_analysis_toolkit/Makefile @@ -162,7 +162,7 @@ vec_nonfma_dp: vec_nonfma_dp.c vec_scalar_verify.h -$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o -$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o -cat_collect: main.c +cat_collect: $(CC) $(CFLAGS) -fopenmp $(INCFLAGS) main.c $(wildcard *.o) -o cat_collect $(LDFLAGS) clean: diff --git a/src/counter_analysis_toolkit/instructions.c b/src/counter_analysis_toolkit/instructions.c index 7a021b198..9fd28c167 100644 --- a/src/counter_analysis_toolkit/instructions.c +++ b/src/counter_analysis_toolkit/instructions.c @@ -105,7 +105,7 @@ void test_int_add(int p, int M, int N, int EventSet, FILE *fp){ // If we can't measure events, no need to print anything. goto clean_up; } - fprintf(fp, "%d %lld # INT_ADD_count: %lld (%.3lf)\n", N, ev_values[0], 50LL*N*M, ev_values[0]/(50.0*N*M)); + fprintf(fp, "%d %lld %lld %.3lf\n", N*M, ev_values[0], 50LL*N*M, (double)ev_values[0]/(50.0*N*M)); sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07 + i32_08 + i32_09; @@ -180,7 +180,7 @@ void test_int_add_max(int p, int M, int N, int EventSet, FILE *fp){ // If we can't measure events, no need to print anything. 
goto clean_up; } - fprintf(fp, "%d %lld # INT_ADD_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + fprintf(fp, "%d %lld %lld %.3lf\n", N*M, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; @@ -256,7 +256,7 @@ void test_int_mul_max(int p, int M, int N, int EventSet, FILE *fp){ // If we can't measure events, no need to print anything. goto clean_up; } - fprintf(fp, "%d %lld # INT_MUL_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + fprintf(fp, "%d %lld %lld %.3lf\n", N*M, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; @@ -332,7 +332,7 @@ void test_int_div_max(int p, int M, int N, int EventSet, FILE *fp){ // If we can't measure events, no need to print anything. goto clean_up; } - fprintf(fp, "%d %lld # INT_DIV_count_ILP12: %lld (%.3lf)\n", N, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); + fprintf(fp, "%d %lld %lld %.3lf\n", N*M, ev_values[0], 12LL*3LL*N*M, (double)ev_values[0]/(12.0*3.0*N*M)); sum_i32 += i32_00 + i32_01 + i32_02 + i32_03 + i32_04 + i32_05 + i32_06 + i32_07; sum_i32 += i32_08 + i32_09 + i32_10 + i32_11; @@ -344,18 +344,18 @@ void test_int_div_max(int p, int M, int N, int EventSet, FILE *fp){ //////////////////////////////////////////////////////////////////////////////// -// f64 ADDITION +// f32 ADD -void test_f64_add(int p, int M, int N, int EventSet, FILE *fp){ +void test_f32_add(int p, int M, int N, int EventSet, FILE *fp){ int ret; long long int ev_values[2]; - double f64_00, f64_01, f64_02, f64_03; + float f32_00, f32_01, f32_02, f32_03; /* Initialize the variables with values that the compiler cannot guess. 
*/ - f64_00 = (double)p/1.02; - f64_01 = -(double)p/1.03; - f64_02 = (double)p/1.04; - f64_03 = -(double)p/1.05; + f32_00 = (float)p/1.02; + f32_01 = -(float)p/1.03; + f32_02 = (float)p/1.04; + f32_03 = -(float)p/1.05; // Start the counters. ret = PAPI_start(EventSet); @@ -365,20 +365,20 @@ void test_f64_add(int p, int M, int N, int EventSet, FILE *fp){ goto clean_up; } -#define FADD_BLOCK() {f64_01 += f64_00; f64_02 += f64_01; f64_03 += f64_02; f64_00 += f64_03;} +#define F32ADD_BLOCK() {f32_01 += f32_00; f32_02 += f32_01; f32_03 += f32_02; f32_00 += f32_03;} for(int i=0; i