From 12969108c1207f273b5395d0f3e5e9b26db4143b Mon Sep 17 00:00:00 2001 From: Alexander Christensen Date: Fri, 11 Aug 2023 13:16:18 -0500 Subject: [PATCH] optimizations for more efficient memory usage --- src/polychoric_matrix.c | 110 +++++++++++++++-------------------- src/xoshiro.c | 24 ++++++-- src/ziggurat.c | 125 ++++++++++++++++++++++++---------------- src/ziggurat.h | 7 +++ 4 files changed, 145 insertions(+), 121 deletions(-) diff --git a/src/polychoric_matrix.c b/src/polychoric_matrix.c index 153a462f..0f4abf54 100644 --- a/src/polychoric_matrix.c +++ b/src/polychoric_matrix.c @@ -91,9 +91,10 @@ int** joint_frequency_table( int X, Y, k; // Allocate memory space for table - int** joint_frequency = (int**) calloc(CUT, sizeof(int*)); + int** joint_frequency = (int**) malloc(CUT * sizeof(int*)); + int* joint_frequency_data = (int*) calloc(CUT * CUT, sizeof(int)); for (k = 0; k < CUT; k++) { - joint_frequency[k] = (int*) calloc(CUT, sizeof(int)); + joint_frequency[k] = &(joint_frequency_data[k * CUT]); } // Pre-compute matrix offset for i and j @@ -127,6 +128,10 @@ double** update_joint_frequency (int** joint_frequency_max, int* cat_X, int* cat // Initialize iterators int i, j; + + // Count the non-zero rows and columns + int non_zero_rows = 0; + int non_zero_columns = 0; // Initialize zero rows and columns bool* zero_rows = (bool*) calloc(CUT, sizeof(bool)); @@ -148,30 +153,17 @@ double** update_joint_frequency (int** joint_frequency_max, int* cat_X, int* cat } - // Check for zero sums + // Check for zero row sums if(row_sum == 0){ zero_rows[i] = true; + }else{ + non_zero_rows++; } + + // Check for zero column sums if(column_sum == 0){ zero_columns[i] = true; - } - - } - - // Count the non-zero rows and columns - int non_zero_rows = 0; - int non_zero_columns = 0; - - // Loop over zero vectors - for(i = 0; i < CUT; i++){ - - // Increase row count - if(!zero_rows[i]){ - non_zero_rows++; - } - - // Increase column count - if(!zero_columns[i]){ + }else{ non_zero_columns++; } @@ -179,8 +171,9 @@ double** update_joint_frequency (int** joint_frequency_max, int* cat_X, int* cat // Create new joint frequency table double** joint_frequency_trim = (double**) malloc(non_zero_rows * sizeof(double*)); - for(i = 0; i < non_zero_rows; i++) { - joint_frequency_trim[i] = (double*) calloc(non_zero_columns, sizeof(double)); + double* joint_frequency_data = (double*) calloc(non_zero_rows * non_zero_columns, sizeof(double)); + for (i = 0; i < non_zero_rows; i++) { + joint_frequency_trim[i] = &(joint_frequency_data[i * non_zero_columns]); } // Initialize row count @@ -269,9 +262,7 @@ struct ThresholdsResult thresholds(int* input_data, int rows, int i, int j, int double** joint_frequency = update_joint_frequency(joint_frequency_max, &cat_X, &cat_Y, &zero_count); // Free memory - for (k = 0; k < CUT; ++k) { - free(joint_frequency_max[k]); - } + free(joint_frequency_max[0]); free(joint_frequency_max); // Initialize added value @@ -731,9 +722,7 @@ double polychoric(int* input_data, int rows, int i, int j, int empty_method, dou ); // Free memory - for (int i = 0; i < thresholds_result.cat_X; i++) { - free(thresholds_result.joint_frequency[i]); - } + free(thresholds_result.joint_frequency[0]); free(thresholds_result.joint_frequency); free(thresholds_result.threshold_X); free(thresholds_result.threshold_Y); @@ -746,81 +735,72 @@ double polychoric(int* input_data, int rows, int i, int j, int empty_method, dou } // The updated polychoric_correlation_matrix function -double* polychoric_correlation_matrix( +void polychoric_correlation_matrix( int* input_data, int rows, int cols, int length, - int empty_method, double empty_value + int empty_method, double empty_value, double* polychoric_matrix ) { - + // Initialize iterators int i, j; double correlation; int matrix_offset[cols]; matrix_offset[0] = 0; - + // Initialize offset for(i = 1; i < cols; i++){ matrix_offset[i] = matrix_offset[i - 1] + cols; } - - // Allocate memory for polychoric_matrix as a 1D array - double* polychoric_matrix = malloc(length * sizeof(double)); - + // Perform polychoric correlations over the input_matrix for (i = 0; i < cols; i++) { - + // Fill diagonal polychoric_matrix[matrix_offset[i] + i] = 1; - + // Loop over other variables for (j = i + 1; j < cols; j++) { - + // Compute correlation correlation = polychoric( input_data, rows, i, j, empty_method, empty_value ); - + // Add to matrix polychoric_matrix[matrix_offset[i] + j] = correlation; - + // Fill opposite of triangle polychoric_matrix[matrix_offset[j] + i] = correlation; - + } } - - // Return correlations - return polychoric_matrix; - + } +// Interface with R SEXP r_polychoric_correlation_matrix( - SEXP r_input_matrix, SEXP r_empty_method, SEXP r_empty_value, - SEXP r_rows, SEXP r_cols + SEXP r_input_matrix, SEXP r_empty_method, + SEXP r_empty_value, SEXP r_rows, SEXP r_cols ) { - + // Initialize columns int cols = INTEGER(r_cols)[0]; int length = cols * cols; - + + // Initialize R result + SEXP r_result = PROTECT(allocVector(REALSXP, length)); + double* c_result = REAL(r_result); + // Call the C function - double* c_result = polychoric_correlation_matrix( + polychoric_correlation_matrix( INTEGER(r_input_matrix), INTEGER(r_rows)[0], cols, length, - INTEGER(r_empty_method)[0], REAL(r_empty_value)[0] + INTEGER(r_empty_method)[0], REAL(r_empty_value)[0], + c_result // Pass the pointer directly to the C function ); - - // Initialize R result - SEXP r_result = PROTECT(allocVector(REALSXP, length)); - - // Copy correlations - memcpy(REAL(r_result), c_result, length * sizeof(double)); - + // Free R result UNPROTECT(1); - - // Free memory - free(c_result); - + // Return R result return r_result; - + } diff --git a/src/xoshiro.c b/src/xoshiro.c index e9601de0..651959a9 100644 --- a/src/xoshiro.c +++ b/src/xoshiro.c @@ -130,9 +130,12 @@ SEXP r_xoshiro_uniform(SEXP n, SEXP r_seed) { // Create R vector SEXP r_output = PROTECT(allocVector(REALSXP, n_values)); + // Get a pointer to the double data of the R vector + double* vec_data = REAL(r_output); + // Generate a random number and store it in the array for(int i = 0; i < n_values; i++) { - REAL(r_output)[i] = xoshiro_uniform(&state); + vec_data[i] = xoshiro_uniform(&state); } // Release protected SEXP objects @@ -162,9 +165,12 @@ SEXP r_xoshiro_seeds(SEXP n, SEXP r_seed) { // Create R vector SEXP r_output = PROTECT(allocVector(REALSXP, n_values)); + // Get a pointer to the double data of the R vector + double* vec_data = REAL(r_output); + // Generate a random number and store it in the array for(int i = 0; i < n_values; i++) { - REAL(r_output)[i] = (double) next(&state); + vec_data[i] = (double) next(&state); } // Release protected SEXP objects @@ -196,12 +202,15 @@ SEXP r_xoshiro_shuffle(SEXP r_vector, SEXP r_seed) { // Protect the input SEXP PROTECT(r_vector); + // Get a pointer to the integer data of the R vector + int* vec_data = INTEGER(r_vector); + // Shuffle the array using the Fisher-Yates (or Knuth shuffle) algorithm for (int i = vector_length - 1; i > 0; i--) { int j = next(&state) % (i + 1); // generates random index between 0 and i - int tmp = INTEGER(r_vector)[j]; - INTEGER(r_vector)[j] = INTEGER(r_vector)[i]; - INTEGER(r_vector)[i] = tmp; + int tmp = vec_data[j]; + vec_data[j] = vec_data[i]; + vec_data[i] = tmp; } // Unprotect the SEXP @@ -233,9 +242,12 @@ SEXP r_xoshiro_shuffle_replace(SEXP r_vector, SEXP r_seed) { // Create R vector SEXP r_output = PROTECT(allocVector(REALSXP, vector_length)); + // Get a pointer to the double data of the R vector + double* vec_data = REAL(r_output); + // Shuffle for(int i = 0; i < vector_length; i++) { - REAL(r_output)[i] = (double) (next(&state) % vector_length) + 1; + vec_data[i] = (double) (next(&state) % vector_length) + 1; } // Release protected SEXP objects diff --git a/src/ziggurat.c b/src/ziggurat.c index 788a2c9c..25411617 100644 --- a/src/ziggurat.c +++ b/src/ziggurat.c @@ -108,50 +108,42 @@ double r4_nor ( xoshiro256_state* state, uint32_t kn[128], double fn[128], doubl { int hz; uint32_t iz; - const double r = 3.442620; - double value; - double x; - double y; + double x, y, value; hz = ( int ) next ( state ); // cast may cause negative (and that's OK) iz = ( hz & 127 ); - if ( abs ( hz ) < kn[iz] ) - { + if ( abs ( hz ) < kn[iz] ) { value = ( double ) ( hz ) * wn[iz]; - } - else - { - for ( ; ; ) - { - if ( iz == 0 ) - { - for ( ; ; ) - { + } else { + + for ( ; ; ) { + + if ( iz == 0 ) { + + for ( ; ; ) { + x = - 0.2904764 * log ( xoshiro_uniform( state ) ); y = - log ( xoshiro_uniform( state ) ); - if ( x * x <= y + y ) - { + if ( x * x <= y + y ) { break; } + } + + value = ( hz <= 0 ) ? ( -R - x ) : ( R + x ); - if ( hz <= 0 ) - { - value = - r - x; - } - else - { - value = + r + x; - } break; + } x = ( double ) ( hz ) * wn[iz]; - if ( fn[iz] + xoshiro_uniform( state ) * ( fn[iz-1] - fn[iz] ) - < exp ( - 0.5 * x * x ) ) - { + if ( + fn[iz] + xoshiro_uniform( state ) * + ( fn[iz-1] - fn[iz] ) < + exp ( - 0.5 * x * x ) + ) { value = x; break; } @@ -159,15 +151,17 @@ double r4_nor ( xoshiro256_state* state, uint32_t kn[128], double fn[128], doubl hz = ( int ) next ( state ); // cast may cause negative (and that's OK) iz = ( hz & 127 ); - if ( abs ( hz ) < kn[iz] ) - { + if ( abs ( hz ) < kn[iz] ) { value = ( double ) ( hz ) * wn[iz]; break; } + } + } return value; + } /******************************************************************************/ @@ -205,31 +199,62 @@ void r4_nor_setup ( uint32_t kn[128], double fn[128], double wn[128] ) Output, float FN[128], WN[128], data needed by R4_NOR. */ { - double dn = 3.442619855899; + + /* + + Original code for comparison is provide below. + Constants were calculated by hand computing each + calculation + + + double dn = 3.442619855899; + int i; + const double m1 = 2147483648.0; + double q; + double tn = 3.442619855899; + const double vn = 9.91256303526217E-03; + + q = vn / exp ( - 0.5 * dn * dn ); + + kn[0] = ( uint32_t ) ( ( dn / q ) * m1 ); + kn[1] = 0; + + wn[0] = ( q / m1 ); + wn[127] = ( dn / m1 ); + + fn[0] = 1.0; + fn[127] = ( exp ( - 0.5 * dn * dn ) ); + + for ( i = 126; 1 <= i; i-- ) + { + dn = sqrt ( - 2.0 * log ( vn / dn + exp ( - 0.5 * dn * dn ) ) ); + kn[i+1] = ( uint32_t ) ( ( dn / tn ) * m1 ); + tn = dn; + fn[i] = ( exp ( - 0.5 * dn * dn ) ); + wn[i] = ( dn / m1 ); + } + */ + int i; - const double m1 = 2147483648.0; - double q; - double tn = 3.442619855899; - const double vn = 9.91256303526217E-03; - - q = vn / exp ( - 0.5 * dn * dn ); + double dn = DN; + double tn = DN; - kn[0] = ( uint32_t ) ( ( dn / q ) * m1 ); + kn[0] = 1991057938; kn[1] = 0; - wn[0] = ( q / m1 ); - wn[127] = ( dn / m1 ); + wn[0] = 1.729040521542796563654E-09; + wn[127] = 1.603094793809112161398E-09; fn[0] = 1.0; - fn[127] = ( exp ( - 0.5 * dn * dn ) ); + fn[127] = 0.002669629083880925288704; for ( i = 126; 1 <= i; i-- ) { - dn = sqrt ( - 2.0 * log ( vn / dn + exp ( - 0.5 * dn * dn ) ) ); - kn[i+1] = ( uint32_t ) ( ( dn / tn ) * m1 ); + dn = sqrt ( - 2.0 * log ( VN / dn + exp ( - 0.5 * dn * dn ) ) ); + kn[i+1] = ( uint32_t ) ( ( dn / tn ) * M1 ); tn = dn; fn[i] = ( exp ( - 0.5 * dn * dn ) ); - wn[i] = ( dn / m1 ); + wn[i] = ( dn / M1 ); } return; @@ -239,9 +264,6 @@ void r4_nor_setup ( uint32_t kn[128], double fn[128], double wn[128] ) SEXP r_ziggurat(SEXP n, SEXP r_seed) { - // Initialize iterators - int i; - // Initialize values int n_values = INTEGER(n)[0]; uint64_t seed_value = (uint64_t) REAL(r_seed)[0]; @@ -263,10 +285,13 @@ SEXP r_ziggurat(SEXP n, SEXP r_seed) { // Create R vector SEXP r_output = PROTECT(allocVector(REALSXP, n_values)); - + + // Get a pointer to the double data of the R vector + double* vec_data = REAL(r_output); + // Generate random numbers - for(i = 0; i < n_values; i++) { - REAL(r_output)[i] = r4_nor(&state, kn, fn, wn); + for(int i = 0; i < n_values; i++) { + vec_data[i] = r4_nor(&state, kn, fn, wn); } // Release protected SEXP objects diff --git a/src/ziggurat.h b/src/ziggurat.h index c9e9f1bf..2c09108e 100644 --- a/src/ziggurat.h +++ b/src/ziggurat.h @@ -1,4 +1,11 @@ #include "xoshiro.h" +// Constants +#define R 3.442620 // r +#define DN 3.442619855899 // dn +#define M1 2147483648.0 // m1 +#define VN 9.91256303526217E-03 // vn + +// Function prototypes void r4_nor_setup ( uint32_t kn[128], double fn[128], double wn[128] ); double r4_nor ( xoshiro256_state* state, uint32_t kn[128], double fn[128], double wn[128] );