Skip to content

Commit

Permalink
Clean up all warnings
Browse files Browse the repository at this point in the history
Fix SYCL CG solver indexing
Fix CXX suffixed CMake flags
Remove all VLA uses
Address #12
  • Loading branch information
tom91136 committed Aug 11, 2024
1 parent 5b455f4 commit fdbb55a
Show file tree
Hide file tree
Showing 29 changed files with 453 additions and 437 deletions.
10 changes: 5 additions & 5 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
endif ()

project(tealeaf VERSION 1.0 LANGUAGES CXX)
project(tealeaf VERSION 1.0 LANGUAGES C CXX)
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

Expand Down Expand Up @@ -34,8 +34,8 @@ if ((NOT BUILD_TYPE STREQUAL RELEASE) AND (NOT BUILD_TYPE STREQUAL DEBUG))
endif ()

# setup some defaults flags for everything
set(DEFAULT_DEBUG_FLAGS -Wall -O2)
set(DEFAULT_RELEASE_FLAGS -Wall -O3)
set(DEFAULT_DEBUG_FLAGS -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -O2)
set(DEFAULT_RELEASE_FLAGS -Wall -Wno-unused-parameter -Wno-unused-function -Wno-unused-variable -O3)

macro(hint_flag FLAG DESCRIPTION)
if (NOT DEFINED ${FLAG})
Expand Down Expand Up @@ -73,7 +73,7 @@ if (USE_TBB)
FetchContent_Declare(
TBB
GIT_REPOSITORY https://github.com/oneapi-src/oneTBB.git
GIT_TAG v2021.9.0
GIT_TAG v2021.13.0
)
# Don't fail builds on waring (TBB has -Wall while not being free of warnings from unused symbols...)
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
Expand All @@ -94,7 +94,7 @@ if (USE_ONEDPL)
FetchContent_Declare(
oneDPL
GIT_REPOSITORY https://github.com/oneapi-src/oneDPL.git
GIT_TAG oneDPL-2022.2.0-rc1
GIT_TAG oneDPL-2022.6.0-rc1
)
string(TOLOWER ${USE_ONEDPL} ONEDPL_BACKEND)
# XXX oneDPL looks for omp instead of openmp, which mismatches(!) with ONEDPL_PAR_BACKEND if using find_package
Expand Down
8 changes: 4 additions & 4 deletions cmake/register_models.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@ endmacro()

macro(register_append_cxx_flags CONFIG)
if ("${CONFIG}" STREQUAL "ANY")
list(APPEND DEFAULT_RELEASE_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN})
elseif ("${CONFIG}" STREQUAL "RELEASE")
list(APPEND DEFAULT_RELEASE_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_RELEASE_FLAGS ${ARGN})
elseif ("${CONFIG}" STREQUAL "DEBUG")
list(APPEND DEFAULT_DEBUG_CXX_FLAGS ${ARGN})
list(APPEND DEFAULT_DEBUG_FLAGS ${ARGN})
else ()
message(FATAL_ERROR "register_flags supports only RELEASE, DEBUG, or ANY for all configs, got `${CONFIG}`")
endif ()
Expand Down
2 changes: 1 addition & 1 deletion driver/comms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void initialise_ranks(Settings &settings) {
void finalise_comms() { MPI_Finalize(); }

// Sends a message out and receives a message in
void send_recv_message(Settings &settings, double *send_buffer, double *recv_buffer, int buffer_len, int neighbour, int send_tag,
void send_recv_message(Settings &settings, const double *send_buffer, double *recv_buffer, int buffer_len, int neighbour, int send_tag,
int recv_tag, MPI_Request *send_request, MPI_Request *recv_request) {
START_PROFILING(settings.kernel_profile);

Expand Down
2 changes: 1 addition & 1 deletion driver/comms.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,5 @@ void initialise_ranks(Settings &settings);
void sum_over_ranks(Settings &settings, double *a);
void min_over_ranks(Settings &settings, double *a);
void wait_for_requests(Settings &settings, int num_requests, MPI_Request *requests);
void send_recv_message(Settings &settings, double *send_buffer, double *recv_buffer, int buffer_len, int neighbour, int send_tag,
void send_recv_message(Settings &settings, const double *send_buffer, double *recv_buffer, int buffer_len, int neighbour, int send_tag,
int recv_tag, MPI_Request *send_request, MPI_Request *recv_request);
4 changes: 2 additions & 2 deletions driver/diffuse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

double calc_dt(Chunk *chunks);
void calc_min_timestep(Chunk *chunks, double *dt, int chunks_per_task);
void solve(Chunk *chunks, Settings &settings, int tt, double *wallclock_prev);
void solve(Chunk *chunks, Settings &settings, int tt, const double *wallclock_prev);

// The main timestep loop
bool diffuse(Chunk *chunks, Settings &settings) {
Expand All @@ -17,7 +17,7 @@ bool diffuse(Chunk *chunks, Settings &settings) {
}

// Performs a solve for a single timestep
void solve(Chunk *chunks, Settings &settings, int tt, double *wallclock_prev) {
void solve(Chunk *chunks, Settings &settings, int tt, const double *wallclock_prev) {
print_and_log(settings, "\n Timestep %d\n", tt + 1);
profiler_start_timer(settings.wallclock_profile);

Expand Down
9 changes: 4 additions & 5 deletions driver/eigenvalue_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "kernel_interface.h"
#include <cfloat>
#include <cmath>
#include <vector>
#include <cstring>

void tqli(double *d, double *e, int n);
Expand All @@ -13,10 +14,8 @@ void eigenvalue_driver_initialise(Chunk *chunks, Settings &settings, int num_cg_
START_PROFILING(settings.kernel_profile);

for (int cc = 0; cc < settings.num_chunks_per_rank; ++cc) {
double diag[num_cg_iters];
double offdiag[num_cg_iters];
std::memset(diag, 0, sizeof(diag));
std::memset(offdiag, 0, sizeof(offdiag));
std::vector<double> diag(num_cg_iters);
std::vector<double> offdiag(num_cg_iters);

// Prepare matrix
for (int ii = 0; ii < num_cg_iters; ++ii) {
Expand All @@ -31,7 +30,7 @@ void eigenvalue_driver_initialise(Chunk *chunks, Settings &settings, int num_cg_
}

// Calculate the eigenvalues (ignore eigenvectors)
tqli(diag, offdiag, num_cg_iters);
tqli(diag.data(), offdiag.data(), num_cg_iters);

chunks[cc].eigmin = DBL_MAX;
chunks[cc].eigmax = DBL_MIN;
Expand Down
86 changes: 44 additions & 42 deletions driver/parse_config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <cctype>
#include <cstdio>
#include <cstring>
#include <vector>

int read_states(FILE *tea_in, Settings &settings, State **states);
void read_settings(FILE *tea_in, Settings &settings);
Expand Down Expand Up @@ -70,26 +71,27 @@ void read_settings(FILE *tea_in, Settings &settings) {

// Get the number of states present in the config file
while (getline(&line, &len, tea_in) != EOF) {
char word[len];

std::vector<char> word(len);

// Parse the key-value pairs
if (starts_get_double("initial_timestep", line, word, &settings.dt_init)) continue;
if (starts_get_double("end_time", line, word, &settings.end_time)) continue;
if (starts_get_int("end_step", line, word, &settings.end_step)) continue;
if (starts_get_double("xmin", line, word, &settings.grid_x_min)) continue;
if (starts_get_double("ymin", line, word, &settings.grid_y_min)) continue;
if (starts_get_double("xmax", line, word, &settings.grid_x_max)) continue;
if (starts_get_double("ymax", line, word, &settings.grid_y_max)) continue;
if (settings.grid_x_cells == DEF_GRID_X_CELLS && starts_get_int("x_cells", line, word, &settings.grid_x_cells)) continue;
if (settings.grid_y_cells == DEF_GRID_Y_CELLS && starts_get_int("y_cells", line, word, &settings.grid_y_cells)) continue;
if (starts_get_int("summary_frequency", line, word, &settings.summary_frequency)) continue;
if (starts_get_int("presteps", line, word, &settings.presteps)) continue;
if (starts_get_int("ppcg_inner_steps", line, word, &settings.ppcg_inner_steps)) continue;
if (starts_get_double("epslim", line, word, &settings.eps_lim)) continue;
if (starts_get_int("max_iters", line, word, &settings.max_iters)) continue;
if (starts_get_double("eps", line, word, &settings.eps)) continue;
if (starts_get_int("num_chunks_per_rank", line, word, &settings.num_chunks_per_rank)) continue;
if (starts_get_int("halo_depth", line, word, &settings.halo_depth)) continue;
if (starts_get_double("initial_timestep", line, word.data(), &settings.dt_init)) continue;
if (starts_get_double("end_time", line, word.data(), &settings.end_time)) continue;
if (starts_get_int("end_step", line, word.data(), &settings.end_step)) continue;
if (starts_get_double("xmin", line, word.data(), &settings.grid_x_min)) continue;
if (starts_get_double("ymin", line, word.data(), &settings.grid_y_min)) continue;
if (starts_get_double("xmax", line, word.data(), &settings.grid_x_max)) continue;
if (starts_get_double("ymax", line, word.data(), &settings.grid_y_max)) continue;
if (settings.grid_x_cells == DEF_GRID_X_CELLS && starts_get_int("x_cells", line, word.data(), &settings.grid_x_cells)) continue;
if (settings.grid_y_cells == DEF_GRID_Y_CELLS && starts_get_int("y_cells", line, word.data(), &settings.grid_y_cells)) continue;
if (starts_get_int("summary_frequency", line, word.data(), &settings.summary_frequency)) continue;
if (starts_get_int("presteps", line, word.data(), &settings.presteps)) continue;
if (starts_get_int("ppcg_inner_steps", line, word.data(), &settings.ppcg_inner_steps)) continue;
if (starts_get_double("epslim", line, word.data(), &settings.eps_lim)) continue;
if (starts_get_int("max_iters", line, word.data(), &settings.max_iters)) continue;
if (starts_get_double("eps", line, word.data(), &settings.eps)) continue;
if (starts_get_int("num_chunks_per_rank", line, word.data(), &settings.num_chunks_per_rank)) continue;
if (starts_get_int("halo_depth", line, word.data(), &settings.halo_depth)) continue;

// Parse the switches
if (starts_with("check_result", line)) {
Expand Down Expand Up @@ -156,9 +158,9 @@ int read_states(FILE *tea_in, Settings &settings, State **states) {
// First find the number of states
while (getline(&line, &len, tea_in) != EOF) {
int state_num = 0;
char word[len];
std::vector<char> word(len);

if (starts_get_int("state", line, word, &state_num)) {
if (starts_get_int("state", line, word.data(), &state_num)) {
num_states = tealeaf_MAX(num_states, state_num);
}
}
Expand All @@ -181,42 +183,42 @@ int read_states(FILE *tea_in, Settings &settings, State **states) {
// not change the answer.
while (getline(&line, &len, tea_in) != EOF) {
int state_num = 0;
char word[len];
std::vector<char> word(len);

// State found
if (starts_get_int("state", line, word, &state_num)) {
if (starts_get_int("state", line, word.data(), &state_num)) {
State *state = &((*states)[state_num - 1]);

if (state->defined) {
die(__LINE__, __FILE__, "State number %d defined twice.\n", state_num);
}

read_value(line, "density", word);
state->density = atof(word);
read_value(line, "energy", word);
state->energy = atof(word);
read_value(line, "density", word.data());
state->density = atof(word.data());
read_value(line, "energy", word.data());
state->energy = atof(word.data());

// State 1 is the default state so geometry irrelevant
if (state_num > 1) {
read_value(line, "xmin", word);
state->x_min = atof(word) + settings.dx / 100.0;
read_value(line, "ymin", word);
state->y_min = atof(word) + settings.dy / 100.0;
read_value(line, "xmax", word);
state->x_max = atof(word) - settings.dx / 100.0;
read_value(line, "ymax", word);
state->y_max = atof(word) - settings.dy / 100.0;

read_value(line, "geometry", word);

if (tealeaf_strmatch(word, "rectangle")) {
read_value(line, "xmin", word.data());
state->x_min = atof(word.data()) + settings.dx / 100.0;
read_value(line, "ymin", word.data());
state->y_min = atof(word.data()) + settings.dy / 100.0;
read_value(line, "xmax", word.data());
state->x_max = atof(word.data()) - settings.dx / 100.0;
read_value(line, "ymax", word.data());
state->y_max = atof(word.data()) - settings.dy / 100.0;

read_value(line, "geometry", word.data());

if (tealeaf_strmatch(word.data(), "rectangle")) {
state->geometry = Geometry::RECTANGULAR;
} else if (tealeaf_strmatch(word, "circular")) {
} else if (tealeaf_strmatch(word.data(), "circular")) {
state->geometry = Geometry::CIRCULAR;

read_value(line, "radius", word);
state->radius = atof(word);
} else if (tealeaf_strmatch(word, "point")) {
read_value(line, "radius", word.data());
state->radius = atof(word.data());
} else if (tealeaf_strmatch(word.data(), "point")) {
state->geometry = Geometry::POINT;
}
}
Expand Down
4 changes: 2 additions & 2 deletions driver/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ void profiler_end_timer(Profile *profile, const char *entry_name) {
#ifdef __APPLE__
double elapsed = (profile->profiler_end - profile->profiler_start) * 1.0E-9;
#else
double elapsed = (profile->profiler_end.tv_sec - profile->profiler_start.tv_sec) +
(profile->profiler_end.tv_nsec - profile->profiler_start.tv_nsec) * 1.0E-9;
double elapsed = double(profile->profiler_end.tv_sec - profile->profiler_start.tv_sec) +
double(profile->profiler_end.tv_nsec - profile->profiler_start.tv_nsec) * 1.0E-9;
#endif

profile->profiler_entries[ii].time += elapsed;
Expand Down
8 changes: 5 additions & 3 deletions driver/remote_halo_driver.cpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include <vector>

#include "chunk.h"
#include "comms.h"
#include "drivers.h"
Expand Down Expand Up @@ -46,7 +48,7 @@ void remote_halo_driver(Chunk *chunks, Settings &settings, int depth) {
#ifndef NO_MPI
// Two sends and two receives
int max_messages = settings.num_chunks_per_rank * 4;
MPI_Request requests[max_messages];
std::vector<MPI_Request> requests(max_messages, {});

int num_messages = 0;

Expand Down Expand Up @@ -80,7 +82,7 @@ void remote_halo_driver(Chunk *chunks, Settings &settings, int depth) {
for (int cc = 0; cc < settings.num_chunks_per_rank; ++cc) {
run_before_waitall_halo(&chunks[cc], settings);
}
wait_for_requests(settings, num_messages, requests);
wait_for_requests(settings, num_messages, requests.data());
for (int cc = 0; cc < settings.num_chunks_per_rank; ++cc) {
int buffer_len = 0;
for (int ii = 0; ii < NUM_FIELDS; ++ii) {
Expand Down Expand Up @@ -133,7 +135,7 @@ void remote_halo_driver(Chunk *chunks, Settings &settings, int depth) {
for (int cc = 0; cc < settings.num_chunks_per_rank; ++cc) {
run_before_waitall_halo(&chunks[cc], settings);
}
wait_for_requests(settings, num_messages, requests);
wait_for_requests(settings, num_messages, requests.data());
for (int cc = 0; cc < settings.num_chunks_per_rank; ++cc) {
int buffer_len = 0;
for (int ii = 0; ii < NUM_FIELDS; ++ii) {
Expand Down
3 changes: 1 addition & 2 deletions src/hip/cuknl_shared.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ template <typename T> class reduce<T, 0> {
};

inline void check_errors(int line_num, const char *file) {
hipDeviceSynchronize();
if (auto result = hipGetLastError(); result != hipSuccess) {
if (auto result = hipDeviceSynchronize(); result != hipSuccess) {
die(line_num, file, "Error in %s - return code %d (%s)\n", file, result, hipGetErrorName(result));
}
}
Expand Down
6 changes: 3 additions & 3 deletions src/hip/jacobi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ __global__ void jacobi_init(const int x_inner, const int y_inner, const int halo

if (row == 0 || col == 0) return;

double density_center;
double density_left;
double density_down;
double density_center{};
double density_left{};
double density_down{};

if (coefficient == CONDUCTIVITY) {
density_center = density[index];
Expand Down
26 changes: 19 additions & 7 deletions src/hip/kernel_initialise.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,18 @@
// Allocates, and zeroes and individual buffer
void allocate_device_buffer(double **a, int x, int y) {
#ifdef CLOVER_MANAGED_ALLOC
hipMallocManaged(a, x * y * sizeof(double));
if (auto result = hipMallocManaged(a, x * y * sizeof(double)); result != hipSuccess) {
die(__LINE__, __FILE__, "hipMallocManaged failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
#else
hipMalloc(a, x * y * sizeof(double));
if (auto result = hipMalloc(a, x * y * sizeof(double)); result != hipSuccess) {
die(__LINE__, __FILE__, "hipMalloc failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
#endif
check_errors(__LINE__, __FILE__);
hipMemset(*a, 0, x * y * sizeof(double));
if (auto result = hipMemset(*a, 0, x * y * sizeof(double)); result != hipSuccess) {
die(__LINE__, __FILE__, "hipMemset failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
check_errors(__LINE__, __FILE__);
}

Expand All @@ -38,7 +44,7 @@ void allocate_host_buffer(double **a, int x, int y) {
}
}

void run_model_info(Settings &settings){
void run_model_info(Settings &settings) {
settings.model_name = "HIP";
#ifdef CLOVER_MANAGED_ALLOC
settings.model_kind = ModelKind::Unified;
Expand All @@ -49,11 +55,15 @@ void run_model_info(Settings &settings){

void run_kernel_initialise(Chunk *chunk, Settings &settings, int comms_lr_len, int comms_tb_len) {
int count;
hipGetDeviceCount(&count);
if (auto result = hipGetDeviceCount(&count); result != hipSuccess) {
die(__LINE__, __FILE__, "hipGetDeviceCount failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
std::vector<std::pair<int, std::string>> devices(count);
for (int i = 0; i < count; ++i) {
hipDeviceProp_t props{};
hipGetDeviceProperties(&props, i);
if (auto result = hipGetDeviceProperties(&props, i); result != hipSuccess) {
die(__LINE__, __FILE__, "hipGetDeviceProperties failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
devices[i] = {i, std::string(props.name)};
}

Expand Down Expand Up @@ -91,7 +101,9 @@ void run_kernel_initialise(Chunk *chunk, Settings &settings, int comms_lr_len, i
}

hipDeviceProp_t properties{};
hipGetDeviceProperties(&properties, selected);
if (auto result = hipGetDeviceProperties(&properties, selected); result != hipSuccess) {
die(__LINE__, __FILE__, "hipGetDeviceProperties failed - return code %d (%s)\n", result, hipGetErrorName(result));
}
print_and_log(settings, "# Rank %d using %s device id %d\n", settings.rank, properties.name, selected);

chunk->staging_left_send = static_cast<double *>(std::malloc(sizeof(double) * comms_lr_len));
Expand Down
Loading

0 comments on commit fdbb55a

Please sign in to comment.