Skip to content

Commit

Permalink
Merge pull request #12678 from edgargabriel/topic/fcoll-vulcan-accele…
Browse files Browse the repository at this point in the history
…rator-support

fcoll/vulcan accelerator support
  • Loading branch information
edgargabriel authored Sep 4, 2024
2 parents 72c952d + d30471c commit 1afb524
Show file tree
Hide file tree
Showing 9 changed files with 268 additions and 37 deletions.
4 changes: 4 additions & 0 deletions ompi/mca/common/ompio/common_ompio.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_write_at (ompio_file_t *fh, OMPI_MPI_OFF
OMPI_DECLSPEC int mca_common_ompio_file_iwrite (ompio_file_t *fh, const void *buf, size_t count,
struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh, ompi_request_t *request);

OMPI_DECLSPEC int mca_common_ompio_file_iwrite_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
const void *buf, size_t count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
Expand Down Expand Up @@ -297,6 +299,8 @@ OMPI_DECLSPEC int mca_common_ompio_file_read_at (ompio_file_t *fh, OMPI_MPI_OFFS
OMPI_DECLSPEC int mca_common_ompio_file_iread (ompio_file_t *fh, void *buf, size_t count,
struct ompi_datatype_t *datatype, ompi_request_t **request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_pregen (ompio_file_t *fh, ompi_request_t *request);

OMPI_DECLSPEC int mca_common_ompio_file_iread_at (ompio_file_t *fh, OMPI_MPI_OFFSET_TYPE offset,
void *buf, size_t count, struct ompi_datatype_t *datatype,
ompi_request_t **request);
Expand Down
64 changes: 63 additions & 1 deletion ompi/mca/common/ompio/common_ompio_file_read.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -530,6 +530,68 @@ int mca_common_ompio_file_iread (ompio_file_t *fh,
return ret;
}

/*
** This routine is invoked from file_read_all.
** It is only used if the temporary buffer is a gpu buffer,
** and the fbtl supports the ipreadv operation.
**
** The io-array has already been generated in file_read_all,
** and we use the pre-computed offsets to create a pseudo fview.
** The position of the file pointer is updated in the file_read_all
** operation, not here.
*/

int mca_common_ompio_file_iread_pregen (ompio_file_t *fh,
                                        ompi_request_t *request)
{
    uint32_t i;
    size_t max_data;
    size_t pipeline_buf_size;
    mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;

    /* Guard against a missing non-blocking fbtl read interface, mirroring
    ** the fbtl_ipwritev check in mca_common_ompio_file_iwrite_pregen. */
    if (NULL == fh->f_fbtl->fbtl_ipreadv) {
        return MPI_ERR_INTERN;
    }

    /* The io-array was pre-generated by the caller (file_read_all) from a
    ** single contiguous temporary buffer: entry 0 carries the total amount
    ** of data and the base address of that buffer. */
    max_data = fh->f_io_array[0].length;
    pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);

    mca_common_ompio_register_progress ();

    OMPIO_PREPARE_READ_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
                            ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
                            pipeline_buf_size, NULL, i);

    ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
    ompio_req->req_size = pipeline_buf_size;
    ompio_req->req_max_data = max_data;
    ompio_req->req_post_next_subreq = mca_common_ompio_post_next_read_subreq;
    ompio_req->req_fh = fh;
    ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;

    ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
    if (NULL == ompio_req->req_fview) {
        opal_output(1, "common_ompio: error allocating memory\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc (fh->f_num_of_io_entries *
                                                                  sizeof(struct iovec));
    if (NULL == ompio_req->req_fview->f_decoded_iov) {
        opal_output(1, "common_ompio_file_iread_pregen: could not allocate memory\n");
        /* do not leak the fview allocated just above */
        free (ompio_req->req_fview);
        ompio_req->req_fview = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Convert the pre-computed io-array entries into a pseudo file view. */
    ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
    for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
        ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
        ompio_req->req_fview->f_decoded_iov[i].iov_len  = fh->f_io_array[i].length;
    }

    /* The io-array content now lives in the pseudo fview; release the
    ** original array so the caller does not re-issue it. */
    fh->f_num_of_io_entries = 0;
    free (fh->f_io_array);
    fh->f_io_array = NULL;

    mca_common_ompio_post_next_read_subreq(ompio_req, 0);
    return OMPI_SUCCESS;
}

int mca_common_ompio_file_iread_at (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
void *buf,
Expand Down
64 changes: 54 additions & 10 deletions ompi/mca/common/ompio/common_ompio_file_read_all.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
* Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* Copyright (c) 2024 Advanced Micro Devices, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -30,9 +31,12 @@
#include "ompi/mca/fcoll/base/fcoll_base_coll_array.h"
#include "ompi/mca/fcoll/base/base.h"
#include "ompi/mca/common/ompio/common_ompio.h"
#include "ompi/mca/common/ompio/common_ompio_request.h"
#include "ompi/mca/common/ompio/common_ompio_buffer.h"
#include "ompi/mca/io/io.h"
#include "math.h"
#include "ompi/mca/pml/pml.h"
#include "opal/mca/accelerator/accelerator.h"
#include <unistd.h>

#define DEBUG_ON 0
Expand Down Expand Up @@ -106,6 +110,9 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
int* blocklength_proc = NULL;
ptrdiff_t* displs_proc = NULL;

int is_gpu, is_managed;
bool use_accelerator_buffer = false;

#if OMPIO_FCOLL_WANT_TIME_BREAKDOWN
double read_time = 0.0, start_read_time = 0.0, end_read_time = 0.0;
double rcomm_time = 0.0, start_rcomm_time = 0.0, end_rcomm_time = 0.0;
Expand Down Expand Up @@ -138,6 +145,12 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
goto exit;
}

mca_common_ompio_check_gpu_buf (fh, buf, &is_gpu, &is_managed);
if (is_gpu && !is_managed && NULL != fh->f_fbtl->fbtl_ipreadv &&
fh->f_get_mca_parameter_value ("use_accelerator_buffers", strlen("use_accelerator_buffers"))) {
use_accelerator_buffer = true;
}

ret = mca_common_ompio_set_aggregator_props ((struct ompio_file_t *) fh,
base_num_io_procs,
max_data);
Expand Down Expand Up @@ -364,11 +377,22 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
goto exit;
}

global_buf = (char *) malloc (bytes_per_cycle);
if (NULL == global_buf){
opal_output(1, "OUT OF MEMORY\n");
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
if (use_accelerator_buffer) {
opal_output_verbose(10, ompi_fcoll_base_framework.framework_output,
"Allocating GPU device buffer for aggregation\n");
ret = opal_accelerator.mem_alloc(MCA_ACCELERATOR_NO_DEVICE_ID, (void**)&global_buf,
bytes_per_cycle);
if (OPAL_SUCCESS != ret) {
opal_output(1, "Could not allocate accelerator memory");
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
} else {global_buf = (char *) malloc (bytes_per_cycle);
if (NULL == global_buf){
opal_output(1, "OUT OF MEMORY\n");
ret = OMPI_ERR_OUT_OF_RESOURCE;
goto exit;
}
}

sendtype = (ompi_datatype_t **) malloc (fh->f_procs_per_group * sizeof(ompi_datatype_t *));
Expand Down Expand Up @@ -686,10 +710,26 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,
#endif

if (fh->f_num_of_io_entries) {
if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
opal_output (1, "READ FAILED\n");
ret = OMPI_ERROR;
goto exit;
if (use_accelerator_buffer) {
mca_ompio_request_t *ompio_req = NULL;
mca_common_ompio_request_alloc (&ompio_req, MCA_OMPIO_REQUEST_READ);

ret = mca_common_ompio_file_iread_pregen(fh, (ompi_request_t *) ompio_req);
if(0 > ret) {
opal_output (1, "common_ompio_file_read_all: mca_common_ompio_iread_pregen failed\n");
ompio_req->req_ompi.req_status.MPI_ERROR = ret;
ompio_req->req_ompi.req_status._ucount = 0;
}
ret = ompi_request_wait ((ompi_request_t**)&ompio_req, MPI_STATUS_IGNORE);
if (OMPI_SUCCESS != ret){
goto exit;
}
} else {
if ( 0 > fh->f_fbtl->fbtl_preadv (fh)) {
opal_output (1, "READ FAILED\n");
ret = OMPI_ERROR;
goto exit;
}
}
}

Expand Down Expand Up @@ -881,7 +921,11 @@ mca_common_ompio_base_file_read_all (struct ompio_file_t *fh,

exit:
if (NULL != global_buf) {
free (global_buf);
if (use_accelerator_buffer) {
opal_accelerator.mem_release(MCA_ACCELERATOR_NO_DEVICE_ID, global_buf);
} else {
free (global_buf);
}
global_buf = NULL;
}
if (NULL != sorted) {
Expand Down
69 changes: 68 additions & 1 deletion ompi/mca/common/ompio/common_ompio_file_write.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* Copyright (c) 2008-2019 University of Houston. All rights reserved.
* Copyright (c) 2015-2018 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2022-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2022-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2024 Triad National Security, LLC. All rights
* reserved.
* $COPYRIGHT$
Expand Down Expand Up @@ -329,6 +329,7 @@ static void mca_common_ompio_post_next_write_subreq(struct mca_ompio_request_t *
decoded_iov.iov_base = req->req_tbuf;
decoded_iov.iov_len = req->req_size;
opal_convertor_pack (&req->req_convertor, &decoded_iov, &iov_count, &pos);

mca_common_ompio_build_io_array (req->req_fview, index, req->req_num_subreqs,
bytes_per_cycle, pos,
iov_count, &decoded_iov,
Expand Down Expand Up @@ -472,6 +473,72 @@ int mca_common_ompio_file_iwrite (ompio_file_t *fh,
return ret;
}

/*
** This routine is invoked from the fcoll component.
** It is only used if the temporary buffer is a gpu buffer,
** and the fbtl supports the ipwritev operation.
**
** The io-array has already been generated in fcoll/xxx/file_write_all,
** and we use the pre-computed offsets to create a pseudo fview.
** The position of the file pointer is updated in the fcoll
** component, not here.
*/

int mca_common_ompio_file_iwrite_pregen (ompio_file_t *fh,
                                         ompi_request_t *request)
{
    uint32_t i;
    size_t max_data;
    size_t pipeline_buf_size;
    mca_ompio_request_t *ompio_req = (mca_ompio_request_t *) request;

    /* This routine strictly requires non-blocking fbtl write support. */
    if (NULL == fh->f_fbtl->fbtl_ipwritev) {
        return MPI_ERR_INTERN;
    }

    /* The io-array was pre-generated by the fcoll component from a single
    ** contiguous temporary buffer: entry 0 carries the total amount of
    ** data and the base address of that buffer. */
    max_data = fh->f_io_array[0].length;
    pipeline_buf_size = OMPIO_MCA_GET(fh, pipeline_buffer_size);

    mca_common_ompio_register_progress ();

    OMPIO_PREPARE_BUF (fh, fh->f_io_array[0].memory_address, max_data, MPI_BYTE,
                       ompio_req->req_tbuf, &ompio_req->req_convertor, max_data,
                       pipeline_buf_size, NULL, i);

    ompio_req->req_num_subreqs = ceil((double)max_data/pipeline_buf_size);
    ompio_req->req_size = pipeline_buf_size;
    ompio_req->req_max_data = max_data;
    ompio_req->req_post_next_subreq = mca_common_ompio_post_next_write_subreq;
    ompio_req->req_fh = fh;
    ompio_req->req_ompi.req_status.MPI_ERROR = MPI_SUCCESS;

    ompio_req->req_fview = (struct ompio_fview_t *) calloc(1, sizeof(struct ompio_fview_t));
    if (NULL == ompio_req->req_fview) {
        opal_output(1, "common_ompio: error allocating memory\n");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ompio_req->req_fview->f_decoded_iov = (struct iovec*) malloc ( fh->f_num_of_io_entries *
                                                                   sizeof(struct iovec));
    if (NULL == ompio_req->req_fview->f_decoded_iov) {
        opal_output(1, "common_ompio_file_iwrite_pregen: could not allocate memory\n");
        /* do not leak the fview allocated just above */
        free (ompio_req->req_fview);
        ompio_req->req_fview = NULL;
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* Convert the pre-computed io-array entries into a pseudo file view. */
    ompio_req->req_fview->f_iov_count = fh->f_num_of_io_entries;
    for (i=0; i < ompio_req->req_fview->f_iov_count; i++) {
        ompio_req->req_fview->f_decoded_iov[i].iov_base = fh->f_io_array[i].offset;
        ompio_req->req_fview->f_decoded_iov[i].iov_len  = fh->f_io_array[i].length;
    }

    /* The io-array content now lives in the pseudo fview; release the
    ** original array so the fcoll component does not re-issue it. */
    fh->f_num_of_io_entries = 0;
    free (fh->f_io_array);
    fh->f_io_array = NULL;

    mca_common_ompio_post_next_write_subreq(ompio_req, 0);
    return OMPI_SUCCESS;
}

int mca_common_ompio_file_iwrite_at (ompio_file_t *fh,
OMPI_MPI_OFFSET_TYPE offset,
const void *buf,
Expand Down
1 change: 1 addition & 0 deletions ompi/mca/fcoll/vulcan/fcoll_vulcan.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ extern int mca_fcoll_vulcan_priority;
extern int mca_fcoll_vulcan_num_groups;
extern int mca_fcoll_vulcan_write_chunksize;
extern int mca_fcoll_vulcan_async_io;
extern int mca_fcoll_vulcan_use_accelerator_buffers;

OMPI_DECLSPEC extern mca_fcoll_base_component_3_0_0_t mca_fcoll_vulcan_component;

Expand Down
Loading

0 comments on commit 1afb524

Please sign in to comment.