Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Checkpoint work on scheduler tool, few minor cleanups #1774

Merged
merged 4 commits into from
Jul 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ src/tools/prte/prte
src/tools/pcc/pcc
src/tools/pcc/pcc-wrapper-data.txt
src/tools/pterm/pterm
src/tools/psched/psched

src/util/hostfile/hostfile_lex.c
src/util/keyval/keyval_lex.c
Expand Down
1 change: 1 addition & 0 deletions config/prte_config_files.m4
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ AC_DEFUN([PRTE_CONFIG_FILES],[
src/tools/prte_info/Makefile
src/tools/prte/Makefile
src/tools/pterm/Makefile
src/tools/psched/Makefile
])
])
7 changes: 4 additions & 3 deletions src/mca/ess/base/ess_base_std_prolog.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2019 Intel, Inc. All rights reserved.
* Copyright (c) 2020 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2021-2022 Nanook Consulting. All rights reserved.
* Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -60,8 +60,9 @@ int prte_ess_base_std_prolog(void)
return PRTE_SUCCESS;

error:
pmix_show_help("help-prte-runtime", "prte_init:startup:internal-failure", true, error,
PRTE_ERROR_NAME(ret), ret);
pmix_show_help("help-prte-runtime",
"prte_init:startup:internal-failure", true,
error, PRTE_ERROR_NAME(ret), ret);

return ret;
}
24 changes: 12 additions & 12 deletions src/mca/state/dvm/state_dvm.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,18 @@ static void ready_for_debug(int fd, short args, void *cbata);
* DVM module - used when mpirun is persistent
******************/
prte_state_base_module_t prte_state_dvm_module = {
init,
finalize,
prte_state_base_activate_job_state,
prte_state_base_add_job_state,
prte_state_base_set_job_state_callback,
prte_state_base_set_job_state_priority,
prte_state_base_remove_job_state,
prte_state_base_activate_proc_state,
prte_state_base_add_proc_state,
prte_state_base_set_proc_state_callback,
prte_state_base_set_proc_state_priority,
prte_state_base_remove_proc_state
.init = init,
.finalize = finalize,
.activate_job_state = prte_state_base_activate_job_state,
.add_job_state = prte_state_base_add_job_state,
.set_job_state_callback = prte_state_base_set_job_state_callback,
.set_job_state_priority = prte_state_base_set_job_state_priority,
.remove_job_state = prte_state_base_remove_job_state,
.activate_proc_state = prte_state_base_activate_proc_state,
.add_proc_state = prte_state_base_add_proc_state,
.set_proc_state_callback = prte_state_base_set_proc_state_callback,
.set_proc_state_priority = prte_state_base_set_proc_state_priority,
.remove_proc_state = prte_state_base_remove_proc_state
};

static void dvm_notify(int sd, short args, void *cbdata);
Expand Down
24 changes: 12 additions & 12 deletions src/mca/state/prted/state_prted.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,18 +49,18 @@ static int finalize(void);
* PRTED module
******************/
prte_state_base_module_t prte_state_prted_module = {
init,
finalize,
prte_state_base_activate_job_state,
prte_state_base_add_job_state,
prte_state_base_set_job_state_callback,
prte_state_base_set_job_state_priority,
prte_state_base_remove_job_state,
prte_state_base_activate_proc_state,
prte_state_base_add_proc_state,
prte_state_base_set_proc_state_callback,
prte_state_base_set_proc_state_priority,
prte_state_base_remove_proc_state
.init = init,
.finalize = finalize,
.activate_job_state = prte_state_base_activate_job_state,
.add_job_state = prte_state_base_add_job_state,
.set_job_state_callback = prte_state_base_set_job_state_callback,
.set_job_state_priority = prte_state_base_set_job_state_priority,
.remove_job_state = prte_state_base_remove_job_state,
.activate_proc_state = prte_state_base_activate_proc_state,
.add_proc_state = prte_state_base_add_proc_state,
.set_proc_state_callback = prte_state_base_set_proc_state_callback,
.set_proc_state_priority = prte_state_base_set_proc_state_priority,
.remove_proc_state = prte_state_base_remove_proc_state
};

/* Local functions */
Expand Down
9 changes: 9 additions & 0 deletions src/prted/pmix/pmix_server.c
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ static void regcbfunc(pmix_status_t status, size_t ref, void *cbdata)
PRTE_HIDE_UNUSED_PARAMS(status, ref);

PMIX_ACQUIRE_OBJECT(lock);
lock->status = status;
PRTE_PMIX_WAKEUP_THREAD(lock);
}

Expand Down Expand Up @@ -596,6 +597,7 @@ int pmix_server_init(void)
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_HOSTNAME, prte_process_info.nodename, PMIX_STRING);
if (PMIX_SUCCESS != prc) {
PMIX_INFO_LIST_RELEASE(ilist);
rc = prte_pmix_convert_status(prc);
return rc;
}

Expand All @@ -604,6 +606,7 @@ int pmix_server_init(void)
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_EXTERNAL_AUX_EVENT_BASE, prte_event_base, PMIX_POINTER);
if (PMIX_SUCCESS != prc) {
PMIX_INFO_LIST_RELEASE(ilist);
rc = prte_pmix_convert_status(prc);
return rc;
}
#endif
Expand All @@ -620,12 +623,14 @@ int pmix_server_init(void)
PMIX_INFO_LIST_INSERT(prc, ilist, &myinf);
if (PMIX_SUCCESS != prc) {
PMIX_INFO_LIST_RELEASE(ilist);
rc = prte_pmix_convert_status(prc);
return rc;
}
// tell the server to share this topology for us
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_SERVER_SHARE_TOPOLOGY, NULL, PMIX_BOOL);
if (PMIX_SUCCESS != prc) {
PMIX_INFO_LIST_RELEASE(ilist);
rc = prte_pmix_convert_status(prc);
return rc;
}

Expand Down Expand Up @@ -838,6 +843,7 @@ int pmix_server_init(void)
/* PMIX_INFO_LIST_ADD stores its result in prc, so that is the value that
 * must be tested here (as the other list-add checks in this function do).
 * Testing rc would let a failed add slip through unnoticed. On failure,
 * release the partially built list and return the PRTE equivalent of the
 * PMIx status. */
PMIX_INFO_LIST_ADD(prc, ilist, PMIX_HOSTNAME, prte_process_info.nodename, PMIX_STRING);
if (PMIX_SUCCESS != prc) {
    PMIX_INFO_LIST_RELEASE(ilist);
    rc = prte_pmix_convert_status(prc);
    return rc;
}

Expand All @@ -848,6 +854,7 @@ int pmix_server_init(void)
free(tmp);
/* NOTE(review): this condition tests rc, but the failure path below converts
 * prc. If the preceding (not visible here) call reports its status through
 * prc, the condition should test prc instead - confirm against the caller. */
if (PMIX_SUCCESS != rc) {
PMIX_INFO_LIST_RELEASE(ilist);
rc = prte_pmix_convert_status(prc);
return rc;
}
}
Expand All @@ -871,7 +878,9 @@ int pmix_server_init(void)
prc = PMIX_ERR_LOST_CONNECTION;
PMIx_Register_event_handler(&prc, 1, NULL, 0, lost_connection_hdlr, regcbfunc, &lock);
PRTE_PMIX_WAIT_THREAD(&lock);
prc = lock.status;
PRTE_PMIX_DESTRUCT_LOCK(&lock);
rc = prte_pmix_convert_status(prc);

return rc;
}
Expand Down
7 changes: 3 additions & 4 deletions src/runtime/prte_init.c
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@
#include "src/runtime/prte_globals.h"
#include "src/runtime/prte_locks.h"
#include "src/runtime/runtime.h"
#include "src/runtime/runtime_internals.h"

/*
* Whether we have completed prte_init or we are in prte_finalize
Expand Down Expand Up @@ -116,8 +117,6 @@ static bool min_initialized = false;
#endif
const char prte_version_string[] = PRTE_IDENT_STRING;

static void preload_default_mca_params(void);

static bool check_exist(char *path)
{
struct stat buf;
Expand Down Expand Up @@ -222,7 +221,7 @@ int prte_init_util(prte_proc_type_t flags)
}

/* pre-load any default mca param files */
preload_default_mca_params();
prte_preload_default_mca_params();

/* Register all MCA Params */
if (PRTE_SUCCESS != (ret = prte_register_params())) {
Expand Down Expand Up @@ -417,7 +416,7 @@ static bool check_pmix_overlap(char *var, char *value)
return false;
}

static void preload_default_mca_params(void)
void prte_preload_default_mca_params(void)
{
char *file, *home, *tmp;
pmix_list_t params, params2, pfinal;
Expand Down
2 changes: 2 additions & 0 deletions src/runtime/runtime_internals.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ BEGIN_C_DECLS
*/
PRTE_EXPORT int prte_dt_init(void);

/**
 * Pre-load any default MCA parameter files.
 *
 * Called by prte_init_util() before the MCA parameters are registered.
 * Takes no arguments and returns nothing.
 */
PRTE_EXPORT void prte_preload_default_mca_params(void);

END_C_DECLS

#endif /* PRTE_RUNTIME_INTERNALS_H */
6 changes: 4 additions & 2 deletions src/tools/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ SUBDIRS += \
tools/pcc \
tools/prte_info \
tools/prte \
tools/pterm
tools/pterm \
tools/psched

DIST_SUBDIRS += \
tools/prted \
tools/prun \
tools/pcc \
tools/prte_info \
tools/prte \
tools/pterm
tools/pterm \
tools/psched
68 changes: 68 additions & 0 deletions src/tools/psched/Makefile.am
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#
# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
# University Research and Technology
# Corporation. All rights reserved.
# Copyright (c) 2004-2005 The University of Tennessee and The University
# of Tennessee Research Foundation. All rights
# reserved.
# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
# University of Stuttgart. All rights reserved.
# Copyright (c) 2004-2005 The Regents of the University of California.
# All rights reserved.
# Copyright (c) 2007-2020 Cisco Systems, Inc. All rights reserved
# Copyright (c) 2008 Sun Microsystems, Inc. All rights reserved.
# Copyright (c) 2016-2020 Intel, Inc. All rights reserved.
# Copyright (c) 2021-2023 Nanook Consulting. All rights reserved.
# $COPYRIGHT$
#
# Additional copyrights may follow
#
# $HEADER$
#

# Linker flags for the hwloc, libevent, and PMIx packages.
AM_LDFLAGS = $(prte_hwloc_LDFLAGS) $(prte_libevent_LDFLAGS) $(prte_pmix_LDFLAGS)
# Hand configure-time and build-time provenance (user, host, date, compiler
# and linker flags, compiler path, version/revision strings) to the compiler
# as string-literal preprocessor macros. The PRTE_BUILD_USER/HOST/DATE values
# are evaluated by the shell when the compile command runs.
AM_CFLAGS = \
-DPRTE_CONFIGURE_USER="\"@PRTE_CONFIGURE_USER@\"" \
-DPRTE_CONFIGURE_HOST="\"@PRTE_CONFIGURE_HOST@\"" \
-DPRTE_CONFIGURE_DATE="\"@PRTE_CONFIGURE_DATE@\"" \
-DPRTE_BUILD_USER="\"$$USER\"" \
-DPRTE_BUILD_HOST="\"$${HOSTNAME:-`(hostname || uname -n) | sed 1q`}\"" \
-DPRTE_BUILD_DATE="\"`$(top_srcdir)/config/getdate.sh`\"" \
-DPRTE_BUILD_CFLAGS="\"@CFLAGS@\"" \
-DPRTE_BUILD_CPPFLAGS="\"@CPPFLAGS@\"" \
-DPRTE_BUILD_LDFLAGS="\"@LDFLAGS@\"" \
-DPRTE_BUILD_LIBS="\"@LIBS@\"" \
-DPRTE_CC_ABSOLUTE="\"@PRTE_CC_ABSOLUTE@\"" \
-DPRTE_GREEK_VERSION="\"@PRTE_GREEK_VERSION@\"" \
-DPRTE_REPO_REV="\"@PRTE_REPO_REV@\"" \
-DPMIX_RELEASE_DATE="\"@PMIX_RELEASE_DATE@\""

# The psched executable, installed alongside the other binaries.
bin_PROGRAMS = psched

# Help text shipped in the distribution tarball and installed as a data file
# (the prtedata install directory is defined elsewhere in the build system).
dist_prtedata_DATA = help-psched.txt

# Sources (and private header) compiled into psched.
psched_SOURCES = \
psched.h \
psched.c \
errmgr.c \
state.c \
server.c \
scheduler.c \
backend.c \
event.c \
queries.c \
schizo.c \
session.c

# The following psched_LDFLAGS is intentionally left empty so that
# psched can be built statically simply by changing its value from
# nothing to -all-static in the Makefile.in. This is handy on
# systems whose compute nodes do not have all of the shared
# libraries installed.
psched_LDFLAGS =
psched_LDADD = \
$(prte_libevent_LIBS) \
$(prte_hwloc_LIBS) \
$(prte_pmix_LIBS) \
$(top_builddir)/src/libprrte.la
Loading