Skip to content

Commit

Permalink
Merge branch 'master' of github.com:ClusterLabs/pacemaker
Browse files Browse the repository at this point in the history
  • Loading branch information
beekhof committed Apr 17, 2013
2 parents bc70e38 + d63c13c commit 138556c
Show file tree
Hide file tree
Showing 15 changed files with 87 additions and 26 deletions.
1 change: 1 addition & 0 deletions crmd/crmd_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ void crmd_join_phase_log(int level);

const char *get_timer_desc(fsa_timer_t * timer);
gboolean too_many_st_failures(void);
void reset_st_fail_count(const char * target);

# define fsa_register_cib_callback(id, flag, data, fn) do { \
fsa_cib_conn->cmds->register_callback( \
Expand Down
2 changes: 1 addition & 1 deletion crmd/lrm.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ lrm_clear_last_failure(const char *rsc_id, const char *node_name)
}
}
free(attr);

g_list_free(lrm_state_list);
}

static gboolean
Expand Down
2 changes: 1 addition & 1 deletion crmd/remote_lrmd_ra.c
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ handle_remote_ra_exec(gpointer user_data)
fsa_cib_delete(XML_CIB_TAG_STATUS, status, cib_quorum_override, rc, NULL);
crm_info("Forced a remote LRM refresh before connection start: call=%d", rc);
crm_log_xml_trace(status, "CLEAR LRM");
free(status);
free_xml(status);

rc = handle_remote_ra_start(lrm_state, cmd, cmd->timeout);
if (rc == 0) {
Expand Down
14 changes: 14 additions & 0 deletions crmd/te_callbacks.c
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,20 @@ too_many_st_failures(void)
return FALSE;
}

/*
 * Reset the failure count recorded for a single target.
 *
 * Looks target up in the stonith_failures table and, if a record is found,
 * sets that record's count back to 0.  Nothing happens when the table does
 * not exist or contains no record for target.
 */
void
reset_st_fail_count(const char *target)
{
    struct st_fail_rec *entry = NULL;

    /* No table means there is nothing recorded to reset */
    if (stonith_failures == NULL) {
        return;
    }

    entry = g_hash_table_lookup(stonith_failures, target);
    if (entry != NULL) {
        entry->count = 0;
    }
}

void
tengine_stonith_callback(stonith_t * stonith, stonith_callback_data_t * data)
{
Expand Down
7 changes: 7 additions & 0 deletions crmd/te_events.c
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int targe
const char *value = NULL;
const char *id = crm_element_value(event, XML_LRM_ATTR_TASK_KEY);
const char *on_uname = get_uname_from_event(event);
const char *origin = crm_element_value(event, XML_ATTR_ORIGIN);

if (rc == 99) {
/* this is an internal code for "we're busy, try again" */
Expand All @@ -144,6 +145,12 @@ update_failcount(xmlNode * event, const char *event_node_uuid, int rc, int targe
return FALSE;
}

if (safe_str_eq(origin, "build_active_RAs")) {
crm_debug("No update for %s (rc=%d) on %s: Old failure from lrm status refresh",
id, rc, on_uname);
return FALSE;
}

if (failed_stop_offset == NULL) {
failed_stop_offset = strdup(INFINITY_S);
}
Expand Down
5 changes: 5 additions & 0 deletions crmd/te_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,11 @@ tengine_stonith_notify(stonith_t * st, stonith_event_t * st_event)
return;
}

if (st_event->result == pcmk_ok &&
safe_str_eq(st_event->operation, T_STONITH_NOTIFY_FENCE)) {
reset_st_fail_count(st_event->target);
}

crm_notice("Peer %s was%s terminated (%s) by %s for %s: %s (ref=%s) by client %s",
st_event->target, st_event->result == pcmk_ok ? "" : " not",
st_event->operation,
Expand Down
16 changes: 16 additions & 0 deletions doc/Pacemaker_Explained/en-US/Ch-Resources.txt
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,22 @@ indexterm:[Resource,Option,target-role]
indexterm:[multiple-active,Resource Option]
indexterm:[Resource,Option,multiple-active]

|remote-node
|+<none>+ (disabled)
|The name of the remote-node this resource defines. This both enables the resource as a remote-node and defines the unique name used to identify the remote-node. If no other parameters are set, this value will also be assumed as the hostname to connect to at port 3121. +WARNING+ This value cannot overlap with any resource or node IDs.

|remote-port
|+3121+
|Configure a custom port to use for the guest connection to pacemaker_remote.

|remote-addr
|+remote-node+ value used as hostname
|The IP address or hostname to connect to if remote-node's name is not the hostname of the guest.

|+remote-connect-timeout+
|+60s+
|How long before a pending guest connection will time out.

|=========================================================

If you performed the following commands on the previous LSB Email resource
Expand Down
4 changes: 2 additions & 2 deletions doc/Pacemaker_Remote/en-US/Ch-Example.txt
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ Last updated: Wed Mar 13 13:52:39 2013
Last change: Wed Mar 13 13:25:17 2013 via crmd on node1
Stack: corosync
Current DC: node1 (24815808) - partition with quorum
Version: 1.1.9
Version: 1.1.10
2 Nodes configured, unknown expected votes
2 Resources configured.

Expand All @@ -91,7 +91,7 @@ Last updated: Wed Mar 13 13:52:39 2013
Last change: Wed Mar 13 13:25:17 2013 via crmd on node1
Stack: corosync
Current DC: node1 (24815808) - partition with quorum
Version: 1.1.9
Version: 1.1.10
2 Nodes configured, unknown expected votes
2 Resources configured.

Expand Down
2 changes: 1 addition & 1 deletion doc/Pacemaker_Remote/en-US/Ch-Intro.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
= Extending High Availability Cluster into Virtual Nodes =

== Overview ==
The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.9.1 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of managing both launching virtual environments (KVM/LXC) as well as launching the resources that live withing those virtual environments without requiring the virtual environments to run pacemaker or corosync.
The recent addition of the +pacemaker_remote+ service supported by +Pacemaker version 1.1.10 and greater+ allows nodes not running the cluster stack (pacemaker+corosync) to integrate into the cluster and have the cluster manage their resources just as if they were a real cluster node. This means that pacemaker clusters are now capable of managing both the launching of virtual environments (KVM/LXC) and the launching of the resources that live within those virtual environments, without requiring the virtual environments to run pacemaker or corosync.

== Terms ==
+cluster-node+ - A baremetal hardware node running the High Availability stack (pacemaker + corosync)
Expand Down
12 changes: 6 additions & 6 deletions doc/Pacemaker_Remote/en-US/Ch-KVM-Tutorial.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

== Step 1: Setup the Host ==

This tutorial was created using Fedora 18 on the host and guest nodes. Anything that is capable of running libvirt and pacemaker v1.1.9.1 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/.
This tutorial was created using Fedora 18 on the host and guest nodes. Anything that is capable of running libvirt and pacemaker v1.1.10 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/.

Fedora 18 (or similar distro) host preparation steps.

Expand Down Expand Up @@ -94,7 +94,7 @@ Verify pacemaker status. At first the 'pcs cluster status' output will look lik
Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host
Stack: corosync
Current DC:
Version: 1.1.9.1
Version: 1.1.10
1 Nodes configured, unknown expected votes
0 Resources configured.
----
Expand Down Expand Up @@ -285,7 +285,7 @@ Last updated: Thu Mar 14 16:41:22 2013
Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host
Stack: corosync
Current DC: example-host (1795270848) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
1 Nodes configured, unknown expected votes
0 Resources configured.

Expand Down Expand Up @@ -344,7 +344,7 @@ Last updated: Fri Mar 15 09:30:30 2013
Last change: Thu Mar 14 17:21:35 2013 via cibadmin on example-host
Stack: corosync
Current DC: example-host (1795270848) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
2 Nodes configured, unknown expected votes
2 Resources configured.

Expand Down Expand Up @@ -426,7 +426,7 @@ Last updated: Fri Mar 15 11:00:31 2013
Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host
Stack: corosync
Current DC: example-host (1795270848) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
2 Nodes configured, unknown expected votes
7 Resources configured.

Expand Down Expand Up @@ -455,7 +455,7 @@ Last updated: Fri Mar 15 11:03:17 2013
Last change: Fri Mar 15 09:54:16 2013 via cibadmin on example-host
Stack: corosync
Current DC: example-host (1795270848) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
2 Nodes configured, unknown expected votes
7 Resources configured.

Expand Down
10 changes: 5 additions & 5 deletions doc/Pacemaker_Remote/en-US/Ch-LXC-Tutorial.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

== Step 1: Setup LXC Host ==

This tutorial was tested with Fedora 18. Anything that is capable of running libvirt and pacemaker v1.1.9.1 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/.
This tutorial was tested with Fedora 18. Anything that is capable of running libvirt and pacemaker v1.1.10 or greater will do though. An installation guide for installing Fedora 18 can be found here, http://docs.fedoraproject.org/en-US/Fedora/18/html/Installation_Guide/.

Fedora 18 (or similar distro) host preparation steps.

Expand Down Expand Up @@ -96,7 +96,7 @@ Verify pacemaker status. At first the 'pcs cluster status' output will look lik
Last change: Thu Mar 14 12:25:55 2013 via crmd on example-host
Stack: corosync
Current DC:
Version: 1.1.9.1
Version: 1.1.10
1 Nodes configured, unknown expected votes
0 Resources configured.
----
Expand Down Expand Up @@ -201,7 +201,7 @@ Last updated: Thu Mar 14 16:41:22 2013
Last change: Thu Mar 14 16:41:08 2013 via crmd on example-host
Stack: corosync
Current DC: example-host (1795270848) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
1 Nodes configured, unknown expected votes
0 Resources configured.

Expand Down Expand Up @@ -239,7 +239,7 @@ Last updated: Mon Mar 18 17:15:46 2013
Last change: Mon Mar 18 17:15:26 2013 via cibadmin on guest1
Stack: corosync
Current DC: example-host (175810752) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
4 Nodes configured, unknown expected votes
6 Resources configured.

Expand Down Expand Up @@ -277,7 +277,7 @@ Last updated: Mon Mar 18 17:31:54 2013
Last change: Mon Mar 18 17:31:05 2013 via cibadmin on example-host
Stack: corosync
Current DC: example-host (175810752) - partition WITHOUT quorum
Version: 1.1.9.1
Version: 1.1.10
4 Nodes configured, unknown expected votes
11 Resources configured.

Expand Down
2 changes: 1 addition & 1 deletion doc/Pacemaker_Remote/en-US/Ch-Options.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ When configuring a virtual machine or lxc resource to act as a remote-node, thes
|Configure a custom port to use for the guest connection to pacemaker_remote.

|+remote-addr+
|node name
|+remote-node+ value used as hostname
|The IP address or hostname to connect to if remote-node's name is not the hostname of the guest.

|+remote-connect-timeout+
Expand Down
19 changes: 12 additions & 7 deletions lib/common/remote.c
Original file line number Diff line number Diff line change
Expand Up @@ -728,11 +728,11 @@ check_connect_finished(gpointer userdata)
} else {
close(sock);
}
free(cb_data);

if (cb_data->callback) {
cb_data->callback(cb_data->userdata, rc);
}
free(cb_data);
return FALSE;

reschedule:
Expand Down Expand Up @@ -821,12 +821,12 @@ int
crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */
void *userdata, void (*callback) (void *userdata, int sock))
{
struct addrinfo *res;
struct addrinfo *rp;
struct addrinfo *res = NULL;
struct addrinfo *rp = NULL;
struct addrinfo hints;
const char *server = host;
int ret_ga;
int sock;
int sock = -1;

/* getaddrinfo */
memset(&hints, 0, sizeof(struct addrinfo));
Expand All @@ -843,7 +843,7 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */

if (!res || !res->ai_addr) {
crm_err("getaddrinfo failed");
return -1;
goto async_cleanup;
}

for (rp = res; rp != NULL; rp = rp->ai_next) {
Expand Down Expand Up @@ -879,7 +879,8 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */
if (callback) {
if (internal_tcp_connect_async
(sock, rp->ai_addr, rp->ai_addrlen, timeout, userdata, callback) == 0) {
return 0; /* Success for now, we'll hear back later in the callback */
sock = 0;
goto async_cleanup; /* Success for now, we'll hear back later in the callback */
}

} else {
Expand All @@ -891,8 +892,12 @@ crm_remote_tcp_connect_async(const char *host, int port, int timeout, /*ms */
close(sock);
sock = -1;
}
freeaddrinfo(res);

async_cleanup:

if (res) {
freeaddrinfo(res);
}
return sock;
}

Expand Down
1 change: 1 addition & 0 deletions lib/services/services_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,7 @@ services_os_action_execute(svc_action_t * op, gboolean synchronous)

close(op->opaque->stdout_fd);
close(op->opaque->stderr_fd);
close(sfd);

if (sigismember(&old_mask, SIGCHLD) == 0) {
if (sigprocmask(SIG_UNBLOCK, &mask, NULL) < 0) {
Expand Down
16 changes: 14 additions & 2 deletions tools/cibadmin.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ int request_id = 0;
int operation_status = 0;
cib_t *the_cib = NULL;
gboolean force_flag = FALSE;
gboolean quiet = FALSE;
int bump_log_num = 0;

/* *INDENT-OFF* */
static struct crm_option long_options[] = {
Expand Down Expand Up @@ -227,7 +229,7 @@ main(int argc, char **argv)

int option_index = 0;

crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, FALSE);
crm_system_name = "cibadmin";
crm_set_options(NULL, "command [options] [data]", long_options,
"Provides direct access to the cluster configuration."
"\n\nAllows the configuration, or sections of it, to be queried, modified, replaced and deleted."
Expand Down Expand Up @@ -266,6 +268,7 @@ main(int argc, char **argv)
break;
case 'Q':
cib_action = CIB_OP_QUERY;
quiet = TRUE;
break;
case 'P':
cib_action = CIB_OP_APPLY_DIFF;
Expand Down Expand Up @@ -316,7 +319,7 @@ main(int argc, char **argv)
break;
case 'V':
command_options = command_options | cib_verbose;
crm_bump_log_level(argc, argv);
bump_log_num++;
break;
case '?':
case '$':
Expand Down Expand Up @@ -384,6 +387,15 @@ main(int argc, char **argv)
break;
}
}

if (bump_log_num > 0) {
quiet = FALSE;
}
crm_log_init(NULL, LOG_CRIT, FALSE, FALSE, argc, argv, quiet);
while (bump_log_num > 0) {
crm_bump_log_level(argc, argv);
bump_log_num--;
}

if (optind < argc) {
printf("non-option ARGV-elements: ");
Expand Down

0 comments on commit 138556c

Please sign in to comment.