485 lines
21 KiB
Diff
485 lines
21 KiB
Diff
From f240edb5dc9637d7fbd0a134fb45610fba6a3950 Mon Sep 17 00:00:00 2001
|
|
From: rpm-build <rpm-build>
|
|
Date: Wed, 25 Oct 2023 14:18:19 +0800
|
|
Subject: [PATCH] improve error messages in pcs resource move
|
|
|
|
---
|
|
CHANGELOG.md | 3 +
|
|
pcs/cli/reports/messages.py | 13 ++
|
|
pcs/common/reports/messages.py | 17 ++-
|
|
pcs/lib/commands/resource.py | 116 ++++++++++++------
|
|
pcs/pcs.8.in | 2 +-
|
|
pcs/usage.py | 8 +-
|
|
pcs_test/tier0/cli/reports/test_messages.py | 49 ++++++++
|
|
.../tier0/common/reports/test_messages.py | 34 ++++-
|
|
.../resource/test_resource_move_autoclean.py | 8 ++
|
|
9 files changed, 206 insertions(+), 44 deletions(-)
|
|
|
|
diff --git a/CHANGELOG.md b/CHANGELOG.md
|
|
index c6007ac..9a32655 100644
|
|
--- a/CHANGELOG.md
|
|
+++ b/CHANGELOG.md
|
|
@@ -19,11 +19,14 @@
|
|
`pcs config checkpoint diff` command ([rhbz#2175881])
|
|
- Pcs daemon was allowing expired accounts, and accounts with expired
|
|
passwords to login when using PAM auth. ([huntr#220307])
|
|
+- Improved error messages and documentation of `pcs resource move` command
|
|
+ ([rhbz#2219554])
|
|
|
|
[ghissue#441]: https://github.com/ClusterLabs/pcs/issues/441
|
|
[ghpull#431]: https://github.com/ClusterLabs/pcs/pull/431
|
|
[rhbz#1996062]: https://bugzilla.redhat.com/show_bug.cgi?id=1996062
|
|
[rhbz#2019836]: https://bugzilla.redhat.com/show_bug.cgi?id=2019836
|
|
+[rhbz#2219554]: https://bugzilla.redhat.com/show_bug.cgi?id=2219554
|
|
[rhbz#2022463]: https://bugzilla.redhat.com/show_bug.cgi?id=2022463
|
|
[rhbz#2028902]: https://bugzilla.redhat.com/show_bug.cgi?id=2028902
|
|
[rhbz#2033248]: https://bugzilla.redhat.com/show_bug.cgi?id=2033248
|
|
diff --git a/pcs/cli/reports/messages.py b/pcs/cli/reports/messages.py
|
|
index 892ec15..5e1741b 100644
|
|
--- a/pcs/cli/reports/messages.py
|
|
+++ b/pcs/cli/reports/messages.py
|
|
@@ -516,6 +516,19 @@ class InvalidStonithAgentName(CliReportMessageCustom):
|
|
)
|
|
|
|
|
|
+class ResourceMoveAutocleanSimulationFailure(CliReportMessageCustom):
|
|
+ _obj: messages.ResourceMoveAutocleanSimulationFailure
|
|
+
|
|
+ @property
|
|
+ def message(self) -> str:
|
|
+ if not self._obj.move_constraint_left_in_cib:
|
|
+ return self._obj.message
|
|
+ node = format_optional(self._obj.node, " {}")
|
|
+ return (
|
|
+ f"{self._obj.message} Run 'pcs resource clear "
|
|
+ f"{self._obj.resource_id}{node}' to remove the constraint."
|
|
+ )
|
|
+
|
|
def _create_report_msg_map() -> Dict[str, type]:
|
|
result: Dict[str, type] = {}
|
|
for report_msg_cls in get_all_subclasses(CliReportMessageCustom):
|
|
diff --git a/pcs/common/reports/messages.py b/pcs/common/reports/messages.py
|
|
index 37c79ba..c183e96 100644
|
|
--- a/pcs/common/reports/messages.py
|
|
+++ b/pcs/common/reports/messages.py
|
|
@@ -6195,19 +6195,32 @@ class ResourceMoveAutocleanSimulationFailure(ReportItemMessage):
|
|
resource_id -- id of the resource to be moved
|
|
others_affected -- True if also other resource would be affected, False
|
|
otherwise
|
|
+ node -- target node the resource should be moved to
|
|
+ move_constraint_left_in_cib -- move has happened and the failure occurred
|
|
+ when trying to remove the move constraint from the live cib
|
|
"""
|
|
|
|
resource_id: str
|
|
others_affected: bool
|
|
+ node: Optional[str] = None
|
|
+ move_constraint_left_in_cib: bool = False
|
|
_code = codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE
|
|
|
|
@property
|
|
def message(self) -> str:
|
|
- return (
|
|
+ template = (
|
|
"Unable to ensure that moved resource '{resource_id}'{others} will "
|
|
"stay on the same node after a constraint used for moving it is "
|
|
"removed."
|
|
- ).format(
|
|
+ )
|
|
+ if self.move_constraint_left_in_cib:
|
|
+ template += (
|
|
+ " The constraint to move the resource has not been removed "
|
|
+ "from configuration. Consider removing it manually. Be aware "
|
|
+ "that removing the constraint may cause resources to move "
|
|
+ "to other nodes."
|
|
+ )
|
|
+ return template.format(
|
|
resource_id=self.resource_id,
|
|
others=" or other resources" if self.others_affected else "",
|
|
)
|
|
diff --git a/pcs/lib/commands/resource.py b/pcs/lib/commands/resource.py
|
|
index 82ce73e..39b24f0 100644
|
|
--- a/pcs/lib/commands/resource.py
|
|
+++ b/pcs/lib/commands/resource.py
|
|
@@ -1602,6 +1602,14 @@ def _nodes_exist_reports(
|
|
for node_name in (set(node_names) - existing_node_names)
|
|
]
|
|
|
|
+class ResourceMoveAutocleanSimulationFailure(Exception):
|
|
+ def __init__(self, other_resources_affected: bool):
|
|
+ super().__init__()
|
|
+ self._other_resources_affected = other_resources_affected
|
|
+
|
|
+ @property
|
|
+ def other_resources_affected(self) -> bool:
|
|
+ return self._other_resources_affected
|
|
|
|
def move_autoclean(
|
|
env: LibraryEnvironment,
|
|
@@ -1624,7 +1632,9 @@ def move_autoclean(
|
|
strict -- if True affecting other resources than the specified resource
|
|
will cause failure. If False affecting other resources is alowed.
|
|
"""
|
|
+ # pylint: disable=too-many-branches
|
|
# pylint: disable=too-many-locals
|
|
+ # pylint: disable=too-many-statements
|
|
wait_timeout = max(wait_timeout, 0)
|
|
if not env.is_cib_live:
|
|
raise LibraryError(
|
|
@@ -1655,6 +1665,8 @@ def move_autoclean(
|
|
reports.messages.CannotMoveResourceNotRunning(resource_id)
|
|
)
|
|
)
|
|
+ # add a move constraint to a temporary cib and get a cib diff which adds
|
|
+ # the move constraint
|
|
with get_tmp_cib(env.report_processor, cib_xml) as rsc_moved_cib_file:
|
|
stdout, stderr, retval = resource_move(
|
|
env.cmd_runner(dict(CIB_file=rsc_moved_cib_file.name)),
|
|
@@ -1676,6 +1688,8 @@ def move_autoclean(
|
|
add_constraint_cib_diff = diff_cibs_xml(
|
|
env.cmd_runner(), env.report_processor, cib_xml, rsc_moved_cib_xml
|
|
)
|
|
+ # clear the move constraint from the temporary cib and get a cib diff which
|
|
+ # removes the move constraint
|
|
with get_tmp_cib(
|
|
env.report_processor, rsc_moved_cib_xml
|
|
) as rsc_moved_constraint_cleared_cib_file:
|
|
@@ -1703,17 +1717,18 @@ def move_autoclean(
|
|
rsc_moved_cib_xml,
|
|
constraint_removed_cib,
|
|
)
|
|
-
|
|
+ # if both the diffs are no-op, nothing needs to be done
|
|
if not (add_constraint_cib_diff and remove_constraint_cib_diff):
|
|
env.report_processor.report(
|
|
reports.ReportItem.info(reports.messages.NoActionNecessary())
|
|
)
|
|
return
|
|
-
|
|
+ # simulate applying the diff which adds the move constraint
|
|
_, move_transitions, after_move_simulated_cib = simulate_cib(
|
|
env.cmd_runner(), get_cib(rsc_moved_cib_xml)
|
|
)
|
|
if strict:
|
|
+ # check if other resources would be affected
|
|
resources_affected_by_move = (
|
|
simulate_tools.get_resources_from_operations(
|
|
simulate_tools.get_operations_from_transitions(
|
|
@@ -1730,16 +1745,37 @@ def move_autoclean(
|
|
)
|
|
)
|
|
)
|
|
- _ensure_resource_moved_and_not_moved_back(
|
|
- env.cmd_runner,
|
|
- env.report_processor,
|
|
- etree_to_str(after_move_simulated_cib),
|
|
- remove_constraint_cib_diff,
|
|
- resource_id,
|
|
- strict,
|
|
- resource_state_before,
|
|
- node,
|
|
- )
|
|
+ # verify that:
|
|
+ # - a cib with added move constraint causes the resource to move by
|
|
+ # comparing the original status of the resource with a status computed
|
|
+ # from the cib with the added constraint
|
|
+ # - applying the diff which removes the move constraint won't trigger
|
|
+ # moving the resource (or other resources) around
|
|
+ try:
|
|
+ _ensure_resource_moved_and_not_moved_back(
|
|
+ env.cmd_runner,
|
|
+ env.report_processor,
|
|
+ etree_to_str(after_move_simulated_cib),
|
|
+ remove_constraint_cib_diff,
|
|
+ resource_id,
|
|
+ strict,
|
|
+ resource_state_before,
|
|
+ node,
|
|
+ )
|
|
+ except ResourceMoveAutocleanSimulationFailure as e:
|
|
+ raise LibraryError(
|
|
+ reports.ReportItem.error(
|
|
+ reports.messages.ResourceMoveAutocleanSimulationFailure(
|
|
+ resource_id,
|
|
+ e.other_resources_affected,
|
|
+ node=node,
|
|
+ move_constraint_left_in_cib=False,
|
|
+ )
|
|
+ )
|
|
+ ) from e
|
|
+
|
|
+ # apply the diff which adds the move constraint to the live cib and wait
|
|
+ # for the cluster to settle
|
|
push_cib_diff_xml(env.cmd_runner(), add_constraint_cib_diff)
|
|
env.report_processor.report(
|
|
ReportItem.info(
|
|
@@ -1747,16 +1783,36 @@ def move_autoclean(
|
|
)
|
|
)
|
|
env.wait_for_idle(wait_timeout)
|
|
- _ensure_resource_moved_and_not_moved_back(
|
|
- env.cmd_runner,
|
|
- env.report_processor,
|
|
- get_cib_xml(env.cmd_runner()),
|
|
- remove_constraint_cib_diff,
|
|
- resource_id,
|
|
- strict,
|
|
- resource_state_before,
|
|
- node,
|
|
- )
|
|
+ # verify that:
|
|
+ # - the live cib (now containing the move constraint) causes the resource
|
|
+ # to move by comparing the original status of the resource with a status
|
|
+ # computed from the live cib
|
|
+ # - applying the diff which removes the move constraint won't trigger
|
|
+ # moving the resource (or other resources) around
|
|
+ try:
|
|
+ _ensure_resource_moved_and_not_moved_back(
|
|
+ env.cmd_runner,
|
|
+ env.report_processor,
|
|
+ get_cib_xml(env.cmd_runner()),
|
|
+ remove_constraint_cib_diff,
|
|
+ resource_id,
|
|
+ strict,
|
|
+ resource_state_before,
|
|
+ node,
|
|
+ )
|
|
+ except ResourceMoveAutocleanSimulationFailure as e:
|
|
+ raise LibraryError(
|
|
+ reports.ReportItem.error(
|
|
+ reports.messages.ResourceMoveAutocleanSimulationFailure(
|
|
+ resource_id,
|
|
+ e.other_resources_affected,
|
|
+ node=node,
|
|
+ move_constraint_left_in_cib=True,
|
|
+ )
|
|
+ )
|
|
+ ) from e
|
|
+ # apply the diff which removes the move constraint to the live cib and wait
|
|
+ # for the cluster to settle
|
|
push_cib_diff_xml(env.cmd_runner(), remove_constraint_cib_diff)
|
|
env.report_processor.report(
|
|
ReportItem.info(
|
|
@@ -1822,13 +1878,7 @@ def _ensure_resource_moved_and_not_moved_back(
|
|
)
|
|
if strict:
|
|
if clean_operations:
|
|
- raise LibraryError(
|
|
- reports.ReportItem.error(
|
|
- reports.messages.ResourceMoveAutocleanSimulationFailure(
|
|
- resource_id, others_affected=True
|
|
- )
|
|
- )
|
|
- )
|
|
+ raise ResourceMoveAutocleanSimulationFailure(True)
|
|
else:
|
|
if any(
|
|
rsc == resource_id
|
|
@@ -1836,13 +1886,7 @@ def _ensure_resource_moved_and_not_moved_back(
|
|
clean_operations
|
|
)
|
|
):
|
|
- raise LibraryError(
|
|
- reports.ReportItem.error(
|
|
- reports.messages.ResourceMoveAutocleanSimulationFailure(
|
|
- resource_id, others_affected=False
|
|
- )
|
|
- )
|
|
- )
|
|
+ raise ResourceMoveAutocleanSimulationFailure(False)
|
|
|
|
|
|
def ban(env, resource_id, node=None, master=False, lifetime=None, wait=False):
|
|
diff --git a/pcs/pcs.8.in b/pcs/pcs.8.in
|
|
index 94514c1..8f4b647 100644
|
|
--- a/pcs/pcs.8.in
|
|
+++ b/pcs/pcs.8.in
|
|
@@ -152,7 +152,7 @@ debug\-monitor <resource id> [\fB\-\-full\fR]
|
|
This command will force the specified resource to be monitored on this node ignoring the cluster recommendations and print the output from monitoring the resource. Using \fB\-\-full\fR will give more detailed output. This is mainly used for debugging resources that fail to be monitored.
|
|
.TP
|
|
move <resource id> [destination node] [\fB\-\-promoted\fR] [\fB\-\-strict\fR] [\fB\-\-wait\fR[=n]]
|
|
-Move the resource off the node it is currently running on. This is achieved by creating a \-INFINITY location constraint to ban the node. If destination node is specified the resource will be moved to that node by creating an INFINITY location constraint to prefer the destination node. The constraint needed for moving the resource will be automatically removed once the resource is running on it's new location. The command will fail in case it is not possible to verify that the resource will not be moved back after deleting the constraint.
|
|
+Move the resource off the node it is currently running on. This is achieved by creating a \-INFINITY location constraint to ban the node. If destination node is specified the resource will be moved to that node by creating an INFINITY location constraint to prefer the destination node. The constraint needed for moving the resource will be automatically removed once the resource is running on its new location. The command will fail in case it is not possible to verify that the resource will not be moved back after deleting the constraint. If this happens after the location constraint has been created, the constraint will be left in the configuration.
|
|
|
|
If \fB\-\-strict\fR is specified, the command will also fail if other resources would be affected.
|
|
|
|
diff --git a/pcs/usage.py b/pcs/usage.py
|
|
index bc88591..aab6a78 100644
|
|
--- a/pcs/usage.py
|
|
+++ b/pcs/usage.py
|
|
@@ -369,9 +369,11 @@ Commands:
|
|
If destination node is specified the resource will be moved to that
|
|
node by creating an INFINITY location constraint to prefer the
|
|
destination node. The constraint needed for moving the resource will be
|
|
- automatically removed once the resource is running on it's new
|
|
- location. The command will fail in case it is not possible to verify
|
|
- that the resource will not be moved back after deleting the constraint.
|
|
+ automatically removed once the resource is running on its new location.
|
|
+ The command will fail in case it is not possible to verify that the
|
|
+ resource will not be moved back after deleting the constraint. If this
|
|
+ happens after the location constraint has been created, the constraint
|
|
+ will be left in the configuration.
|
|
|
|
If --strict is specified, the command will also fail if other resources
|
|
would be affected.
|
|
diff --git a/pcs_test/tier0/cli/reports/test_messages.py b/pcs_test/tier0/cli/reports/test_messages.py
|
|
index 6671021..ac8625c 100644
|
|
--- a/pcs_test/tier0/cli/reports/test_messages.py
|
|
+++ b/pcs_test/tier0/cli/reports/test_messages.py
|
|
@@ -618,3 +618,52 @@ class StonithRestartlessUpdateUnableToPerform(CliReportMessageTestBase):
|
|
report_msg,
|
|
f"{report_msg.message}, please use command 'pcs stonith update' instead",
|
|
)
|
|
+
|
|
+class ResourceMoveAutocleanSimulationFailure(CliReportMessageTestBase):
|
|
+ def test_constraint_not_created(self):
|
|
+ self.assert_message(
|
|
+ messages.ResourceMoveAutocleanSimulationFailure(
|
|
+ "R1", others_affected=True
|
|
+ ),
|
|
+ (
|
|
+ "Unable to ensure that moved resource 'R1' or other resources "
|
|
+ "will stay on the same node after a constraint used for moving "
|
|
+ "it is removed."
|
|
+ ),
|
|
+ )
|
|
+
|
|
+ def test_without_node(self):
|
|
+ self.assert_message(
|
|
+ messages.ResourceMoveAutocleanSimulationFailure(
|
|
+ "R1", others_affected=True, move_constraint_left_in_cib=True
|
|
+ ),
|
|
+ (
|
|
+ "Unable to ensure that moved resource 'R1' or other resources "
|
|
+ "will stay on the same node after a constraint used for moving "
|
|
+ "it is removed."
|
|
+ " The constraint to move the resource has not been removed "
|
|
+ "from configuration. Consider removing it manually. Be aware "
|
|
+ "that removing the constraint may cause resources to move to "
|
|
+ "other nodes."
|
|
+ " Run 'pcs resource clear R1' to remove the constraint."
|
|
+ ),
|
|
+ )
|
|
+
|
|
+ def test_with_node(self):
|
|
+ self.assert_message(
|
|
+ messages.ResourceMoveAutocleanSimulationFailure(
|
|
+ "R1",
|
|
+ others_affected=False,
|
|
+ node="node1",
|
|
+ move_constraint_left_in_cib=True,
|
|
+ ),
|
|
+ (
|
|
+ "Unable to ensure that moved resource 'R1' will stay on the "
|
|
+ "same node after a constraint used for moving it is removed."
|
|
+ " The constraint to move the resource has not been removed "
|
|
+ "from configuration. Consider removing it manually. Be aware "
|
|
+ "that removing the constraint may cause resources to move to "
|
|
+ "other nodes."
|
|
+ " Run 'pcs resource clear R1 node1' to remove the constraint."
|
|
+ ),
|
|
+ )
|
|
\ No newline at end of file
|
|
diff --git a/pcs_test/tier0/common/reports/test_messages.py b/pcs_test/tier0/common/reports/test_messages.py
|
|
index a56ef38..7b1b799 100644
|
|
--- a/pcs_test/tier0/common/reports/test_messages.py
|
|
+++ b/pcs_test/tier0/common/reports/test_messages.py
|
|
@@ -4542,7 +4542,7 @@ class ResourceMoveAffectsOtherResources(NameBuildTest):
|
|
|
|
|
|
class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
|
|
- def test_success(self):
|
|
+ def test_simulation(self):
|
|
self.assert_message_from_report(
|
|
(
|
|
"Unable to ensure that moved resource 'R1' will stay on the "
|
|
@@ -4553,7 +4553,7 @@ class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
|
|
),
|
|
)
|
|
|
|
- def test_others_affected(self):
|
|
+ def test_simulation_others_affected(self):
|
|
self.assert_message_from_report(
|
|
(
|
|
"Unable to ensure that moved resource 'R1' or other resources "
|
|
@@ -4565,6 +4565,36 @@ class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
|
|
),
|
|
)
|
|
|
|
+ def test_live(self):
|
|
+ self.assert_message_from_report(
|
|
+ (
|
|
+ "Unable to ensure that moved resource 'R1' will stay on the "
|
|
+ "same node after a constraint used for moving it is removed."
|
|
+ " The constraint to move the resource has not been removed "
|
|
+ "from configuration. Consider removing it manually. Be aware "
|
|
+ "that removing the constraint may cause resources to move to "
|
|
+ "other nodes."
|
|
+ ),
|
|
+ reports.ResourceMoveAutocleanSimulationFailure(
|
|
+ "R1", others_affected=False, move_constraint_left_in_cib=True
|
|
+ ),
|
|
+ )
|
|
+
|
|
+ def test_live_others_affected(self):
|
|
+ self.assert_message_from_report(
|
|
+ (
|
|
+ "Unable to ensure that moved resource 'R1' or other resources "
|
|
+ "will stay on the same node after a constraint used for moving "
|
|
+ "it is removed."
|
|
+ " The constraint to move the resource has not been removed "
|
|
+ "from configuration. Consider removing it manually. Be aware "
|
|
+ "that removing the constraint may cause resources to move to "
|
|
+ "other nodes."
|
|
+ ),
|
|
+ reports.ResourceMoveAutocleanSimulationFailure(
|
|
+ "R1", others_affected=True, move_constraint_left_in_cib=True
|
|
+ ),
|
|
+ )
|
|
|
|
class ParseErrorJsonFile(NameBuildTest):
|
|
def test_success(self):
|
|
diff --git a/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py b/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
|
|
index 90ed2d8..6e5c286 100644
|
|
--- a/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
|
|
+++ b/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
|
|
@@ -1261,6 +1261,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
|
|
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
|
|
resource_id=self.resource_id,
|
|
others_affected=False,
|
|
+ node=None,
|
|
+ move_constraint_left_in_cib=False,
|
|
)
|
|
],
|
|
expected_in_processor=False,
|
|
@@ -1292,6 +1294,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
|
|
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
|
|
resource_id=self.resource_id,
|
|
others_affected=True,
|
|
+ node=None,
|
|
+ move_constraint_left_in_cib=False,
|
|
)
|
|
],
|
|
expected_in_processor=False,
|
|
@@ -1324,6 +1328,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
|
|
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
|
|
resource_id=self.resource_id,
|
|
others_affected=False,
|
|
+ node=None,
|
|
+ move_constraint_left_in_cib=True,
|
|
)
|
|
],
|
|
expected_in_processor=False,
|
|
@@ -1355,6 +1361,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
|
|
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
|
|
resource_id=self.resource_id,
|
|
others_affected=True,
|
|
+ node=None,
|
|
+ move_constraint_left_in_cib=True,
|
|
)
|
|
],
|
|
expected_in_processor=False,
|
|
--
|
|
2.33.0
|
|
|