!50 Improve error messages in pcs resource move

From: @xiangbudaomz 
Reviewed-by: @jxy_git 
Signed-off-by: @jxy_git
This commit is contained in:
openeuler-ci-bot 2023-10-25 07:30:25 +00:00 committed by Gitee
commit c0157dace5
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
2 changed files with 489 additions and 2 deletions

View File

@ -0,0 +1,484 @@
From f240edb5dc9637d7fbd0a134fb45610fba6a3950 Mon Sep 17 00:00:00 2001
From: rpm-build <rpm-build>
Date: Wed, 25 Oct 2023 14:18:19 +0800
Subject: [PATCH] improve error messages in pcs resource move
---
CHANGELOG.md | 3 +
pcs/cli/reports/messages.py | 13 ++
pcs/common/reports/messages.py | 17 ++-
pcs/lib/commands/resource.py | 116 ++++++++++++------
pcs/pcs.8.in | 2 +-
pcs/usage.py | 8 +-
pcs_test/tier0/cli/reports/test_messages.py | 49 ++++++++
.../tier0/common/reports/test_messages.py | 34 ++++-
.../resource/test_resource_move_autoclean.py | 8 ++
9 files changed, 206 insertions(+), 44 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c6007ac..9a32655 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,11 +19,14 @@
`pcs config checkpoint diff` command ([rhbz#2175881])
- Pcs daemon was allowing expired accounts, and accounts with expired
passwords to login when using PAM auth. ([huntr#220307])
+- Improved error messages and documentation of `pcs resource move` command
+ ([rhbz#2219554])
[ghissue#441]: https://github.com/ClusterLabs/pcs/issues/441
[ghpull#431]: https://github.com/ClusterLabs/pcs/pull/431
[rhbz#1996062]: https://bugzilla.redhat.com/show_bug.cgi?id=1996062
[rhbz#2019836]: https://bugzilla.redhat.com/show_bug.cgi?id=2019836
+[rhbz#2219554]: https://bugzilla.redhat.com/show_bug.cgi?id=2219554
[rhbz#2022463]: https://bugzilla.redhat.com/show_bug.cgi?id=2022463
[rhbz#2028902]: https://bugzilla.redhat.com/show_bug.cgi?id=2028902
[rhbz#2033248]: https://bugzilla.redhat.com/show_bug.cgi?id=2033248
diff --git a/pcs/cli/reports/messages.py b/pcs/cli/reports/messages.py
index 892ec15..5e1741b 100644
--- a/pcs/cli/reports/messages.py
+++ b/pcs/cli/reports/messages.py
@@ -516,6 +516,19 @@ class InvalidStonithAgentName(CliReportMessageCustom):
)
+class ResourceMoveAutocleanSimulationFailure(CliReportMessageCustom):
+ _obj: messages.ResourceMoveAutocleanSimulationFailure
+
+ @property
+ def message(self) -> str:
+ if not self._obj.move_constraint_left_in_cib:
+ return self._obj.message
+ node = format_optional(self._obj.node, " {}")
+ return (
+ f"{self._obj.message} Run 'pcs resource clear "
+ f"{self._obj.resource_id}{node}' to remove the constraint."
+ )
+
def _create_report_msg_map() -> Dict[str, type]:
result: Dict[str, type] = {}
for report_msg_cls in get_all_subclasses(CliReportMessageCustom):
diff --git a/pcs/common/reports/messages.py b/pcs/common/reports/messages.py
index 37c79ba..c183e96 100644
--- a/pcs/common/reports/messages.py
+++ b/pcs/common/reports/messages.py
@@ -6195,19 +6195,32 @@ class ResourceMoveAutocleanSimulationFailure(ReportItemMessage):
resource_id -- id of the resource to be moved
others_affected -- True if also other resource would be affected, False
otherwise
+ node -- target node the resource should be moved to
+ move_constraint_left_in_cib -- move has happened and the failure occurred
+ when trying to remove the move constraint from the live cib
"""
resource_id: str
others_affected: bool
+ node: Optional[str] = None
+ move_constraint_left_in_cib: bool = False
_code = codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE
@property
def message(self) -> str:
- return (
+ template = (
"Unable to ensure that moved resource '{resource_id}'{others} will "
"stay on the same node after a constraint used for moving it is "
"removed."
- ).format(
+ )
+ if self.move_constraint_left_in_cib:
+ template += (
+ " The constraint to move the resource has not been removed "
+ "from configuration. Consider removing it manually. Be aware "
+ "that removing the constraint may cause resources to move "
+ "to other nodes."
+ )
+ return template.format(
resource_id=self.resource_id,
others=" or other resources" if self.others_affected else "",
)
diff --git a/pcs/lib/commands/resource.py b/pcs/lib/commands/resource.py
index 82ce73e..39b24f0 100644
--- a/pcs/lib/commands/resource.py
+++ b/pcs/lib/commands/resource.py
@@ -1602,6 +1602,14 @@ def _nodes_exist_reports(
for node_name in (set(node_names) - existing_node_names)
]
+class ResourceMoveAutocleanSimulationFailure(Exception):
+ def __init__(self, other_resources_affected: bool):
+ super().__init__()
+ self._other_resources_affected = other_resources_affected
+
+ @property
+ def other_resources_affected(self) -> bool:
+ return self._other_resources_affected
def move_autoclean(
env: LibraryEnvironment,
@@ -1624,7 +1632,9 @@ def move_autoclean(
strict -- if True affecting other resources than the specified resource
will cause failure. If False affecting other resources is alowed.
"""
+ # pylint: disable=too-many-branches
# pylint: disable=too-many-locals
+ # pylint: disable=too-many-statements
wait_timeout = max(wait_timeout, 0)
if not env.is_cib_live:
raise LibraryError(
@@ -1655,6 +1665,8 @@ def move_autoclean(
reports.messages.CannotMoveResourceNotRunning(resource_id)
)
)
+ # add a move constraint to a temporary cib and get a cib diff which adds
+ # the move constraint
with get_tmp_cib(env.report_processor, cib_xml) as rsc_moved_cib_file:
stdout, stderr, retval = resource_move(
env.cmd_runner(dict(CIB_file=rsc_moved_cib_file.name)),
@@ -1676,6 +1688,8 @@ def move_autoclean(
add_constraint_cib_diff = diff_cibs_xml(
env.cmd_runner(), env.report_processor, cib_xml, rsc_moved_cib_xml
)
+ # clear the move constraint from the temporary cib and get a cib diff which
+ # removes the move constraint
with get_tmp_cib(
env.report_processor, rsc_moved_cib_xml
) as rsc_moved_constraint_cleared_cib_file:
@@ -1703,17 +1717,18 @@ def move_autoclean(
rsc_moved_cib_xml,
constraint_removed_cib,
)
-
+ # if both the diffs are no-op, nothing needs to be done
if not (add_constraint_cib_diff and remove_constraint_cib_diff):
env.report_processor.report(
reports.ReportItem.info(reports.messages.NoActionNecessary())
)
return
-
+ # simulate applying the diff which adds the move constraint
_, move_transitions, after_move_simulated_cib = simulate_cib(
env.cmd_runner(), get_cib(rsc_moved_cib_xml)
)
if strict:
+ # check if other resources would be affected
resources_affected_by_move = (
simulate_tools.get_resources_from_operations(
simulate_tools.get_operations_from_transitions(
@@ -1730,16 +1745,37 @@ def move_autoclean(
)
)
)
- _ensure_resource_moved_and_not_moved_back(
- env.cmd_runner,
- env.report_processor,
- etree_to_str(after_move_simulated_cib),
- remove_constraint_cib_diff,
- resource_id,
- strict,
- resource_state_before,
- node,
- )
+ # verify that:
+ # - a cib with added move constraint causes the resource to move by
+ # comparing the original status of the resource with a status computed
+ # from the cib with the added constraint
+ # - applying the diff which removes the move constraint won't trigger
+ # moving the resource (or other resources) around
+ try:
+ _ensure_resource_moved_and_not_moved_back(
+ env.cmd_runner,
+ env.report_processor,
+ etree_to_str(after_move_simulated_cib),
+ remove_constraint_cib_diff,
+ resource_id,
+ strict,
+ resource_state_before,
+ node,
+ )
+ except ResourceMoveAutocleanSimulationFailure as e:
+ raise LibraryError(
+ reports.ReportItem.error(
+ reports.messages.ResourceMoveAutocleanSimulationFailure(
+ resource_id,
+ e.other_resources_affected,
+ node=node,
+ move_constraint_left_in_cib=False,
+ )
+ )
+ ) from e
+
+ # apply the diff which adds the move constraint to the live cib and wait
+ # for the cluster to settle
push_cib_diff_xml(env.cmd_runner(), add_constraint_cib_diff)
env.report_processor.report(
ReportItem.info(
@@ -1747,16 +1783,36 @@ def move_autoclean(
)
)
env.wait_for_idle(wait_timeout)
- _ensure_resource_moved_and_not_moved_back(
- env.cmd_runner,
- env.report_processor,
- get_cib_xml(env.cmd_runner()),
- remove_constraint_cib_diff,
- resource_id,
- strict,
- resource_state_before,
- node,
- )
+ # verify that:
+ # - the live cib (now containing the move constraint) causes the resource
+ # to move by comparing the original status of the resource with a status
+ # computed from the live cib
+ # - applying the diff which removes the move constraint won't trigger
+ # moving the resource (or other resources) around
+ try:
+ _ensure_resource_moved_and_not_moved_back(
+ env.cmd_runner,
+ env.report_processor,
+ get_cib_xml(env.cmd_runner()),
+ remove_constraint_cib_diff,
+ resource_id,
+ strict,
+ resource_state_before,
+ node,
+ )
+ except ResourceMoveAutocleanSimulationFailure as e:
+ raise LibraryError(
+ reports.ReportItem.error(
+ reports.messages.ResourceMoveAutocleanSimulationFailure(
+ resource_id,
+ e.other_resources_affected,
+ node=node,
+ move_constraint_left_in_cib=True,
+ )
+ )
+ ) from e
+ # apply the diff which removes the move constraint to the live cib and wait
+ # for the cluster to settle
push_cib_diff_xml(env.cmd_runner(), remove_constraint_cib_diff)
env.report_processor.report(
ReportItem.info(
@@ -1822,13 +1878,7 @@ def _ensure_resource_moved_and_not_moved_back(
)
if strict:
if clean_operations:
- raise LibraryError(
- reports.ReportItem.error(
- reports.messages.ResourceMoveAutocleanSimulationFailure(
- resource_id, others_affected=True
- )
- )
- )
+ raise ResourceMoveAutocleanSimulationFailure(True)
else:
if any(
rsc == resource_id
@@ -1836,13 +1886,7 @@ def _ensure_resource_moved_and_not_moved_back(
clean_operations
)
):
- raise LibraryError(
- reports.ReportItem.error(
- reports.messages.ResourceMoveAutocleanSimulationFailure(
- resource_id, others_affected=False
- )
- )
- )
+ raise ResourceMoveAutocleanSimulationFailure(False)
def ban(env, resource_id, node=None, master=False, lifetime=None, wait=False):
diff --git a/pcs/pcs.8.in b/pcs/pcs.8.in
index 94514c1..8f4b647 100644
--- a/pcs/pcs.8.in
+++ b/pcs/pcs.8.in
@@ -152,7 +152,7 @@ debug\-monitor <resource id> [\fB\-\-full\fR]
This command will force the specified resource to be monitored on this node ignoring the cluster recommendations and print the output from monitoring the resource. Using \fB\-\-full\fR will give more detailed output. This is mainly used for debugging resources that fail to be monitored.
.TP
move <resource id> [destination node] [\fB\-\-promoted\fR] [\fB\-\-strict\fR] [\fB\-\-wait\fR[=n]]
-Move the resource off the node it is currently running on. This is achieved by creating a \-INFINITY location constraint to ban the node. If destination node is specified the resource will be moved to that node by creating an INFINITY location constraint to prefer the destination node. The constraint needed for moving the resource will be automatically removed once the resource is running on it's new location. The command will fail in case it is not possible to verify that the resource will not be moved back after deleting the constraint.
+Move the resource off the node it is currently running on. This is achieved by creating a \-INFINITY location constraint to ban the node. If destination node is specified the resource will be moved to that node by creating an INFINITY location constraint to prefer the destination node. The constraint needed for moving the resource will be automatically removed once the resource is running on its new location. The command will fail in case it is not possible to verify that the resource will not be moved back after deleting the constraint. If this happens after the location constraint has been created, the constraint will be left in the configuration.
If \fB\-\-strict\fR is specified, the command will also fail if other resources would be affected.
diff --git a/pcs/usage.py b/pcs/usage.py
index bc88591..aab6a78 100644
--- a/pcs/usage.py
+++ b/pcs/usage.py
@@ -369,9 +369,11 @@ Commands:
If destination node is specified the resource will be moved to that
node by creating an INFINITY location constraint to prefer the
destination node. The constraint needed for moving the resource will be
- automatically removed once the resource is running on it's new
- location. The command will fail in case it is not possible to verify
- that the resource will not be moved back after deleting the constraint.
+ automatically removed once the resource is running on its new location.
+ The command will fail in case it is not possible to verify that the
+ resource will not be moved back after deleting the constraint. If this
+ happens after the location constraint has been created, the constraint
+ will be left in the configuration.
If --strict is specified, the command will also fail if other resources
would be affected.
diff --git a/pcs_test/tier0/cli/reports/test_messages.py b/pcs_test/tier0/cli/reports/test_messages.py
index 6671021..ac8625c 100644
--- a/pcs_test/tier0/cli/reports/test_messages.py
+++ b/pcs_test/tier0/cli/reports/test_messages.py
@@ -618,3 +618,52 @@ class StonithRestartlessUpdateUnableToPerform(CliReportMessageTestBase):
report_msg,
f"{report_msg.message}, please use command 'pcs stonith update' instead",
)
+
+class ResourceMoveAutocleanSimulationFailure(CliReportMessageTestBase):
+ def test_constraint_not_created(self):
+ self.assert_message(
+ messages.ResourceMoveAutocleanSimulationFailure(
+ "R1", others_affected=True
+ ),
+ (
+ "Unable to ensure that moved resource 'R1' or other resources "
+ "will stay on the same node after a constraint used for moving "
+ "it is removed."
+ ),
+ )
+
+ def test_without_node(self):
+ self.assert_message(
+ messages.ResourceMoveAutocleanSimulationFailure(
+ "R1", others_affected=True, move_constraint_left_in_cib=True
+ ),
+ (
+ "Unable to ensure that moved resource 'R1' or other resources "
+ "will stay on the same node after a constraint used for moving "
+ "it is removed."
+ " The constraint to move the resource has not been removed "
+ "from configuration. Consider removing it manually. Be aware "
+ "that removing the constraint may cause resources to move to "
+ "other nodes."
+ " Run 'pcs resource clear R1' to remove the constraint."
+ ),
+ )
+
+ def test_with_node(self):
+ self.assert_message(
+ messages.ResourceMoveAutocleanSimulationFailure(
+ "R1",
+ others_affected=False,
+ node="node1",
+ move_constraint_left_in_cib=True,
+ ),
+ (
+ "Unable to ensure that moved resource 'R1' will stay on the "
+ "same node after a constraint used for moving it is removed."
+ " The constraint to move the resource has not been removed "
+ "from configuration. Consider removing it manually. Be aware "
+ "that removing the constraint may cause resources to move to "
+ "other nodes."
+ " Run 'pcs resource clear R1 node1' to remove the constraint."
+ ),
+ )
\ No newline at end of file
diff --git a/pcs_test/tier0/common/reports/test_messages.py b/pcs_test/tier0/common/reports/test_messages.py
index a56ef38..7b1b799 100644
--- a/pcs_test/tier0/common/reports/test_messages.py
+++ b/pcs_test/tier0/common/reports/test_messages.py
@@ -4542,7 +4542,7 @@ class ResourceMoveAffectsOtherResources(NameBuildTest):
class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
- def test_success(self):
+ def test_simulation(self):
self.assert_message_from_report(
(
"Unable to ensure that moved resource 'R1' will stay on the "
@@ -4553,7 +4553,7 @@ class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
),
)
- def test_others_affected(self):
+ def test_simulation_others_affected(self):
self.assert_message_from_report(
(
"Unable to ensure that moved resource 'R1' or other resources "
@@ -4565,6 +4565,36 @@ class ResourceMoveAutocleanSimulationFailure(NameBuildTest):
),
)
+ def test_live(self):
+ self.assert_message_from_report(
+ (
+ "Unable to ensure that moved resource 'R1' will stay on the "
+ "same node after a constraint used for moving it is removed."
+ " The constraint to move the resource has not been removed "
+ "from configuration. Consider removing it manually. Be aware "
+ "that removing the constraint may cause resources to move to "
+ "other nodes."
+ ),
+ reports.ResourceMoveAutocleanSimulationFailure(
+ "R1", others_affected=False, move_constraint_left_in_cib=True
+ ),
+ )
+
+ def test_live_others_affected(self):
+ self.assert_message_from_report(
+ (
+ "Unable to ensure that moved resource 'R1' or other resources "
+ "will stay on the same node after a constraint used for moving "
+ "it is removed."
+ " The constraint to move the resource has not been removed "
+ "from configuration. Consider removing it manually. Be aware "
+ "that removing the constraint may cause resources to move to "
+ "other nodes."
+ ),
+ reports.ResourceMoveAutocleanSimulationFailure(
+ "R1", others_affected=True, move_constraint_left_in_cib=True
+ ),
+ )
class ParseErrorJsonFile(NameBuildTest):
def test_success(self):
diff --git a/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py b/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
index 90ed2d8..6e5c286 100644
--- a/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
+++ b/pcs_test/tier0/lib/commands/resource/test_resource_move_autoclean.py
@@ -1261,6 +1261,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
resource_id=self.resource_id,
others_affected=False,
+ node=None,
+ move_constraint_left_in_cib=False,
)
],
expected_in_processor=False,
@@ -1292,6 +1294,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
resource_id=self.resource_id,
others_affected=True,
+ node=None,
+ move_constraint_left_in_cib=False,
)
],
expected_in_processor=False,
@@ -1324,6 +1328,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
resource_id=self.resource_id,
others_affected=False,
+ node=None,
+ move_constraint_left_in_cib=True,
)
],
expected_in_processor=False,
@@ -1355,6 +1361,8 @@ class MoveAutocleanFailures(MoveAutocleanCommonSetup):
reports.codes.RESOURCE_MOVE_AUTOCLEAN_SIMULATION_FAILURE,
resource_id=self.resource_id,
others_affected=True,
+ node=None,
+ move_constraint_left_in_cib=True,
)
],
expected_in_processor=False,
--
2.33.0

View File

@ -1,6 +1,6 @@
Name: pcs
Version: 0.11.2
Release: 8
Release: 9
License: GPLv2 and BSD-2-Clause and ASL 2.0 and MIT
URL: https://github.com/ClusterLabs/pcs
Summary: Pacemaker Configuration System
@ -48,7 +48,7 @@ Patch7: fix-pcs-quorum-device-remove.patch
Patch8: tests-fix-datetime-race-condition.patch
Patch9: Fix-CVE-2022-1049.patch
Patch10: Fix-CVE-2022-2735.patch
Patch11: improve-error-messages-in-pcs-resource-move.patch
# git for patches
BuildRequires: git-core
BuildRequires: make
@ -411,6 +411,9 @@ run_all_tests
%license pyagentx_LICENSE.txt
%changelog
* Wed Oct 25 2023 zouzhimin <zouzhimin@kylinos.cn> - 0.11.2-9
- improve error messages in pcs resource move
* Fri Oct 20 2023 bizhiyuan <bizhiyuan@kylinos.cn> - 0.11.2-8
- Fix-CVE-2022-2735