108 lines
3.7 KiB
Diff
108 lines
3.7 KiB
Diff
|
|
From 240caa32b9cab90a38ab863fd64e6becf5d1393c Mon Sep 17 00:00:00 2001
|
||
|
|
From: Evan Hunt <each@isc.org>
|
||
|
|
Date: Thu, 25 May 2023 23:53:50 -0700
|
||
|
|
Subject: [PATCH] Stale answer lookups could loop when over recursion quota
|
||
|
|
|
||
|
|
When a query was aborted because of the recursion quota being exceeded,
|
||
|
|
but triggered a stale answer response and a stale data refresh query,
|
||
|
|
it could cause named to loop back to where we are iterating and following
|
||
|
|
a delegation. Having no good answer in cache, we would fall back to
|
||
|
|
using serve-stale again, use the stale data, try to refresh the RRset,
|
||
|
|
and loop back again, without ever terminating until crashing due to
|
||
|
|
stack overflow.
|
||
|
|
|
||
|
|
This happens because in the functions 'query_notfound()' and
|
||
|
|
'query_delegation_recurse()', we check whether we can fall back to
|
||
|
|
serving stale data. We shouldn't do so if we are already refreshing
|
||
|
|
an RRset due to having prioritized stale data in cache.
|
||
|
|
|
||
|
|
In other words, we need to add an extra check to 'query_usestale()' to
|
||
|
|
disallow serving stale data if we are currently refreshing a stale
|
||
|
|
RRset.
|
||
|
|
|
||
|
|
As an additional mitigation to prevent looping, we now use the result
|
||
|
|
code ISC_R_ALREADYRUNNING rather than ISC_R_FAILURE when a recursion
|
||
|
|
loop is encountered, and we check for that condition in
|
||
|
|
'query_usestale()' as well.
|
||
|
|
|
||
|
|
---
|
||
|
|
lib/ns/query.c | 30 ++++++++++++++++++++++--------
|
||
|
|
1 file changed, 22 insertions(+), 8 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/lib/ns/query.c b/lib/ns/query.c
|
||
|
|
index 1444de5..3ce6742 100644
|
||
|
|
--- a/lib/ns/query.c
|
||
|
|
+++ b/lib/ns/query.c
|
||
|
|
@@ -5696,6 +5696,7 @@ query_refresh_rrset(query_ctx_t *orig_qctx) {
|
||
|
|
qctx.client->query.dboptions &= ~(DNS_DBFIND_STALETIMEOUT |
|
||
|
|
DNS_DBFIND_STALEOK |
|
||
|
|
DNS_DBFIND_STALEENABLED);
|
||
|
|
+ qctx.client->nodetach = false;
|
||
|
|
|
||
|
|
/*
|
||
|
|
* We'll need some resources...
|
||
|
|
@@ -5920,7 +5921,14 @@ query_lookup(query_ctx_t *qctx) {
|
||
|
|
"%s stale answer used, an attempt to "
|
||
|
|
"refresh the RRset will still be made",
|
||
|
|
namebuf);
|
||
|
|
+
|
||
|
|
qctx->refresh_rrset = STALE(qctx->rdataset);
|
||
|
|
+
|
||
|
|
+ /*
|
||
|
|
+ * If we are refreshing the RRSet, we must not
|
||
|
|
+ * detach from the client in query_send().
|
||
|
|
+ */
|
||
|
|
+ qctx->client->nodetach = qctx->refresh_rrset;
|
||
|
|
}
|
||
|
|
} else {
|
||
|
|
/*
|
||
|
|
@@ -6272,7 +6280,7 @@ ns_query_recurse(ns_client_t *client, dns_rdatatype_t qtype, dns_name_t *qname,
|
||
|
|
if (recparam_match(&client->query.recparam, qtype, qname, qdomain)) {
|
||
|
|
ns_client_log(client, NS_LOGCATEGORY_CLIENT, NS_LOGMODULE_QUERY,
|
||
|
|
ISC_LOG_INFO, "recursion loop detected");
|
||
|
|
- return (ISC_R_FAILURE);
|
||
|
|
+ return (ISC_R_ALREADYRUNNING);
|
||
|
|
}
|
||
|
|
|
||
|
|
recparam_update(&client->query.recparam, qtype, qname, qdomain);
|
||
|
|
@@ -7235,10 +7243,21 @@ query_usestale(query_ctx_t *qctx, isc_result_t result) {
|
||
|
|
return (false);
|
||
|
|
}
|
||
|
|
|
||
|
|
- if (result == DNS_R_DUPLICATE || result == DNS_R_DROP) {
|
||
|
|
+ if (qctx->refresh_rrset) {
|
||
|
|
+ /*
|
||
|
|
+ * This is a refreshing query, we have already prioritized
|
||
|
|
+ * stale data, so don't enable serve-stale again.
|
||
|
|
+ */
|
||
|
|
+ return (false);
|
||
|
|
+ }
|
||
|
|
+
|
||
|
|
+ if (result == DNS_R_DUPLICATE || result == DNS_R_DROP ||
|
||
|
|
+ result == ISC_R_ALREADYRUNNING)
|
||
|
|
+ {
|
||
|
|
/*
|
||
|
|
* Don't enable serve-stale if the result signals a duplicate
|
||
|
|
- * query or query that is being dropped.
|
||
|
|
+ * query or a query that is being dropped or can't proceed
|
||
|
|
+ * because of a recursion loop.
|
||
|
|
*/
|
||
|
|
return (false);
|
||
|
|
}
|
||
|
|
@@ -11490,12 +11509,7 @@ ns_query_done(query_ctx_t *qctx) {
|
||
|
|
/*
|
||
|
|
* Client may have been detached after query_send(), so
|
||
|
|
* we test and store the flag state here, for safety.
|
||
|
|
- * If we are refreshing the RRSet, we must not detach from the client
|
||
|
|
- * in the query_send(), so we need to override the flag.
|
||
|
|
*/
|
||
|
|
- if (qctx->refresh_rrset) {
|
||
|
|
- qctx->client->nodetach = true;
|
||
|
|
- }
|
||
|
|
nodetach = qctx->client->nodetach;
|
||
|
|
query_send(qctx->client);
|
||
|
|
|
||
|
|
--
|
||
|
|
2.33.0
|
||
|
|
|