From 6eabe2d43d464d34df1670192aef6c4966ab1a94 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 30 May 2023 16:42:18 +0200 Subject: [PATCH 1/2] awk: fix precedence of = relative to == backport from upstream: https://git.alpinelinux.org/aports/plain/main/busybox/CVE-2023-42364-CVE-2023-42365.patch Discovered while adding code to disallow assignments to non-lvalues function old new delta parse_expr 936 991 +55 .rodata 105243 105247 +4 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 2/0 up/down: 59/0) Total: 59 bytes Signed-off-by: Denys Vlasenko (cherry picked from commit 0256e00a9d077588bd3a39f5a1ef7e2eaa2911e4) --- editors/awk.c | 66 ++++++++++++++++++++++++++++++--------------- testsuite/awk.tests | 5 ++++ 2 files changed, 50 insertions(+), 21 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 728ee8685..3f4e0600d 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -337,7 +337,9 @@ static void debug_parse_print_tc(uint32_t n) #undef P #undef PRIMASK #undef PRIMASK2 -#define P(x) (x << 24) +/* Smaller 'x' means _higher_ operator precedence */ +#define PRECEDENCE(x) (x << 24) +#define P(x) PRECEDENCE(x) #define PRIMASK 0x7F000000 #define PRIMASK2 0x7E000000 @@ -360,7 +362,7 @@ enum { OC_MOVE = 0x1f00, OC_PGETLINE = 0x2000, OC_REGEXP = 0x2100, OC_REPLACE = 0x2200, OC_RETURN = 0x2300, OC_SPRINTF = 0x2400, OC_TERNARY = 0x2500, OC_UNARY = 0x2600, OC_VAR = 0x2700, - OC_DONE = 0x2800, + OC_CONST = 0x2800, OC_DONE = 0x2900, ST_IF = 0x3000, ST_DO = 0x3100, ST_FOR = 0x3200, ST_WHILE = 0x3300 @@ -440,9 +442,9 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74), OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', - OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', + OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', + OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) @@ -1290,7 +1292,7 @@ static uint32_t next_token(uint32_t expected) save_tclass = tc; save_info = t_info; tc = TC_BINOPX; - t_info = OC_CONCAT | SS | P(35); + t_info = OC_CONCAT | SS | PRECEDENCE(35); } t_tclass = tc; @@ -1350,9 +1352,8 @@ static node *parse_expr(uint32_t term_tc) { node sn; node *cn = &sn; - node *vn, *glptr; + node *glptr; uint32_t tc, expected_tc; - var *v; debug_printf_parse("%s() term_tc(%x):", __func__, term_tc); debug_parse_print_tc(term_tc); @@ -1363,11 +1364,12 @@ static node *parse_expr(uint32_t term_tc) expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc; while (!((tc = next_token(expected_tc)) & term_tc)) { + node *vn; if (glptr && (t_info == TI_LESS)) { /* input redirection (<) attached to glptr node */ debug_printf_parse("%s: input redir\n", __func__); - cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37)); + cn = glptr->l.n = new_node(OC_CONCAT | SS | PRECEDENCE(37)); cn->a.n = glptr; expected_tc = TS_OPERAND | TS_UOPPRE; glptr = NULL; @@ -1379,24 +1381,42 @@ static node *parse_expr(uint32_t term_tc) * previous operators with higher priority */ vn = cn; while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) - || ((t_info == vn->info) && t_info == TI_COLON) + || (t_info == vn->info && t_info == TI_COLON) ) { vn = vn->a.n; if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN); } if (t_info == TI_TERNARY) //TODO: why? - t_info += P(6); + t_info += PRECEDENCE(6); cn = vn->a.n->r.n = new_node(t_info); cn->a.n = vn->a.n; if (tc & TS_BINOP) { cn->l.n = vn; -//FIXME: this is the place to detect and reject assignments to non-lvalues. -//Currently we allow "assignments" to consts and temporaries, nonsense like this: -// awk 'BEGIN { "qwe" = 1 }' -// awk 'BEGIN { 7 *= 7 }' -// awk 'BEGIN { length("qwe") = 1 }' -// awk 'BEGIN { (1+1) += 3 }' + + /* Prevent: + * awk 'BEGIN { "qwe" = 1 }' + * awk 'BEGIN { 7 *= 7 }' + * awk 'BEGIN { length("qwe") = 1 }' + * awk 'BEGIN { (1+1) += 3 }' + */ + /* Assignment? (including *= and friends) */ + if (((t_info & OPCLSMASK) == OC_MOVE) + || ((t_info & OPCLSMASK) == OC_REPLACE) + ) { + debug_printf_parse("%s: MOVE/REPLACE vn->info:%08x\n", __func__, vn->info); + /* Left side is a (variable or array element) + * or function argument + * or $FIELD ? + */ + if ((vn->info & OPCLSMASK) != OC_VAR + && (vn->info & OPCLSMASK) != OC_FNARG + && (vn->info & OPCLSMASK) != OC_FIELD + ) { + syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */ + } + } + expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP; if (t_info == TI_PGETLINE) { /* it's a pipe */ @@ -1432,6 +1452,8 @@ static node *parse_expr(uint32_t term_tc) /* one should be very careful with switch on tclass - * only simple tclasses should be used (TC_xyz, not TS_xyz) */ switch (tc) { + var *v; + case TC_VARIABLE: case TC_ARRAY: debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__); @@ -1452,14 +1474,14 @@ static node *parse_expr(uint32_t term_tc) case TC_NUMBER: case TC_STRING: debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__); - cn->info = OC_VAR; + cn->info = OC_CONST; v = cn->l.v = xzalloc(sizeof(var)); - if (tc & TC_NUMBER) + if (tc & TC_NUMBER) { setvar_i(v, t_double); - else { + } else { setvar_s(v, t_string); - expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */ } + expected_tc &= ~TC_UOPPOST; /* NUM++, "str"++ not allowed */ break; case TC_REGEXP: @@ -3088,6 +3110,8 @@ static var *evaluate(node *op, var *res) /* -- recursive node type -- */ + case XC( OC_CONST ): + debug_printf_eval("CONST "); case XC( OC_VAR ): debug_printf_eval("VAR\n"); L.v = op->l.v; diff --git a/testsuite/awk.tests b/testsuite/awk.tests index bbf0fbff1..a71ef3b26 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -485,4 +485,9 @@ testing 'awk assign while test' \ "" \ "foo" +testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ + "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ + '0\n1\n2\n1\n3\n' \ + '' '' + exit $FAILCOUNT -- 2.45.2 From 947f3d2d2739afe248bf5343eaf9e35f3fd95dc2 Mon Sep 17 00:00:00 2001 From: Natanael Copa Date: Tue, 21 May 2024 14:46:08 +0200 Subject: [PATCH 2/2] awk: fix ternary operator and precedence of = Adjust the = precedence test to match behavior of gawk, mawk and FreeBSD. awk 'BEGIN {print v=3==3; print v}' should print two '1'. To fix this, and to unbreak the ternary conditional operator, we restore the precedence of = in the token list, but override this with a lower priority when the assignment is on the right side of a compare. This fixes commit 0256e00a9d07 (awk: fix precedence of = relative to ==) Signed-off-by: Natanael Copa (cherry picked from commit 1714301c405ef03b39605c85c23f22a190cddd95) --- editors/awk.c | 18 ++++++++++++++---- testsuite/awk.tests | 9 +++++++-- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/editors/awk.c b/editors/awk.c index 3f4e0600d..85e0f50cd 100644 --- a/editors/awk.c +++ b/editors/awk.c @@ -442,9 +442,10 @@ static const uint32_t tokeninfo[] ALIGN4 = { #define TI_PREINC (OC_UNARY|xV|P(9)|'P') #define TI_PREDEC (OC_UNARY|xV|P(9)|'M') TI_PREINC, TI_PREDEC, OC_FIELD|xV|P(5), - OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(38), OC_REPLACE|NV|P(38)|'+', OC_REPLACE|NV|P(38)|'-', - OC_REPLACE|NV|P(38)|'*', OC_REPLACE|NV|P(38)|'/', OC_REPLACE|NV|P(38)|'%', OC_REPLACE|NV|P(38)|'&', - OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(38)|'&', OC_BINARY|NV|P(15)|'&', +#define TI_ASSIGN (OC_MOVE|VV|P(74)) + OC_COMPARE|VV|P(39)|5, TI_ASSIGN, OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-', + OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&', + OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-', OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%', OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*', OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3, OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1, #define TI_LESS (OC_COMPARE|VV|P(39)|2) @@ -1376,11 +1377,19 @@ static node *parse_expr(uint32_t term_tc) continue; } if (tc & (TS_BINOP | TC_UOPPOST)) { + int prio; debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc); /* for binary and postfix-unary operators, jump back over * previous operators with higher priority */ vn = cn; - while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2)) + /* Let assignment get higher priority when used on right + * side in compare. i.e: 2==v=3 */ + if (t_info == TI_ASSIGN && (vn->a.n->info & OPCLSMASK) == OC_COMPARE) { + prio = PRECEDENCE(38); + } else { + prio = (t_info & PRIMASK); + } + while ((prio > (vn->a.n->info & PRIMASK2)) || (t_info == vn->info && t_info == TI_COLON) ) { vn = vn->a.n; @@ -1412,6 +1421,7 @@ static node *parse_expr(uint32_t term_tc) if ((vn->info & OPCLSMASK) != OC_VAR && (vn->info & OPCLSMASK) != OC_FNARG && (vn->info & OPCLSMASK) != OC_FIELD + && (vn->info & OPCLSMASK) != OC_COMPARE ) { syntax_error(EMSG_UNEXP_TOKEN); /* no. bad */ } diff --git a/testsuite/awk.tests b/testsuite/awk.tests index a71ef3b26..c2f57605b 100755 --- a/testsuite/awk.tests +++ b/testsuite/awk.tests @@ -485,9 +485,14 @@ testing 'awk assign while test' \ "" \ "foo" -testing "awk = has higher precedence than == (despite what gawk manpage claims)" \ +testing "awk = has higher precedence than == on right side" \ "awk 'BEGIN { v=1; print 2==v; print 2==v=2; print v; print v=3==3; print v}'" \ - '0\n1\n2\n1\n3\n' \ + '0\n1\n2\n1\n1\n' \ + '' '' + +testing 'awk ternary precedence' \ + "awk 'BEGIN { a = 0 ? \"yes\": \"no\"; print a }'" \ + 'no\n' \ '' '' exit $FAILCOUNT -- 2.45.2